Subzero: Run sandboxed cross tests, and do some cleanup. This change runs all cross tests in both sandboxed and unsandboxed modes. Unfortunately, crosstest run time is more than doubled because of LTO of the crosstest drivers. (We may want to add "full" and "lite" versions of the cross tests.) LLVM triple strings are removed where possible (from .ll files), and when they are generated, we use just i686 or i686-nacl. "Fix" the integrated assembler to emit the lock prefix after the 16-bit operand-size override prefix, to make the validator happy. Don't add external symbol references to the ELF file for llvm.* intrinsic functions. Make the ELF object writer honor the -externalize argument. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4092 R=jvoung@chromium.org Review URL: https://codereview.chromium.org/973823003

commit: a67fc448e9779ac8b7d9751bd7cf32f94047a345 [log] [tgz]
author: Jim Stichnoth <stichnot@chromium.org> Tue Mar 03 16:13:11 2015 -0800
committer: Jim Stichnoth <stichnot@chromium.org> Tue Mar 03 16:13:11 2015 -0800
tree: f1af2ac5df1c353b468aa197851767075738c940
parent: 437ceff23f529d87a6dac702dd4bfbaec021321f [diff]
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index c76cdef..a8b0503 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh

@@ -10,140 +10,164 @@
 PATH="../pydir:${PATH}"
 OPTLEVELS="m1 2"
 ATTRIBUTES="sse2 sse4.1"
+SANDBOX="0 1"
 OUTDIR=Output
 # Clean the output directory to avoid reusing stale results.
 rm -rf "${OUTDIR}"
 mkdir -p "${OUTDIR}"
 
-for optlevel in ${OPTLEVELS} ; do
+for sb in ${SANDBOX} ; do
+  for optlevel in ${OPTLEVELS} ; do
     for attribute in ${ATTRIBUTES} ; do
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=simple_loop.c \
-            --driver=simple_loop_main.c \
-            --output=simple_loop_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=simple_loop.c \
+        --driver=simple_loop_main.c \
+        --output=simple_loop_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=mem_intrin.cpp \
-            --driver=mem_intrin_main.cpp \
-            --output=mem_intrin_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=mem_intrin.cpp \
+        --driver=mem_intrin_main.cpp \
+        --output=mem_intrin_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_arith.cpp \
-            --test=test_arith_frem.ll \
-            --test=test_arith_sqrt.ll \
-            --driver=test_arith_main.cpp \
-            --output=test_arith_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_arith.cpp \
+        --test=test_arith_frem.ll \
+        --test=test_arith_sqrt.ll \
+        --driver=test_arith_main.cpp \
+        --output=test_arith_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_bitmanip.cpp --test=test_bitmanip_intrin.ll \
-            --driver=test_bitmanip_main.cpp \
-            --output=test_bitmanip_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_bitmanip.cpp --test=test_bitmanip_intrin.ll \
+        --driver=test_bitmanip_main.cpp \
+        --output=test_bitmanip_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_calling_conv.cpp \
-            --driver=test_calling_conv_main.cpp \
-            --output=test_calling_conv_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_calling_conv.cpp \
+        --driver=test_calling_conv_main.cpp \
+        --output=test_calling_conv_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_cast.cpp --test=test_cast_to_u1.ll \
-            --test=test_cast_vectors.ll \
-            --driver=test_cast_main.cpp \
-            --output=test_cast_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_cast.cpp --test=test_cast_to_u1.ll \
+        --test=test_cast_vectors.ll \
+        --driver=test_cast_main.cpp \
+        --output=test_cast_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_fcmp.pnacl.ll \
-            --driver=test_fcmp_main.cpp \
-            --output=test_fcmp_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_fcmp.pnacl.ll \
+        --driver=test_fcmp_main.cpp \
+        --output=test_fcmp_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_global.cpp \
-            --driver=test_global_main.cpp \
-            --output=test_global_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_global.cpp \
+        --driver=test_global_main.cpp \
+        --output=test_global_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_icmp.cpp --test=test_icmp_i1vec.ll \
-            --driver=test_icmp_main.cpp \
-            --output=test_icmp_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_icmp.cpp --test=test_icmp_i1vec.ll \
+        --driver=test_icmp_main.cpp \
+        --output=test_icmp_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_select.ll \
-            --driver=test_select_main.cpp \
-            --output=test_select_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_select.ll \
+        --driver=test_select_main.cpp \
+        --output=test_select_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_stacksave.c \
-            --driver=test_stacksave_main.c \
-            --output=test_stacksave_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_stacksave.c \
+        --driver=test_stacksave_main.c \
+        --output=test_stacksave_sb${sb}_O${optlevel}_${attribute}
 
-        # Compile the non-subzero object files straight from source
-        # since the native LLVM backend does not understand how to
-        # lower NaCl-specific intrinsics.
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ \
-            --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_sync_atomic.cpp \
-            --crosstest-bitcode=0 \
-            --driver=test_sync_atomic_main.cpp \
-            --output=test_sync_atomic_O${optlevel}_${attribute}
+      # Compile the non-subzero object files straight from source
+      # since the native LLVM backend does not understand how to
+      # lower NaCl-specific intrinsics.
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_sync_atomic.cpp \
+        --crosstest-bitcode=0 \
+        --driver=test_sync_atomic_main.cpp \
+        --output=test_sync_atomic_sb${sb}_O${optlevel}_${attribute}
 
-        crosstest.py -O${optlevel} --mattr ${attribute} \
-            --prefix=Subzero_ --target=x8632 \
-            --dir="${OUTDIR}" \
-            --test=test_vector_ops.ll \
-            --driver=test_vector_ops_main.cpp \
-            --output=test_vector_ops_O${optlevel}_${attribute}
+      crosstest.py -O${optlevel} --mattr ${attribute} \
+        --prefix=Subzero_ \
+        --target=x8632 \
+        --sandbox=${sb} \
+        --dir="${OUTDIR}" \
+        --test=test_vector_ops.ll \
+        --driver=test_vector_ops_main.cpp \
+        --output=test_vector_ops_sb${sb}_O${optlevel}_${attribute}
 
     done
+  done
 done
 
-for optlevel in ${OPTLEVELS} ; do
+for sb in ${SANDBOX} ; do
+  if [ $sb = 0 ] ; then
+    PREFIX=
+  else
+    PREFIX="../../../../run.py -q"
+  fi
+  for optlevel in ${OPTLEVELS} ; do
     for attribute in ${ATTRIBUTES}; do
-        "${OUTDIR}"/simple_loop_O${optlevel}_${attribute}
-        "${OUTDIR}"/mem_intrin_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_arith_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_calling_conv_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_cast_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_fcmp_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_global_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_icmp_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_select_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_stacksave_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_sync_atomic_O${optlevel}_${attribute}
-        "${OUTDIR}"/test_vector_ops_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/simple_loop_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/mem_intrin_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_arith_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_bitmanip_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_calling_conv_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_cast_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_fcmp_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_global_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_icmp_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_select_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_stacksave_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_sync_atomic_sb${sb}_O${optlevel}_${attribute}
+      ${PREFIX} "${OUTDIR}"/test_vector_ops_sb${sb}_O${optlevel}_${attribute}
     done
+  done
 done

diff --git a/crosstest/test_arith_frem.ll b/crosstest/test_arith_frem.ll
index bb0590d..42114d4 100644
--- a/crosstest/test_arith_frem.ll
+++ b/crosstest/test_arith_frem.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define float @_Z6myFremff(float %a, float %b) {
   %rem = frem float %a, %b
   ret float %rem

diff --git a/crosstest/test_arith_sqrt.ll b/crosstest/test_arith_sqrt.ll
index 2550aeb..4e91d8d 100644
--- a/crosstest/test_arith_sqrt.ll
+++ b/crosstest/test_arith_sqrt.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 declare float @llvm.sqrt.f32(float)
 declare double @llvm.sqrt.f64(double)
 

diff --git a/crosstest/test_bitmanip_intrin.ll b/crosstest/test_bitmanip_intrin.ll
index 23df538..cce36f2 100644
--- a/crosstest/test_bitmanip_intrin.ll
+++ b/crosstest/test_bitmanip_intrin.ll

@@ -4,8 +4,6 @@
 ; Also, normalize the intrinsic to take a single parameter when there
 ; can be two, as is the case for ctlz and cttz.
 
-target triple = "i686-pc-linux-gnu"
-
 declare i32 @llvm.ctlz.i32(i32, i1)
 declare i64 @llvm.ctlz.i64(i64, i1)
 

diff --git a/crosstest/test_cast_to_u1.ll b/crosstest/test_cast_to_u1.ll
index f8a9ec6..31c1f07 100644
--- a/crosstest/test_cast_to_u1.ll
+++ b/crosstest/test_cast_to_u1.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define i32 @_Z4castIxbET0_T_(i64 %a) {
 entry:
 ;  %tobool = icmp ne i64 %a, 0

diff --git a/crosstest/test_cast_vectors.ll b/crosstest/test_cast_vectors.ll
index 1c9ffff..4a9b989 100644
--- a/crosstest/test_cast_vectors.ll
+++ b/crosstest/test_cast_vectors.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define <4 x float> @_Z4castIDv4_iDv4_fET0_T_(<4 x i32> %a) {
 entry:
   %0 = sitofp <4 x i32> %a to <4 x float>

diff --git a/crosstest/test_fcmp.pnacl.ll b/crosstest/test_fcmp.pnacl.ll
index a175b1d..0637892 100644
--- a/crosstest/test_fcmp.pnacl.ll
+++ b/crosstest/test_fcmp.pnacl.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 ; This file is extracted from fp.pnacl.ll and vector-fcmp.ll in the lit
 ; tests, with the "internal" attribute removed from the functions.
 

diff --git a/crosstest/test_icmp_i1vec.ll b/crosstest/test_icmp_i1vec.ll
index bf9aa37..78c14ee 100644
--- a/crosstest/test_icmp_i1vec.ll
+++ b/crosstest/test_icmp_i1vec.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define <16 x i8> @_Z8icmpi1EqDv16_aS_(<16 x i8> %a, <16 x i8> %b) {
 entry:
   %a.trunc = trunc <16 x i8> %a to <16 x i1>

diff --git a/crosstest/test_select.ll b/crosstest/test_select.ll
index e286b2d..89ea076 100644
--- a/crosstest/test_select.ll
+++ b/crosstest/test_select.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define <4 x float> @_Z6selectDv4_iDv4_fS0_(<4 x i32> %cond.ext, <4 x float> %arg1, <4 x float> %arg2) {
 entry:
   %cond = trunc <4 x i32> %cond.ext to <4 x i1>

diff --git a/crosstest/test_vector_ops.ll b/crosstest/test_vector_ops.ll
index 07011b2..a7358ba 100644
--- a/crosstest/test_vector_ops.ll
+++ b/crosstest/test_vector_ops.ll

@@ -1,5 +1,3 @@
-target triple = "i686-pc-linux-gnu"
-
 define <4 x float> @insertelement_v4f32(<4 x float> %vec, float %elt, i32 %idx) {
   switch i32 %idx, label %abort [
   i32 0, label %idx0

diff --git a/pydir/build-runtime.py b/pydir/build-runtime.py
index 63ebc80..31eb8d2 100755
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py

@@ -74,12 +74,12 @@
                     '{srcdir}/szrt_ll.ll'.format(srcdir=srcdir)]
         # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_native_x8632.o
         Translate(ll_files,
-                  ['-mtriple=i386-unknown-linux-gnu', '-mcpu=pentium4m'],
+                  ['-mtriple=i686', '-mcpu=pentium4m'],
                   '{rtdir}/szrt_native_x8632.o'.format(rtdir=rtdir),
                   args.verbose)
         # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_sb_x8632.o
         Translate(ll_files,
-                  ['-mtriple=i686-none-nacl-gnu', '-mcpu=pentium4m'],
+                  ['-mtriple=i686-nacl', '-mcpu=pentium4m'],
                   '{rtdir}/szrt_sb_x8632.o'.format(rtdir=rtdir),
                   args.verbose)
     finally:

diff --git a/pydir/crosstest.py b/pydir/crosstest.py
index ae3b50e..55a79df 100755
--- a/pydir/crosstest.py
+++ b/pydir/crosstest.py

@@ -2,7 +2,6 @@
 
 import argparse
 import os
-import re
 import subprocess
 import sys
 import tempfile
@@ -10,7 +9,7 @@
 from utils import shellcmd
 from utils import FindBaseNaCl
 
-if __name__ == '__main__':
+def main():
     """Builds a cross-test binary that allows functions translated by
     Subzero and llc to be compared.
 
@@ -49,6 +48,9 @@
                            dest='attr', choices=['sse2', 'sse4.1'],
                            metavar='ATTRIBUTE',
                            help='Target attribute. Default %(default)s.')
+    argparser.add_argument('--sandbox', required=False, default=0, type=int,
+                           dest='sandbox',
+                           help='Use sandboxing. Default "%(default)s".')
     argparser.add_argument('--prefix', required=True,
                            metavar='SZ_PREFIX',
                            help='String prepended to Subzero symbol names')
@@ -71,14 +73,11 @@
     args = argparser.parse_args()
 
     nacl_root = FindBaseNaCl()
-    # Prepend PNaCl bin to $PATH.
-    os.environ['PATH'] = nacl_root + \
-        '/toolchain/linux_x86/pnacl_newlib/bin' + \
-        os.pathsep + os.environ['PATH']
+    bindir = ('{root}/toolchain/linux_x86/pnacl_newlib/bin'
+              .format(root=nacl_root))
+    triple = arch_map[args.target] + ('-nacl' if args.sandbox else '')
 
     objs = []
-    remove_internal = re.compile('^define internal ')
-    fix_target = re.compile('le32-unknown-nacl')
     for arg in args.test:
         base, ext = os.path.splitext(arg)
         if ext == '.ll':
@@ -87,68 +86,61 @@
             bitcode = os.path.join(args.dir, base + '.pnacl.ll')
             shellcmd(['../pydir/build-pnacl-ir.py', '--disable-verify',
                       '--dir', args.dir, arg])
-            # Read in the bitcode file, fix it up, and rewrite the file.
-            f = open(bitcode)
-            ll_lines = f.readlines()
-            f.close()
-            f = open(bitcode, 'w')
-            for line in ll_lines:
-                line = remove_internal.sub('define ', line)
-                line = fix_target.sub('i686-pc-linux-gnu', line)
-                f.write(line)
-            f.close()
 
-        base_sz = '%s.O%s.%s.%s' % (base, args.optlevel, args.attr, args.target)
+        base_sz = '{base}.{sb}.O{opt}.{attr}.{target}'.format(
+            base=base, sb='sb' if args.sandbox else 'nat', opt=args.optlevel,
+            attr=args.attr, target=args.target)
         asm_sz = os.path.join(args.dir, base_sz + '.sz.s')
         obj_sz = os.path.join(args.dir, base_sz + '.sz.o')
-        obj_llc = os.path.join(args.dir, base + '.llc.o')
+        obj_llc = os.path.join(args.dir, base_sz + '.llc.o')
         shellcmd(['../pnacl-sz',
                   '-O' + args.optlevel,
                   '-mattr=' + args.attr,
                   '--target=' + args.target,
+                  '--sandbox=' + str(args.sandbox),
                   '--prefix=' + args.prefix,
                   '-allow-uninitialized-globals',
+                  '-externalize',
                   '-filetype=' + args.filetype,
                   '-o=' + (obj_sz if args.filetype == 'obj' else asm_sz),
                   bitcode])
         if args.filetype != 'obj':
-            shellcmd(['llvm-mc',
-                      '-triple=' + arch_map[args.target],
+            shellcmd(['{bin}/llvm-mc'.format(bin=bindir),
+                      '-triple=' + triple,
                       '-filetype=obj',
                       '-o=' + obj_sz,
                       asm_sz])
         objs.append(obj_sz)
-        # Each original bitcode file needs to be translated by the
-        # LLVM toolchain and have its object file linked in.  There
-        # are two ways to do this: explicitly use llc, or include the
-        # .ll file in the link command.  It turns out that these two
-        # approaches can produce different semantics on some undefined
-        # bitcode behavior.  Specifically, LLVM produces different
-        # results for overflowing fptoui instructions for i32 and i64
-        # on x86-32.  As it turns out, Subzero lowering was based on
-        # inspecting the object code produced by the direct llc
-        # command, so we need to directly run llc on the bitcode, even
-        # though it makes this script longer, to avoid spurious
-        # failures.  This behavior can be inspected by switching
-        # use_llc between True and False.
-        use_llc = False
-        pure_c = os.path.splitext(args.driver)[1] == '.c'
-        if not args.crosstest_bitcode:
-            objs.append(arg)
-        elif use_llc:
-            shellcmd(['llc'
+        if args.crosstest_bitcode:
+            shellcmd(['{bin}/pnacl-llc'.format(bin=bindir),
+                      '-mtriple=' + triple,
+                      # Use sse2 instructions regardless of input -mattr
+                      # argument to avoid differences in (undefined) behavior of
+                      # converting NaN to int.
+                      '-mattr=sse2',
+                      '-externalize',
                       '-filetype=obj',
                       '-o=' + obj_llc,
                       bitcode])
             objs.append(obj_llc)
         else:
-            objs.append(bitcode)
+            objs.append(arg)
 
+    # Add szrt_sb_x8632.o or szrt_native_x8632.o.
     objs.append((
             '{root}/toolchain_build/src/subzero/build/runtime/' +
-            'szrt_native_x8632.o'
-            ).format(root=nacl_root))
-    linker = 'clang' if pure_c else 'clang++'
-    shellcmd([linker, '-g', '-m32', args.driver] +
-             objs +
-             ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
+            'szrt_{sb}_' + args.target + '.o'
+            ).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
+    pure_c = os.path.splitext(args.driver)[1] == '.c'
+    # Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++.
+    compiler = '{bin}/{prefix}{cc}'.format(
+        bin=bindir, prefix='pnacl-' if args.sandbox else '',
+        cc='clang' if pure_c else 'clang++')
+    sb_native_args = (['-O0', '--pnacl-allow-native', '-arch', 'x8632']
+                      if args.sandbox else
+                      ['-g', '-m32', '-lm', '-lpthread'])
+    shellcmd([compiler, args.driver] + objs +
+             ['-o', os.path.join(args.dir, args.output)] + sb_native_args)
+
+if __name__ == '__main__':
+    main()

diff --git a/pydir/run-pnacl-sz.py b/pydir/run-pnacl-sz.py
index 65eb22b..97f4361 100755
--- a/pydir/run-pnacl-sz.py
+++ b/pydir/run-pnacl-sz.py

@@ -112,7 +112,10 @@
       asm_temp.close()
     if args.assemble and args.filetype != 'obj':
       cmd += ['|', os.path.join(llvm_bin_path, 'llvm-mc'),
-              '-triple=i686-none-nacl',
+              # TODO(stichnot): -triple=i686-nacl should be used for a
+              # sandboxing test.  This means there should be an args.sandbox
+              # argument that also gets passed through to pnacl-sz.
+              '-triple=i686',
               '-filetype=obj', '-o', asm_temp.name]
     elif asm_temp:
       cmd += ['-o', asm_temp.name]

diff --git a/runtime/szrt.c b/runtime/szrt.c
index 8c84a06..009ebe3 100644
--- a/runtime/szrt.c
+++ b/runtime/szrt.c

@@ -16,27 +16,9 @@
 
 #include <stdint.h>
 
-// TODO(stichnot): The various NaN cross tests try to map Subzero's
-// undefined behavior to the same as llc's undefined behavior, as
-// observed by the cross tests.  This will have to be kept up to date
-// with any future changes to llc, and may also have to be different
-// for different targets.  It would be better to find a more
-// appropriate set of llc options when building the Subzero runtime.
-//
-// We test for NaN using "Value==Value" instead of using isnan(Value)
-// to avoid an external dependency on fpclassify().
+uint32_t __Sz_fptoui_f32_i32(float value) { return (uint32_t)value; }
 
-uint32_t __Sz_fptoui_f32_i32(float Value) {
-  if (Value == Value) // NaNaN
-    return (uint32_t)Value;
-  return 0x80000000;
-}
-
-uint32_t __Sz_fptoui_f64_i32(double Value) {
-  if (Value == Value) // NaNaN
-    return (uint32_t)Value;
-  return 0x80000000;
-}
+uint32_t __Sz_fptoui_f64_i32(double value) { return (uint32_t)value; }
 
 uint64_t __Sz_fptoui_f32_i64(float Value) { return (uint64_t)Value; }
 

diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 111a52b..a078a67 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp

@@ -555,19 +555,15 @@
 
     if (const auto Target = dyn_cast<Ice::ConstantRelocatable>(CallTarget)) {
       // Check if this direct call is to an Intrinsic (starts with "llvm.")
-      static const char LLVMPrefix[] = "llvm.";
-      const size_t LLVMPrefixLen = strlen(LLVMPrefix);
-      Ice::IceString Name = Target->getName();
-      if (Name.substr(0, LLVMPrefixLen) == LLVMPrefix) {
-        Ice::IceString NameSuffix = Name.substr(LLVMPrefixLen);
-        Info = Ctx->getIntrinsicsInfo().find(NameSuffix);
-        if (!Info) {
-          report_fatal_error(std::string("Invalid PNaCl intrinsic call: ") +
-                             LLVMObjectAsString(Inst));
-        }
+      bool BadIntrinsic;
+      Info = Ctx->getIntrinsicsInfo().find(Target->getName(), BadIntrinsic);
+      if (BadIntrinsic) {
+        report_fatal_error(std::string("Invalid PNaCl intrinsic call: ") +
+                           LLVMObjectAsString(Inst));
+      }
+      if (Info)
         NewInst = Ice::InstIntrinsicCall::create(Func.get(), NumArgs, Dest,
                                                  CallTarget, Info->Info);
-      }
     }
 
     // Not an intrinsic call.

diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index ff68ba2..34e6017 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp

@@ -244,7 +244,7 @@
   Section->appendData(Str, Asm->getBufferView());
   uint8_t SymbolType;
   uint8_t SymbolBinding;
-  if (IsInternal) {
+  if (IsInternal && !Ctx.getFlags().getDisableInternal()) {
     SymbolType = STT_NOTYPE;
     SymbolBinding = STB_LOCAL;
   } else {
@@ -545,6 +545,12 @@
   for (const Constant *S : UndefSyms) {
     const auto Sym = llvm::cast<ConstantRelocatable>(S);
     const IceString &Name = Sym->getName();
+    bool BadIntrinsic;
+    const Intrinsics::FullIntrinsicInfo *Info =
+        Ctx.getIntrinsicsInfo().find(Name, BadIntrinsic);
+    if (Info)
+      continue;
+    assert(!BadIntrinsic);
     assert(Sym->getOffset() == 0);
     assert(Sym->getSuppressMangling());
     SymTab->noteUndefinedSym(Name, NullSection);

diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 622c9ed..53a431e 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp

@@ -1640,11 +1640,7 @@
   assert(VarReg->hasReg());
   const RegX8632::GPRRegister Reg =
       RegX8632::getEncodedGPR(VarReg->getRegNum());
-  if (Locked) {
-    Asm->LockCmpxchg(Ty, Addr, Reg);
-  } else {
-    Asm->cmpxchg(Ty, Addr, Reg);
-  }
+  Asm->cmpxchg(Ty, Addr, Reg, Locked);
 }
 
 void InstX8632Cmpxchg::dump(const Cfg *Func) const {
@@ -1676,10 +1672,7 @@
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
   const x86::Address Addr = Mem->toAsmAddress(Asm);
-  if (Locked) {
-    Asm->lock();
-  }
-  Asm->cmpxchg8b(Addr);
+  Asm->cmpxchg8b(Addr, Locked);
 }
 
 void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
@@ -2731,10 +2724,7 @@
   assert(VarReg->hasReg());
   const RegX8632::GPRRegister Reg =
       RegX8632::getEncodedGPR(VarReg->getRegNum());
-  if (Locked) {
-    Asm->lock();
-  }
-  Asm->xadd(Ty, Addr, Reg);
+  Asm->xadd(Ty, Addr, Reg, Locked);
 }
 
 void InstX8632Xadd::dump(const Cfg *Func) const {

diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 29b8ad2..ffeed21 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp

@@ -220,11 +220,19 @@
 
 Intrinsics::~Intrinsics() {}
 
-const Intrinsics::FullIntrinsicInfo *
-Intrinsics::find(const IceString &Name) const {
-  auto it = Map.find(Name);
-  if (it == Map.end())
+const Intrinsics::FullIntrinsicInfo *Intrinsics::find(const IceString &Name,
+                                                      bool &Error) const {
+  static const char LLVMPrefix[] = "llvm.";
+  const size_t LLVMPrefixLen = strlen(LLVMPrefix);
+  Error = false;
+  if (Name.substr(0, LLVMPrefixLen) != LLVMPrefix)
     return nullptr;
+  IceString NameSuffix = Name.substr(LLVMPrefixLen);
+  auto it = Map.find(NameSuffix);
+  if (it == Map.end()) {
+    Error = true;
+    return nullptr;
+  }
   return &it->second;
 }
 

diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 0fee205..fb066b9 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h

@@ -145,11 +145,14 @@
     Type getArgType(SizeT Index) const;
   };
 
-  // Find the information about a given intrinsic, based on function name.
-  // The function name is expected to have the common "llvm." prefix
-  // stripped. If found, returns a reference to a FullIntrinsicInfo entry
-  // (valid for the lifetime of the map). Otherwise returns null.
-  const FullIntrinsicInfo *find(const IceString &Name) const;
+  // Find the information about a given intrinsic, based on function name.  If
+  // the function name does not have the common "llvm." prefix, nullptr is
+  // returned and Error is set to false.  Otherwise, tries to find a reference
+  // to a FullIntrinsicInfo entry (valid for the lifetime of the map).  If
+  // found, sets Error to false and returns the reference.  If not found, sets
+  // Error to true and returns nullptr (indicating an unknown "llvm.foo"
+  // intrinsic).
+  const FullIntrinsicInfo *find(const IceString &Name, bool &Error) const;
 
 private:
   // TODO(jvoung): May want to switch to something like LLVM's StringMap.

diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index b88b0ae..d4bcd8e 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp

@@ -1573,18 +1573,6 @@
     return VectorIndexValid;
   }
 
-  // Returns true if the Str begins with Prefix.
-  bool isStringPrefix(const Ice::IceString &Str, const Ice::IceString &Prefix) {
-    const size_t PrefixSize = Prefix.size();
-    if (Str.size() < PrefixSize)
-      return false;
-    for (size_t i = 0; i < PrefixSize; ++i) {
-      if (Str[i] != Prefix[i])
-        return false;
-    }
-    return true;
-  }
-
   // Takes the PNaCl bitcode binary operator Opcode, and the opcode
   // type Ty, and sets Op to the corresponding ICE binary
   // opcode. Returns true if able to convert, false otherwise.
@@ -2511,20 +2499,17 @@
       ReturnType = Signature.getReturnType();
 
       // Check if this direct call is to an Intrinsic (starts with "llvm.")
-      static Ice::IceString LLVMPrefix("llvm.");
+      bool BadIntrinsic;
       const Ice::IceString &Name = Fcn->getName();
-      if (isStringPrefix(Name, LLVMPrefix)) {
-        Ice::IceString Suffix = Name.substr(LLVMPrefix.size());
-        IntrinsicInfo =
-            getTranslator().getContext()->getIntrinsicsInfo().find(Suffix);
-        if (!IntrinsicInfo) {
-          std::string Buffer;
-          raw_string_ostream StrBuf(Buffer);
-          StrBuf << "Invalid PNaCl intrinsic call to " << Name;
-          Error(StrBuf.str());
-          appendErrorInstruction(ReturnType);
-          return;
-        }
+      IntrinsicInfo = getTranslator().getContext()->getIntrinsicsInfo().find(
+          Name, BadIntrinsic);
+      if (BadIntrinsic) {
+        std::string Buffer;
+        raw_string_ostream StrBuf(Buffer);
+        StrBuf << "Invalid PNaCl intrinsic call to " << Name;
+        Error(StrBuf.str());
+        appendErrorInstruction(ReturnType);
+        return;
       }
     } else {
       if (getFlags().getStubConstantCalls() &&

diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index aac473c..a57d791 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp

@@ -2312,10 +2312,13 @@
   EmitUint8(0xF0);
 }
 
-void AssemblerX86::cmpxchg(Type Ty, const Address &address, GPRRegister reg) {
+void AssemblerX86::cmpxchg(Type Ty, const Address &address, GPRRegister reg,
+                           bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   if (Ty == IceType_i16)
     EmitOperandSizeOverride();
+  if (Locked)
+    EmitUint8(0xF0);
   EmitUint8(0x0F);
   if (isByteSizedArithType(Ty))
     EmitUint8(0xB0);
@@ -2324,17 +2327,22 @@
   EmitOperand(reg, address);
 }
 
-void AssemblerX86::cmpxchg8b(const Address &address) {
+void AssemblerX86::cmpxchg8b(const Address &address, bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (Locked)
+    EmitUint8(0xF0);
   EmitUint8(0x0F);
   EmitUint8(0xC7);
   EmitOperand(1, address);
 }
 
-void AssemblerX86::xadd(Type Ty, const Address &addr, GPRRegister reg) {
+void AssemblerX86::xadd(Type Ty, const Address &addr, GPRRegister reg,
+                        bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   if (Ty == IceType_i16)
     EmitOperandSizeOverride();
+  if (Locked)
+    EmitUint8(0xF0);
   EmitUint8(0x0F);
   if (isByteSizedArithType(Ty))
     EmitUint8(0xC0);

diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 06a601e..a6a0bb4 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h

@@ -803,16 +803,11 @@
   void mfence();
 
   void lock();
-  void cmpxchg(Type Ty, const Address &address, GPRRegister reg);
-  void cmpxchg8b(const Address &address);
-  void xadd(Type Ty, const Address &address, GPRRegister reg);
+  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void cmpxchg8b(const Address &address, bool Locked);
+  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
   void xchg(Type Ty, const Address &address, GPRRegister reg);
 
-  void LockCmpxchg(Type Ty, const Address &address, GPRRegister reg) {
-    lock();
-    cmpxchg(Ty, address, reg);
-  }
-
   void EmitSegmentOverride(uint8_t prefix);
 
   intptr_t PreferredLoopAlignment() { return 16; }

diff --git a/tests_lit/llvm2ice_tests/elf_container.ll b/tests_lit/llvm2ice_tests/elf_container.ll
index c010ec0..f2167bb 100644
--- a/tests_lit/llvm2ice_tests/elf_container.ll
+++ b/tests_lit/llvm2ice_tests/elf_container.ll

@@ -8,7 +8,7 @@
 ; RUN:       -relocations -symbols %t | FileCheck %s
 
 ; RUN: %if --need=allow_dump --command %p2i -i %s --args -O2 --verbose none \
-; RUN:   | %if --need=allow_dump --command llvm-mc -triple=i686-none-nacl \
+; RUN:   | %if --need=allow_dump --command llvm-mc -triple=i686-nacl \
 ; RUN:     -filetype=obj -o - \
 ; RUN:   | %if --need=allow_dump --command llvm-readobj -file-headers \
 ; RUN:     -sections -section-data -relocations -symbols - \

diff --git a/tests_lit/llvm2ice_tests/elf_function_sections.ll b/tests_lit/llvm2ice_tests/elf_function_sections.ll
index 36ee1b0..6a55890 100644
--- a/tests_lit/llvm2ice_tests/elf_function_sections.ll
+++ b/tests_lit/llvm2ice_tests/elf_function_sections.ll

@@ -7,7 +7,7 @@
 ; RUN: %if --need=allow_dump --command \
 ; RUN:   %p2i -i %s --args -O2 -ffunction-sections \
 ; RUN:   | %if --need=allow_dump --command \
-; RUN:   llvm-mc -triple=i686-none-nacl -filetype=obj -o - \
+; RUN:   llvm-mc -triple=i686-nacl -filetype=obj -o - \
 ; RUN:   | %if --need=allow_dump --command \
 ; RUN:   llvm-readobj -file-headers -sections -section-data \
 ; RUN:       -relocations -symbols - \

diff --git a/tests_lit/llvm2ice_tests/elf_nodata.ll b/tests_lit/llvm2ice_tests/elf_nodata.ll
index a28639a..5ec425d 100644
--- a/tests_lit/llvm2ice_tests/elf_nodata.ll
+++ b/tests_lit/llvm2ice_tests/elf_nodata.ll

@@ -7,7 +7,7 @@
 
 ; RUN: %if --need=allow_dump --command %p2i -i %s --args -O2 \
 ; RUN:   | %if --need=allow_dump --command \
-; RUN:   llvm-mc -triple=i686-none-nacl -filetype=obj -o - \
+; RUN:   llvm-mc -triple=i686-nacl -filetype=obj -o - \
 ; RUN:   | %if --need=allow_dump --command \
 ; RUN:   llvm-readobj -file-headers -sections -section-data \
 ; RUN:       -relocations -symbols - \

diff --git a/tests_lit/llvm2ice_tests/regalloc_evict_non_overlap.ll b/tests_lit/llvm2ice_tests/regalloc_evict_non_overlap.ll
index 2f5d5c2..6a1ad65 100644
--- a/tests_lit/llvm2ice_tests/regalloc_evict_non_overlap.ll
+++ b/tests_lit/llvm2ice_tests/regalloc_evict_non_overlap.ll

@@ -5,10 +5,6 @@
 ; TODO(kschimpf) Find out why lc2i is needed.
 ; RUN: %lc2i -i %s --args -O2 --verbose regalloc
 
-; ModuleID = 'bugpoint-reduced-simplified.ll'
-target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
-target triple = "i386-pc-linux-gnu"
-
 define void @foo() {
 bb:
   br i1 undef, label %bb13, label %bb14

diff --git a/tests_lit/reader_tests/extern_globals.ll b/tests_lit/reader_tests/extern_globals.ll
index cde5778..3b75451 100644
--- a/tests_lit/reader_tests/extern_globals.ll
+++ b/tests_lit/reader_tests/extern_globals.ll

@@ -12,9 +12,6 @@
 ; RUN: %lc2i -i %s --insts --args --allow-uninitialized-globals \
 ; RUN:       -prefix Subzero_ | FileCheck --check-prefix=CROSS %s
 
-target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
-target triple = "i686-pc-linux-gnu"
-
 @ArrayInitPartial = internal global [40 x i8] c"<\00\00\00F\00\00\00P\00\00\00Z\00\00\00d\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4
 
 ; CHECK: @ArrayInitPartial = internal global [40 x i8] c"<\00\00\00F\00\00\00P\00\00\00Z\00\00\00d\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4
commit	a67fc448e9779ac8b7d9751bd7cf32f94047a345	[log] [tgz]
author	Jim Stichnoth <stichnot@chromium.org>	Tue Mar 03 16:13:11 2015 -0800
committer	Jim Stichnoth <stichnot@chromium.org>	Tue Mar 03 16:13:11 2015 -0800
tree	f1af2ac5df1c353b468aa197851767075738c940
parent	437ceff23f529d87a6dac702dd4bfbaec021321f [diff]