Subzero: Allow non-hybrid binaries to be built.

Build a pure-Subzero binary when neither --include nor --exclude is specified.

A pure-Subzero binary is built without passing the following flags to llvm2ice:
  -externalize
  -ffunction-sections
  -fdata-sections
This is good because that configuration is closer to what real usage will be, and so it will get more testing during development.

BUG= none
R=kschimpf@google.com

Review URL: https://codereview.chromium.org/693393003
diff --git a/pydir/szbuild.py b/pydir/szbuild.py
index b822f2d..0b61667 100755
--- a/pydir/szbuild.py
+++ b/pydir/szbuild.py
@@ -91,39 +91,39 @@
 def main():
     """Create a hybrid translation from Subzero and llc.
 
-    Takes a finalized pexe and builds a native executable as a
-    hybrid of Subzero and llc translated bitcode.  Linker tricks are
-    used to determine whether Subzero or llc generated symbols are
-    used, on a per-symbol basis.
+    Takes a finalized pexe and builds a native executable as a hybrid of Subzero
+    and llc translated bitcode.  Linker tricks are used to determine whether
+    Subzero or llc generated symbols are used, on a per-symbol basis.
 
-    By default, for every symbol, its llc version is used.  Subzero
-    symbols can be enabled by regular expressions on the symbol name,
-    or by ranges of lines in this program's auto-generated symbol
+    By default, for every symbol, its Subzero version is used.  Subzero and llc
+    symbols can be selectively enabled/disabled via regular expressions on the
+    symbol name, or by ranges of lines in this program's auto-generated symbol
     file.
 
-    For each symbol, the --exclude arguments are first checked (the
-    symbol is 'rejected' on a match), followed by the --include
-    arguments (the symbol is 'accepted' on a match), followed by
-    unconditional 'rejection'.  The Subzero version is used for an
-    'accepted' symbol, and the llc version is used for a 'rejected'
-    symbol.
+    For each symbol, the --exclude arguments are first checked (the symbol is
+    'rejected' on a match), followed by the --include arguments (the symbol is
+    'accepted' on a match).  A symbol matching neither is 'rejected' if any
+    --include arguments were given, and 'accepted' otherwise.  The Subzero
+    version is used for an 'accepted' symbol, and llc for a 'rejected' symbol.
 
-    Each --include and --exclude argument can be a regular expression
-    or a range of lines in the symbol file.  Each regular expression
-    is wrapped inside '^$', so if you want a substring match on 'foo',
-    use '.*foo.*' instead.  Ranges use python-style 'first:last'
-    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
-    the file, or '1' for the second line of the file.
+    Each --include and --exclude argument can be a regular expression or a range
+    of lines in the symbol file.  Each regular expression is wrapped inside
+    '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
+    Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
+    for the first 10 lines of the file, or '1' for the second line of the file.
 
-    This script uses file modification timestamps to determine whether
-    llc and Subzero re-translation are needed.  It checks timestamps
-    of llc, llvm2ice, and the pexe against the translated object files
-    to determine the minimal work necessary.  The --force option
-    suppresses those checks and re-translates everything.
+    If no --include or --exclude arguments are given, the executable is produced
+    entirely using Subzero, without using llc or linker tricks.
 
-    This script augments PATH so that various PNaCl and LLVM tools can
-    be run.  These extra paths are within the native_client tree.
-    When changes are made to these tools, copy them this way:
+    This script uses file modification timestamps to determine whether llc and
+    Subzero re-translation are needed.  It checks timestamps of llc, llvm2ice,
+    and the pexe against the translated object files to determine the minimal
+    work necessary.  The --force option suppresses those checks and
+    re-translates everything.
+
+    This script augments PATH so that various PNaCl and LLVM tools can be run.
+    These extra paths are within the native_client tree.  When changes are made
+    to these tools, copy them this way:
       cd native_client
       toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
       --install=toolchain/linux_x86/pnacl_newlib
@@ -156,7 +156,7 @@
     asm_sz = pexe_base + '.sz.s'
     obj_llc_weak = pexe_base + '.weak.llc.o'
     obj_sz_weak = pexe_base + '.weak.sz.o'
-    obj_partial = pexe_base + '.o'
+    obj_partial = obj_sz  # overridden for hybrid mode
     sym_llc = pexe_base + '.sym.llc.txt'
     sym_sz = pexe_base + '.sym.sz.txt'
     sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
@@ -170,8 +170,12 @@
         ).format(root=nacl_root)
     opt_level = args.optlevel
     opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
-    if args.force or NewerThanOrNotThere(pexe, obj_llc) or \
-            NewerThanOrNotThere(llcbin, obj_llc):
+    hybrid = args.include or args.exclude
+
+    if hybrid and (args.force or
+                   NewerThanOrNotThere(pexe, obj_llc) or
+                   NewerThanOrNotThere(llcbin, obj_llc)):
+        # Only run pnacl-translate in hybrid mode.
         shellcmd(['pnacl-translate',
                   '-ffunction-sections',
                   '-fdata-sections',
@@ -186,18 +190,22 @@
         shellcmd((
             'objcopy --redefine-sym _start=_user_start {obj}'
             ).format(obj=obj_llc), echo=args.verbose)
+        # Generate llc syms file for consistency, even though it's not used.
         shellcmd((
             'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
             ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)
-    if args.force or NewerThanOrNotThere(pexe, obj_sz) or \
-            NewerThanOrNotThere(llvm2ice, obj_sz):
+
+    if (args.force or
+        NewerThanOrNotThere(pexe, obj_sz) or
+        NewerThanOrNotThere(llvm2ice, obj_sz)):
+        # Run llvm2ice regardless of hybrid mode.
         shellcmd([llvm2ice,
                   '-O' + opt_level,
                   '-bitcode-format=pnacl',
-                  '-externalize',
-                  '-ffunction-sections',
-                  '-fdata-sections',
                   '-o', asm_sz] +
+                 (['-externalize',
+                   '-ffunction-sections',
+                   '-fdata-sections'] if hybrid else []) +
                  args.sz_args +
                  [pexe],
                  echo=args.verbose)
@@ -207,50 +215,56 @@
         shellcmd((
             'objcopy --redefine-sym _start=_user_start {obj}'
             ).format(obj=obj_sz), echo=args.verbose)
-        shellcmd((
-            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
-            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)
+        if hybrid:
+            shellcmd((
+                'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
+                ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)
 
-    with open(sym_sz_unescaped) as f:
-        sz_syms = f.read().splitlines()
-    re_include_str = BuildRegex(args.include, sz_syms)
-    re_exclude_str = BuildRegex(args.exclude, sz_syms)
-    re_include = re.compile(re_include_str)
-    re_exclude = re.compile(re_exclude_str)
-    # If a symbol doesn't explicitly match re_include or re_exclude,
-    # the default MatchSymbol() result is True, unless some --include
-    # args are provided.
-    default_match = not len(args.include)
+    if hybrid:
+        with open(sym_sz_unescaped) as f:
+            sz_syms = f.read().splitlines()
+        re_include_str = BuildRegex(args.include, sz_syms)
+        re_exclude_str = BuildRegex(args.exclude, sz_syms)
+        re_include = re.compile(re_include_str)
+        re_exclude = re.compile(re_exclude_str)
+        # If a symbol doesn't explicitly match re_include or re_exclude,
+        # the default MatchSymbol() result is True, unless some --include
+        # args are provided.
+        default_match = not args.include
 
-    whitelist_has_items = False
-    with open(whitelist_sz_unescaped, 'w') as f:
-        for sym in sz_syms:
-            if MatchSymbol(sym, re_include, re_exclude, default_match):
-                f.write(sym + '\n')
-                whitelist_has_items = True
-    shellcmd((
-        'objcopy --weaken {obj} {weak}'
-        ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
-    if whitelist_has_items:
-        # objcopy returns an error if the --weaken-symbols file is empty.
+        whitelist_has_items = False
+        with open(whitelist_sz_unescaped, 'w') as f:
+            for sym in sz_syms:
+                if MatchSymbol(sym, re_include, re_exclude, default_match):
+                    f.write(sym + '\n')
+                    whitelist_has_items = True
         shellcmd((
-            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
-            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak),
+            'objcopy --weaken {obj} {weak}'
+            ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
+        if whitelist_has_items:
+            # objcopy returns an error if the --weaken-symbols file is empty.
+            shellcmd((
+                'objcopy --weaken-symbols={whitelist} {obj} {weak}'
+                ).format(whitelist=whitelist_sz, obj=obj_llc,
+                         weak=obj_llc_weak),
+                     echo=args.verbose)
+        else:
+            shellcmd((
+                'objcopy {obj} {weak}'
+                ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
+        obj_partial = pexe_base + '.o'
+        shellcmd((
+            'ld -r -m elf_i386 -o {partial} {sz} {llc}'
+            ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
                  echo=args.verbose)
-    else:
         shellcmd((
-            'objcopy {obj} {weak}'
-            ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
-    shellcmd((
-        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
-        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
-             echo=args.verbose)
-    shellcmd((
-        'objcopy -w --localize-symbol="*" {partial}'
-        ).format(partial=obj_partial), echo=args.verbose)
-    shellcmd((
-        'objcopy --globalize-symbol=_user_start {partial}'
-        ).format(partial=obj_partial), echo=args.verbose)
+            'objcopy -w --localize-symbol="*" {partial}'
+            ).format(partial=obj_partial), echo=args.verbose)
+        shellcmd((
+            'objcopy --globalize-symbol=_user_start {partial}'
+            ).format(partial=obj_partial), echo=args.verbose)
+
+    # Run the linker regardless of hybrid mode.
     linker = (
         '{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
         ).format(root=nacl_root)
@@ -266,13 +280,15 @@
         ).format(ld=linker, partial=obj_partial, exe=exe,
                  opt_level=opt_level_map[opt_level], root=nacl_root),
              echo=args.verbose)
+
     # Put the extra verbose printing at the end.
     if args.verbose:
         print 'PATH={path}'.format(path=os.environ['PATH'])
-        print 'include={regex}'.format(regex=re_include_str)
-        print 'exclude={regex}'.format(regex=re_exclude_str)
-        print 'default_match={dm}'.format(dm=default_match)
-        print 'Number of Subzero syms = {num}'.format(num=len(sz_syms))
+        if hybrid:
+            print 'include={regex}'.format(regex=re_include_str)
+            print 'exclude={regex}'.format(regex=re_exclude_str)
+            print 'default_match={dm}'.format(dm=default_match)
+            print 'Number of Subzero syms = {num}'.format(num=len(sz_syms))
 
 if __name__ == '__main__':
     main()