| #!/usr/bin/env python2 |
| |
| import argparse |
| import os |
| import pipes |
| import re |
| import sys |
| |
| from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd |
| |
def NewerThanOrNotThere(old_path, new_path):
    """Tell whether new_path must be considered out of date.

    Returns True if either path does not exist, or if old_path has a
    strictly later modification time than new_path; otherwise False.
    """
    # A missing file on either side means the answer is always "yes".
    for path in (old_path, new_path):
        if not os.path.exists(path):
            return True
    old_mtime = os.path.getmtime(old_path)
    new_mtime = os.path.getmtime(new_path)
    return old_mtime > new_mtime
| |
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array of
    symbol names.  Each element in the patterns array is either a regex,
    or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
      patterns: Sequence of pattern strings.  A pattern starting with a
        digit or ':' is read as a python-style range 'n', 'a:b', 'a:' or
        ':b' into syms; any other pattern (including the empty string) is
        used as a regex as-is.
      syms: Sequence of symbol names that ranges index into.

    Returns:
      A regex string in which every pattern is wrapped as '^...$' and the
      patterns are joined with '|'.  Returns '^$' when patterns is empty.

    Exits the process with status 1 (after printing a message) when a
    range has more than one ':' or a bound that is not an integer.
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern does not
        # raise IndexError; it simply falls through as an empty regex.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' separators and bounds such as
                # '1:x' that int() cannot parse.
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            # Symbol names are matched literally, so escape them.
            pattern = '$|^'.join(re.escape(p) for p in syms[lower:upper])
        # NOTE: the pattern is not wrapped in a group, so a top-level '|'
        # inside a user regex splits the '^' and '$' anchors between its
        # alternatives.
        pattern_list.append('^' + pattern + '$')
    return '|'.join(pattern_list) if pattern_list else '^$'
| |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol name is selected by the include/exclude rules.

    The compiled exclude regex is consulted first and wins on a match
    (result False).  Otherwise a match against the compiled include regex
    yields True.  When neither regex matches, default_match is returned.
    """
    # An explicit exclude is always honored before includes are considered.
    if re_exclude.match(sym) is not None:
        return False
    return True if re_include.match(sym) is not None else default_match
| |
def AddOptionalArgs(argparser):
    """Register this script's optional command-line flags on argparser.

    Only optional arguments are added here; positional arguments are left
    to the caller.  Flags are registered in a fixed order, which is also
    the order in which they show up in the generated help.
    """
    add = argparser.add_argument

    add('--force', dest='force', default=1, type=int, choices=[0, 1],
        help='Force all re-translations of the pexe.'
             ' Default %(default)s.')

    # --include / --exclude are repeatable and share the same shape.
    for long_flag, short_flag, verb in (('--include', '-i', 'include'),
                                        ('--exclude', '-e', 'exclude')):
        add(long_flag, short_flag, dest=verb, action='append', default=[],
            help='Subzero symbols to ' + verb + ' (regex or line range)')

    add('--output', '-o', dest='output', action='store', default='a.out',
        help='Output executable. Default %(default)s.')
    add('-O', dest='optlevel', default='2',
        choices=['m1', '-1', '0', '1', '2'],
        help='Optimization level '
             '(m1 and -1 are equivalent).'
             ' Default %(default)s.')
    add('--filetype', dest='filetype', default='iasm',
        choices=['obj', 'asm', 'iasm'],
        help='Output file type. Default %(default)s.')
    add('--sandbox', dest='sandbox', action='store_true',
        help='Enable sandboxing in the translator')
    add('--nonsfi', dest='nonsfi', action='store_true',
        help='Enable Non-SFI in the translator')
    add('--enable-block-profile', dest='enable_block_profile',
        action='store_true',
        help='Enable basic block profiling.')
    add('--target', dest='target', default='x8632',
        choices=['arm32', 'x8632', 'x8664'],
        help='Generate code for specified target.')
    add('--verbose', '-v', dest='verbose', action='store_true',
        help='Display some extra debugging output')

    # Pass-through arguments for the two translators; both are repeatable.
    add('--sz', dest='sz_args', action='append', default=[],
        help='Extra arguments for Subzero')
    add('--llc', dest='llc_args', action='append', default=[],
        help='Extra arguments for llc')

    add('--no-sz', dest='nosz', action='store_true',
        help='Run only post-Subzero build steps')
    add('--fsanitize-address', dest='asan', action='store_true',
        help='Instrument with AddressSanitizer')
| |
def LinkSandbox(objs, exe, target, verbose=True):
    """Link objs into the sandboxed executable exe using the gold linker.

    Args:
      objs: List of object file paths to link.
      exe: Path of the output executable.
      target: One of 'x8632', 'x8664' or 'arm32' (asserted).
      verbose: Forwarded to shellcmd() as its echo argument.
    """
    assert target in ('x8632', 'x8664', 'arm32'), \
        '-sandbox is not available for %s' % target
    nacl_root = FindBaseNaCl()
    toolchain = '{0}/toolchain/linux_x86/pnacl_newlib_raw'.format(nacl_root)
    gold = toolchain + '/bin/le32-nacl-ld.gold'
    lib_subdirs = {
        'arm32': 'arm',
        'x8632': 'x86-32',
        'x8664': 'x86-64',
    }
    linklib = '{0}/translator/{1}/lib'.format(toolchain, lib_subdirs[target])
    runtime_obj = ('{0}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_sb_{1}.o').format(nacl_root, target)

    # Options and startup object that precede the caller's objects.
    # Not passed in this configuration: '-z noexecstack' and '-pie'.
    leading = [
        gold,
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '--build-id',
        '--entry=__pnacl_start',
        '-static',
        linklib + '/crtbegin.o',
    ]
    # Runtime, support libraries and output selection that follow them.
    # Not passed in this configuration: '--defsym=_begin=0'.
    trailing = [
        runtime_obj,
        linklib + '/libpnacl_irt_shim_dummy.a',
        '--start-group',
        linklib + '/libgcc.a',
        linklib + '/libcrt_platform.a',
        '--end-group',
        linklib + '/crtend.o',
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '-o', exe,
    ]
    shellcmd(leading + objs + trailing, echo=verbose)
| |
def LinkNonsfi(objs, exe, target, verbose=True):
    """Link objs into the Non-SFI executable exe using the gold linker.

    Args:
      objs: List of object file paths to link.
      exe: Path of the output executable.
      target: 'arm32' or 'x8632'; any other value raises KeyError from
        the library-directory lookup.
      verbose: Forwarded to shellcmd() as its echo argument.
    """
    nacl_root = FindBaseNaCl()
    toolchain = '{0}/toolchain/linux_x86/pnacl_newlib_raw'.format(nacl_root)
    gold = toolchain + '/bin/le32-nacl-ld.gold'
    lib_subdirs = {
        'arm32': 'arm-nonsfi',
        'x8632': 'x86-32-nonsfi',
    }
    linklib = '{0}/translator/{1}/lib'.format(toolchain, lib_subdirs[target])
    runtime_obj = ('{0}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_nonsfi_{1}.o').format(nacl_root, target)

    # Options and startup object that precede the caller's objects.
    leading = [
        gold,
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '-z', 'noexecstack',
        '--build-id',
        '--entry=__pnacl_start',
        '-pie',
        linklib + '/crtbegin.o',
    ]
    # Runtime, support libraries and output selection that follow them.
    trailing = [
        runtime_obj,
        linklib + '/libpnacl_irt_shim_dummy.a',
        '--start-group',
        linklib + '/libgcc.a',
        linklib + '/libcrt_platform.a',
        '--end-group',
        linklib + '/crtend.o',
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '--defsym=_begin=0',
        '-o', exe,
    ]
    shellcmd(leading + objs + trailing, echo=verbose)
| |
def LinkNative(objs, exe, target, verbose=True):
    """Link objs into the native (unsandboxed) executable exe.

    Args:
      objs: List of object file paths to link.
      exe: Path of the output executable.
      target: 'arm32', 'x8632' or 'x8664'.  'mips32' has a linker entry
        but no linker-argument or library-directory entry, so it (like
        any unknown target) raises KeyError.
      verbose: Forwarded to shellcmd() as its echo argument.
    """
    nacl_root = FindBaseNaCl()

    # Both x86 targets link through the same in-tree clang binary.
    clang = ('{0}/../third_party/llvm-build/Release+Asserts/bin/clang'
             ).format(nacl_root)
    linkers = {
        'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
        'mips32': '/usr/bin/mipsel-linux-gnu-g++',
        'x8632': clang,
        'x8664': clang,
    }
    linker = linkers[target]

    per_target_args = {
        'arm32': ['-mcpu=cortex-a9'],
        'x8632': ['-m32'],
        'x8664': ['-mx32'],
    }
    extra_linker_args = per_target_args[target]

    lib_dirs = {
        'arm32': 'arm-linux',
        'x8632': 'x86-32-linux',
        'x8664': 'x86-64-linux',
    }
    lib_dir = lib_dirs[target]

    # The doubled braces survive format() as a literal '{a,b,c}.o', which
    # names the three IRT objects in a single command-line word.
    irt_objs = ('{0}/toolchain/linux_x86/pnacl_newlib_raw/translator/'
                '{1}/lib/'
                '{{unsandboxed_irt,irt_random,irt_query_list}}.o'
                ).format(nacl_root, lib_dir)
    runtime_obj = ('{0}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_native_{1}.o').format(nacl_root, target)

    trailing = [
        '-o', exe,
        irt_objs,
        runtime_obj,
        '-lm', '-lpthread', '-lrt',
        '-Wl,--defsym=__Sz_AbsoluteZero=0',
    ]
    shellcmd([linker] + extra_linker_args + objs + trailing, echo=verbose)
| |
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a hybrid of Subzero
    and llc translated bitcode.  Linker tricks are used to determine whether
    Subzero or llc generated symbols are used, on a per-symbol basis.

    By default, for every symbol, its Subzero version is used.  Subzero and llc
    symbols can be selectively enabled/disabled via regular expressions on the
    symbol name, or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the symbol is
    'rejected' on a match), followed by the --include arguments (the symbol is
    'accepted' on a match).  A symbol that matches neither is 'accepted' if no
    --include arguments were given, and 'rejected' otherwise.  The Subzero
    version is used for an 'accepted' symbol, and the llc version is used for a
    'rejected' symbol.

    Each --include and --exclude argument can be a regular expression or a range
    of lines in the symbol file.  Each regular expression is wrapped inside
    '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
    Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
    for the first 10 lines of the file, or '1' for the second line of the file.

    If no --include or --exclude arguments are given, the executable is produced
    entirely using Subzero, without using llc or linker tricks.

    When using the --force=0 option, this script uses file modification
    timestamps to determine whether llc and Subzero re-translation are needed.
    It checks timestamps of llc, pnacl-sz, and the pexe against the translated
    object files to determine the minimal work necessary.  The --force=1 option
    (default) suppresses those checks and re-translates everything.

    This script expects various PNaCl and LLVM tools to be found within the
    native_client tree.  When changes are made to these tools, copy them this
    way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
      --install=toolchain/linux_x86/pnacl_newlib_raw
    """
    # Local import: only needed to turn the docstring into help text.
    import inspect

    # The docstring above doubles as the --help description.  cleandoc()
    # strips the source indentation so the help layout does not depend on
    # how this function is indented, and the "or ''" keeps the script
    # usable when docstrings are stripped (python -OO sets __doc__ to None).
    argparser = argparse.ArgumentParser(
        description=inspect.cleandoc(main.__doc__ or ''),
        formatter_class=argparse.RawTextHelpFormatter)
    AddOptionalArgs(argparser)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    args = argparser.parse_args()
    ProcessPexe(args, args.pexe, args.output)
| |
def ProcessPexe(args, pexe, exe):
    """Translate a pexe with Subzero (optionally mixed with llc) and link it.

    Intermediate files are written next to the pexe, named after it with
    the '.pexe' extension removed (or the full name if it has another
    extension).

    Steps, in order:
      1. Hybrid mode only (any --include/--exclude given): translate the
         pexe with pnacl-translate and dump its symbol list.
      2. Translate the pexe with pnacl-sz (unless args.nosz), assembling
         the result with llvm-mc when the file type is not 'obj'.
      3. Hybrid mode only: weaken symbols so that each symbol resolves to
         either its Subzero or its llc definition, and partially link the
         two objects into one.
      4. Link the final executable (sandbox, nonsfi or native).

    Steps 1 and 2 are skipped when args.force is 0 and the existing object
    file is newer than both the pexe and the translator binary.

    Args:
      args: Parsed command-line options; reads the attributes registered
        by AddOptionalArgs.  args.sz_args may get '-fsanitize-address'
        appended when args.asan is set.
      pexe: Path of the finalized pexe to translate.
      exe: Path of the executable to produce.

    Exits the process with status 1 if args.asan is combined with
    args.sandbox or args.nonsfi.
    """
    pexe_base_unescaped, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base_unescaped = pexe
    # Shell-quoted names go into command lines; the unescaped names are
    # for anything Python itself does with the filesystem.
    pexe_unescaped = pexe
    pexe_base = pipes.quote(pexe_base_unescaped)
    pexe = pipes.quote(pexe)

    nacl_root = FindBaseNaCl()
    path_addition = (
        '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
    ).format(root=nacl_root)
    obj_llc = pexe_base + '.llc.o'
    obj_llc_unescaped = pexe_base_unescaped + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    obj_sz_unescaped = pexe_base_unescaped + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = obj_sz  # overridden for hybrid mode
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    pnacl_sz = (
        '{root}/toolchain_build/src/subzero/pnacl-sz'
    ).format(root=nacl_root)
    llcbin = '{base}/pnacl-llc'.format(base=path_addition)
    objcopy = '{base}/{objcopy}'.format(base=path_addition,
                                        objcopy=GetObjcopyCmd(args.target))
    opt_level = args.optlevel
    # pnacl-translate has no "minus one" level, so m1/-1 map to -O0.
    opt_level_map = {'m1': '0', '-1': '0', '0': '0', '1': '1', '2': '2'}
    hybrid = args.include or args.exclude
    native = not args.sandbox and not args.nonsfi
    if args.asan:
        if args.sandbox or args.nonsfi:
            print('Can only use AddressSanitizer with a native build')
            sys.exit(1)
        if '-fsanitize-address' not in args.sz_args:
            args.sz_args.append('-fsanitize-address')

    # Timestamp checks must see the real filenames: a shell-quoted name
    # that needed quoting never exists on disk, which would make every
    # check report "not there" and defeat --force=0.
    if hybrid and (args.force or
                   NewerThanOrNotThere(pexe_unescaped, obj_llc_unescaped) or
                   NewerThanOrNotThere(llcbin, obj_llc_unescaped)):
        arch = {
            'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'),
            'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'),
            'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux')
        }[args.target]

        # Only run pnacl-translate in hybrid mode.
        shellcmd(['{base}/pnacl-translate'.format(base=path_addition),
                  '-split-module=1',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch', arch,
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 (['--pnacl-driver-verbose'] if args.verbose else []) +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
            ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose)
        # Generate llc syms file for consistency, even though it's not used.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
        ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    if (args.force or
            NewerThanOrNotThere(pexe_unescaped, obj_sz_unescaped) or
            NewerThanOrNotThere(pnacl_sz, obj_sz_unescaped)):
        if not args.nosz:
            # Run pnacl-sz regardless of hybrid mode.
            shellcmd([pnacl_sz,
                      '-O' + opt_level,
                      '-bitcode-format=pnacl',
                      '-filetype=' + args.filetype,
                      '-o', obj_sz if args.filetype == 'obj' else asm_sz,
                      '-target=' + args.target] +
                     (['-externalize',
                       '-ffunction-sections',
                       '-fdata-sections'] if hybrid else []) +
                     (['-sandbox'] if args.sandbox else []) +
                     (['-nonsfi'] if args.nonsfi else []) +
                     (['-enable-block-profile'] if
                      args.enable_block_profile and not args.sandbox
                      else []) +
                     args.sz_args +
                     [pexe],
                     echo=args.verbose)
        if args.filetype != 'obj':
            # Subzero emitted assembly; assemble it into obj_sz.
            triple = {
                'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''),
                'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''),
                'x8664': 'x86_64' +
                         get_sfi_string(args, '-nacl', '-linux-gnux32',
                                        '-linux-gnux32'),
            }[args.target]

            shellcmd((
                '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
            ).format(base=path_addition, asm=asm_sz, obj=obj_sz,
                     triple=triple),
                     echo=args.verbose)
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
            ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose)
        if hybrid:
            shellcmd((
                'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    if hybrid:
        with open(sym_sz_unescaped) as f:
            sz_syms = f.read().splitlines()
        re_include_str = BuildRegex(args.include, sz_syms)
        re_exclude_str = BuildRegex(args.exclude, sz_syms)
        re_include = re.compile(re_include_str)
        re_exclude = re.compile(re_exclude_str)
        # If a symbol doesn't explicitly match re_include or re_exclude,
        # the default MatchSymbol() result is True, unless some --include
        # args are provided.
        default_match = not args.include

        # The whitelist holds the symbols whose Subzero version is used;
        # those are the ones weakened in the llc object below.
        whitelist_has_items = False
        with open(whitelist_sz_unescaped, 'w') as f:
            for sym in sz_syms:
                if MatchSymbol(sym, re_include, re_exclude, default_match):
                    f.write(sym + '\n')
                    whitelist_has_items = True
        shellcmd((
            '{objcopy} --weaken {obj} {weak}'
        ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak),
                 echo=args.verbose)
        if whitelist_has_items:
            # objcopy returns an error if the --weaken-symbols file is empty.
            shellcmd((
                '{objcopy} --weaken-symbols={whitelist} {obj} {weak}'
            ).format(objcopy=objcopy,
                     whitelist=whitelist_sz, obj=obj_llc,
                     weak=obj_llc_weak),
                     echo=args.verbose)
        else:
            shellcmd((
                '{objcopy} {obj} {weak}'
            ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
                     echo=args.verbose)
        obj_partial = pexe_base + '.o'
        ld = {
            'arm32': 'arm-linux-gnueabihf-ld',
            'x8632': 'ld',
            'x8664': 'ld',
        }[args.target]
        emulation = {
            'arm32': 'armelf_linux_eabi',
            'x8632': 'elf_i386',
            'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64',
        }[args.target]
        # Partially link the two weakened objects into a single object.
        shellcmd((
            '{ld} -r -m {emulation} -o {partial} {sz} {llc}'
        ).format(ld=ld, emulation=emulation, partial=obj_partial,
                 sz=obj_sz_weak, llc=obj_llc_weak),
                 echo=args.verbose)
        # Localize every symbol, then re-export only the entry point and
        # the block-profile table.
        shellcmd((
            '{objcopy} -w --localize-symbol="*" {partial}'
        ).format(objcopy=objcopy, partial=obj_partial),
                 echo=args.verbose)
        shellcmd((
            '{objcopy} --globalize-symbol={start} ' +
            '--globalize-symbol=__Sz_block_profile_info {partial}'
        ).format(objcopy=objcopy, partial=obj_partial,
                 start=get_sfi_string(args, '_start', '_start',
                                      '_user_start')),
                 echo=args.verbose)

    # Run the linker regardless of hybrid mode.
    if args.sandbox:
        LinkSandbox([obj_partial], exe, args.target, args.verbose)
    elif args.nonsfi:
        LinkNonsfi([obj_partial], exe, args.target, args.verbose)
    else:
        objs = [obj_partial]
        if args.asan:
            objs.append(
                ('{root}/toolchain_build/src/subzero/build/runtime/' +
                 'szrt_asan_{target}.o').format(root=nacl_root,
                                                target=args.target))
        LinkNative(objs, exe, args.target, args.verbose)

    # Put the extra verbose printing at the end.
    if args.verbose and hybrid:
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
| |
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    main()