Jim Stichnoth | 10ea698 | 2014-09-09 11:19:12 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
| 2 | |
| 3 | import argparse |
| 4 | import os |
| 5 | import pipes |
| 6 | import re |
| 7 | import sys |
| 8 | |
| 9 | from utils import shellcmd |
| 10 | from utils import FindBaseNaCl |
| 11 | |
def NewerThanOrNotThere(old_path, new_path):
    """Tell whether new_path is stale with respect to old_path.

    Returns True when either path is missing, or when old_path has a
    strictly later modification time than new_path; otherwise False.
    """
    # A missing file on either side always forces a rebuild.
    for path in (old_path, new_path):
        if not os.path.exists(path):
            return True
    old_mtime = os.path.getmtime(old_path)
    new_mtime = os.path.getmtime(new_path)
    return old_mtime > new_mtime
| 20 | |
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Every pattern is wrapped as '^(?:...)$' so that it has to match a
    whole symbol name, even when the pattern itself contains a
    top-level '|'.  Symbol names selected by a range are escaped, so
    they only match literally.

    Returns '^$' when patterns is empty.  Prints a message and exits
    with status 1 when a range is malformed (too many ':' separators
    or a non-integer bound).
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so that an empty pattern is handled
        # as an (empty) regex rather than raising IndexError.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                # Covers both too many ':' and non-numeric bounds.
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join(re.escape(s) for s in syms[lower:upper])
        # The non-capturing group keeps '^' and '$' bound to the whole
        # pattern instead of only its first and last alternatives.
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
| 48 | |
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude rules.

    The compiled exclude regex is consulted first and always wins
    (result False); otherwise a hit on the compiled include regex
    selects the symbol (result True).  When neither regex matches,
    default_match is returned as-is.
    """
    excluded = re_exclude.match(sym) is not None
    if excluded:
        # An explicit exclude takes priority over any include.
        return False
    included = re_include.match(sym) is not None
    return True if included else default_match
| 63 | |
def AddOptionalArgs(argparser):
    """Register this script's optional command-line flags on argparser.

    Adds --force, --include/-i, --exclude/-e, --output/-o, -O,
    --verbose/-v, --sz and --llc.  The repeatable options (--include,
    --exclude, --sz, --llc) accumulate into lists that default to empty.
    """
    add = argparser.add_argument

    add('--force', dest='force', action='store_true',
        help='Force all re-translations of the pexe')
    add('--include', '-i', dest='include', action='append', default=[],
        help='Subzero symbols to include (regex or line range)')
    add('--exclude', '-e', dest='exclude', action='append', default=[],
        help='Subzero symbols to exclude (regex or line range)')
    add('--output', '-o', dest='output', action='store', default='a.out',
        help='Output executable. Default %(default)s.')
    add('-O', dest='optlevel', default='2',
        choices=['m1', '-1', '0', '1', '2'],
        help='Optimization level (m1 and -1 are equivalent).'
             ' Default %(default)s.')
    add('--verbose', '-v', dest='verbose', action='store_true',
        help='Display some extra debugging output')
    add('--sz', dest='sz_args', action='append', default=[],
        help='Extra arguments for Subzero')
    add('--llc', dest='llc_args', action='append', default=[],
        help='Extra arguments for llc')
Jim Stichnoth | cc0ee13 | 2014-09-17 09:42:53 -0700 | [diff] [blame] | 90 | |
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode.  Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    Subzero symbols can be selected by regular expressions on the
    symbol name, or by ranges of lines in this program's
    auto-generated symbol file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match).  A symbol that
    matches neither is 'accepted' if no --include arguments were
    given, and 'rejected' otherwise.  The Subzero version is used for
    an 'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.  So with no --include and no --exclude arguments, the
    Subzero version is used for every symbol in the symbol file.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file.  Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead.  Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    This script uses file modification timestamps to determine whether
    llc and Subzero re-translation are needed.  It checks timestamps
    of llc, llvm2ice, and the pexe against the translated object files
    to determine the minimal work necessary.  The --force option
    suppresses those checks and re-translates everything.

    This script augments PATH so that various PNaCl and LLVM tools can
    be run.  These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
      --install=toolchain/linux_x86/pnacl_newlib
    """
    # Local import: only the help text needs it.
    import inspect

    # The docstring doubles as the --help description.  cleandoc strips
    # the source-level indentation so the help layout does not depend on
    # how this function happens to be indented.
    argparser = argparse.ArgumentParser(
        description=inspect.cleandoc(main.__doc__),
        formatter_class=argparse.RawTextHelpFormatter)
    AddOptionalArgs(argparser)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    args = argparser.parse_args()
    ProcessPexe(args, args.pexe, args.output)
| 140 | |
def ProcessPexe(args, pexe, exe):
    """Translate one pexe into a hybrid Subzero/llc native executable.

    Args:
        args: Parsed command-line namespace.  The attributes read here
            are force, include, exclude, optlevel, verbose, sz_args and
            llc_args.
        pexe: Path of the finalized pexe to translate.
        exe: Path of the native executable to produce.

    Intermediate files (objects, assembly, symbol lists, whitelist) are
    named after the pexe with its '.pexe' extension removed.  llc and
    Subzero translation are each skipped when their object file is
    newer than both the pexe and the corresponding translator binary,
    unless args.force is set.  PATH in os.environ is extended with the
    PNaCl toolchain bin directory as a side effect.
    """
    pexe_base_unescaped, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base_unescaped = pexe
    pexe_unescaped = pexe
    # The quoted variants are for interpolation into shell commands;
    # the *_unescaped variants are for direct filesystem access.
    pexe_base = pipes.quote(pexe_base_unescaped)
    pexe = pipes.quote(pexe)
    exe = pipes.quote(exe)

    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_llc_unescaped = pexe_base_unescaped + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    obj_sz_unescaped = pexe_base_unescaped + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    llvm2ice = (
        '{root}/toolchain_build/src/subzero/llvm2ice'
        ).format(root=nacl_root)
    llcbin = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin/llc'
        ).format(root=nacl_root)
    opt_level = args.optlevel
    # llc and the final link have no 'm1'/'-1' level; map those to 0.
    opt_level_map = {'m1': '0', '-1': '0', '0': '0', '1': '1', '2': '2'}

    # llc translation.  The timestamp checks use the unquoted paths: a
    # shell-quoted path never exists on disk, which would force a
    # re-translation on every run.
    if (args.force or
            NewerThanOrNotThere(pexe_unescaped, obj_llc_unescaped) or
            NewerThanOrNotThere(llcbin, obj_llc_unescaped)):
        shellcmd(['pnacl-translate',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch', 'x86-32-linux',
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    # Subzero translation: bitcode -> assembly -> object -> symbol list.
    if (args.force or
            NewerThanOrNotThere(pexe_unescaped, obj_sz_unescaped) or
            NewerThanOrNotThere(llvm2ice, obj_sz_unescaped)):
        shellcmd([llvm2ice,
                  '-O' + opt_level,
                  '-bitcode-format=pnacl',
                  '-externalize',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-o', asm_sz] +
                 args.sz_args +
                 [pexe],
                 echo=args.verbose)
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is True, unless some --include
    # args are provided.
    default_match = not args.include

    # The whitelist holds the symbols whose Subzero version is to be
    # used; those are the ones weakened in the llc object below.
    whitelist_syms = [sym for sym in sz_syms
                      if MatchSymbol(sym, re_include, re_exclude,
                                     default_match)]
    with open(whitelist_sz_unescaped, 'w') as f:
        f.writelines(sym + '\n' for sym in whitelist_syms)
    whitelist_has_items = bool(whitelist_syms)

    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak),
                 echo=args.verbose)
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
             echo=args.verbose)
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    linker = (
        '{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
        ).format(root=nacl_root)
    shellcmd((
        '{ld} -m32 {partial} -o {exe} -O{opt_level} ' +
        # Keep the rest of this command line (except szrt.c) in sync
        # with RunHostLD() in pnacl-translate.py.
        '{root}/toolchain/linux_x86/pnacl_newlib/translator/x86-32-linux/lib/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt_i686.ll ' +
        '-lpthread -lrt'
        ).format(ld=linker, partial=obj_partial, exe=exe,
                 opt_level=opt_level_map[opt_level], root=nacl_root),
             echo=args.verbose)
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
| 277 | |
# Run the translation only when this file is executed as a script, so
# that importing it does not parse arguments or start a build.
if __name__ == '__main__':
    main()