blob: 13eed429902ad191710987cafa569512ed8f0dcc [file] [log] [blame]
Jim Stichnoth10ea6982014-09-09 11:19:12 -07001#!/usr/bin/env python2
2
3import argparse
4import os
5import pipes
6import re
7import sys
8
9from utils import shellcmd
10from utils import FindBaseNaCl
11
def NewerThanOrNotThere(old_path, new_path):
    """Returns whether old_path is newer than new_path.

    Also returns true if either path doesn't exist.

    The modification times are read directly rather than behind a
    separate existence check, so a file that disappears between the
    check and the read is reported as "not there" instead of raising
    OSError.
    """
    try:
        old_mtime = os.path.getmtime(old_path)
        new_mtime = os.path.getmtime(new_path)
    except OSError:
        # A path that cannot be stat'ed is treated as missing.
        return True
    return old_mtime > new_mtime
20
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Every pattern is emitted as '^(?:...)$' so that a '|' inside a
    user-supplied regex stays inside the anchors (a plain '^a|b$'
    would anchor 'a' only at the start and 'b' only at the end).
    Symbol names selected by a range are escaped and matched
    literally.

    Returns '^$' (which matches only the empty string) when patterns
    is empty.  Prints a message and exits with status 1 when a range
    is malformed (too many ':' separators or a non-integer bound).
    """
    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so an empty pattern doesn't raise
        # IndexError; it simply falls through as an empty regex.
        first = pattern[:1]
        if first.isdigit() or first == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                elif len(interval) == 2:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
                else:
                    raise ValueError(pattern)
            except ValueError:
                print('Invalid range syntax: {p}'.format(p=pattern))
                sys.exit(1)
            pattern = '|'.join([re.escape(p) for p in syms[lower:upper]])
        pattern_list.append('^(?:' + pattern + ')$')
    return '|'.join(pattern_list) if pattern_list else '^$'
48
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude rules.

    An exclude match always wins and yields False; otherwise an
    include match yields True.  When neither compiled regex matches
    the symbol, default_match is returned as given.
    """
    # Excludes take priority, so they are tested before the includes.
    if re_exclude.match(sym):
        return False
    return True if re_include.match(sym) else default_match
63
def AddOptionalArgs(argparser):
    """Register this script's optional flags on the given parser.

    Adds --force, --include/-i, --exclude/-e, --output/-o, -O,
    --verbose/-v, --sz and --llc.  The repeatable options (include,
    exclude, sz, llc) accumulate into lists that default to empty.
    """
    add = argparser.add_argument
    add('--force', action='store_true', dest='force',
        help='Force all re-translations of the pexe')
    add('--include', '-i', action='append', dest='include', default=[],
        help='Subzero symbols to include (regex or line range)')
    add('--exclude', '-e', action='append', dest='exclude', default=[],
        help='Subzero symbols to exclude (regex or line range)')
    add('--output', '-o', action='store', dest='output', default='a.out',
        help='Output executable. Default %(default)s.')
    add('-O', dest='optlevel', default='2',
        choices=['m1', '-1', '0', '1', '2'],
        help=('Optimization level (m1 and -1 are equivalent).'
              ' Default %(default)s.'))
    add('--verbose', '-v', action='store_true', dest='verbose',
        help='Display some extra debugging output')
    add('--sz', action='append', dest='sz_args', default=[],
        help='Extra arguments for Subzero')
    add('--llc', action='append', dest='llc_args', default=[],
        help='Extra arguments for llc')
Jim Stichnothcc0ee132014-09-17 09:42:53 -070090
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a
    hybrid of Subzero and llc translated bitcode. Linker tricks are
    used to determine whether Subzero or llc generated symbols are
    used, on a per-symbol basis.

    By default, for every symbol, its llc version is used. Subzero
    symbols can be enabled by regular expressions on the symbol name,
    or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the
    symbol is 'rejected' on a match), followed by the --include
    arguments (the symbol is 'accepted' on a match), followed by
    unconditional 'rejection'. The Subzero version is used for an
    'accepted' symbol, and the llc version is used for a 'rejected'
    symbol.

    Each --include and --exclude argument can be a regular expression
    or a range of lines in the symbol file. Each regular expression
    is wrapped inside '^$', so if you want a substring match on 'foo',
    use '.*foo.*' instead. Ranges use python-style 'first:last'
    notation, so e.g. use '0:10' or ':10' for the first 10 lines of
    the file, or '1' for the second line of the file.

    This script uses file modification timestamps to determine whether
    llc and Subzero re-translation are needed. It checks timestamps
    of llc, llvm2ice, and the pexe against the translated object files
    to determine the minimal work necessary. The --force option
    suppresses those checks and re-translates everything.

    This script augments PATH so that various PNaCl and LLVM tools can
    be run. These extra paths are within the native_client tree.
    When changes are made to these tools, copy them this way:
    cd native_client
    toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
    --install=toolchain/linux_x86/pnacl_newlib
    """
    # The docstring above doubles as the --help description (it is
    # rendered verbatim by RawTextHelpFormatter), so editing it changes
    # user-visible output.  When docstrings are stripped (python -OO),
    # __doc__ is None; fall back to an empty description rather than
    # failing with a TypeError on the concatenation.
    # NOTE(review): the leading-whitespace prefix presumably aligns the
    # first docstring line with the indented lines that follow -- confirm
    # its width against the docstring's indentation.
    argparser = argparse.ArgumentParser(
        description=' ' + (main.__doc__ or ''),
        formatter_class=argparse.RawTextHelpFormatter)
    AddOptionalArgs(argparser)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    args = argparser.parse_args()
    # The output executable comes from --output; the pexe is positional.
    ProcessPexe(args, args.pexe, args.output)
140
def ProcessPexe(args, pexe, exe):
    """Translate a pexe with both llc and Subzero and link a hybrid exe.

    Args:
        args: Parsed options.  Reads force, optlevel, verbose, include,
            exclude, sz_args and llc_args.
        pexe: Path of the finalized pexe to translate.
        exe: Path of the native executable to produce.

    All intermediate files (objects, assembly, symbol lists, the
    weaken whitelist) are named from the pexe path with a trailing
    '.pexe' extension removed.  The llc and Subzero translations are
    redone only when args.force is set or when the pexe or the
    respective translator binary is newer than (or missing relative
    to) the translated object file.

    Side effect: prepends the pnacl_newlib toolchain bin directory to
    os.environ['PATH'].
    """
    pexe_base, ext = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Every path is kept in two spellings: the raw ("unescaped") one for
    # Python file operations such as open() and timestamp checks, and a
    # shell-quoted one for interpolation into command lines.
    pexe_unescaped = pexe
    pexe_base_unescaped = pexe_base
    pexe = pipes.quote(pexe)
    pexe_base = pipes.quote(pexe_base)
    exe = pipes.quote(exe)

    nacl_root = FindBaseNaCl()
    os.environ['PATH'] = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin{sep}' +
        '{path}'
        ).format(root=nacl_root, sep=os.pathsep, path=os.environ['PATH'])
    obj_llc = pexe_base + '.llc.o'
    obj_llc_unescaped = pexe_base_unescaped + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    obj_sz_unescaped = pexe_base_unescaped + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = pexe_base + '.o'
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    llvm2ice = (
        '{root}/toolchain_build/src/subzero/llvm2ice'
        ).format(root=nacl_root)
    llcbin = (
        '{root}/toolchain/linux_x86/pnacl_newlib/bin/llc'
        ).format(root=nacl_root)
    opt_level = args.optlevel
    # pnacl-translate and the final link take a numeric -O level, so the
    # 'm1'/'-1' levels are mapped to 0 for them; llvm2ice receives the
    # level exactly as given.
    opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }

    # llc translation.  The timestamp checks must use the unescaped
    # paths: the shell-quoted spellings don't name real files whenever
    # quoting actually changes the string.
    if (args.force or
            NewerThanOrNotThere(pexe_unescaped, obj_llc_unescaped) or
            NewerThanOrNotThere(llcbin, obj_llc_unescaped)):
        shellcmd(['pnacl-translate',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch', 'x86-32-linux',
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_llc), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    # Subzero translation: llvm2ice emits assembly, llvm-mc assembles it.
    if (args.force or
            NewerThanOrNotThere(pexe_unescaped, obj_sz_unescaped) or
            NewerThanOrNotThere(llvm2ice, obj_sz_unescaped)):
        shellcmd([llvm2ice,
                  '-O' + opt_level,
                  '-bitcode-format=pnacl',
                  '-externalize',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-o', asm_sz] +
                 args.sz_args +
                 [pexe],
                 echo=args.verbose)
        shellcmd((
            'llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj -o {obj} ' +
            '{asm}'
            ).format(asm=asm_sz, obj=obj_sz), echo=args.verbose)
        shellcmd((
            'objcopy --redefine-sym _start=_user_start {obj}'
            ).format(obj=obj_sz), echo=args.verbose)
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    with open(sym_sz_unescaped) as f:
        sz_syms = f.read().splitlines()
    re_include_str = BuildRegex(args.include, sz_syms)
    re_exclude_str = BuildRegex(args.exclude, sz_syms)
    re_include = re.compile(re_include_str)
    re_exclude = re.compile(re_exclude_str)
    # If a symbol doesn't explicitly match re_include or re_exclude,
    # the default MatchSymbol() result is True, unless some --include
    # args are provided.
    default_match = not args.include

    # Write the list of symbols whose Subzero version should be used.
    whitelist_has_items = False
    with open(whitelist_sz_unescaped, 'w') as f:
        for sym in sz_syms:
            if MatchSymbol(sym, re_include, re_exclude, default_match):
                f.write(sym + '\n')
                whitelist_has_items = True
    # Weaken every symbol in the Subzero object, and only the
    # whitelisted symbols in the llc object, then combine both objects
    # with a relocatable link.
    shellcmd((
        'objcopy --weaken {obj} {weak}'
        ).format(obj=obj_sz, weak=obj_sz_weak), echo=args.verbose)
    if whitelist_has_items:
        # objcopy returns an error if the --weaken-symbols file is empty.
        shellcmd((
            'objcopy --weaken-symbols={whitelist} {obj} {weak}'
            ).format(whitelist=whitelist_sz, obj=obj_llc, weak=obj_llc_weak),
                 echo=args.verbose)
    else:
        shellcmd((
            'objcopy {obj} {weak}'
            ).format(obj=obj_llc, weak=obj_llc_weak), echo=args.verbose)
    shellcmd((
        'ld -r -m elf_i386 -o {partial} {sz} {llc}'
        ).format(partial=obj_partial, sz=obj_sz_weak, llc=obj_llc_weak),
             echo=args.verbose)
    # Localize everything in the combined object except the renamed
    # entry point, which is made global again.
    shellcmd((
        'objcopy -w --localize-symbol="*" {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    shellcmd((
        'objcopy --globalize-symbol=_user_start {partial}'
        ).format(partial=obj_partial), echo=args.verbose)
    linker = (
        '{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
        ).format(root=nacl_root)
    shellcmd((
        '{ld} -m32 {partial} -o {exe} -O{opt_level} ' +
        # Keep the rest of this command line (except szrt.c) in sync
        # with RunHostLD() in pnacl-translate.py.
        '{root}/toolchain/linux_x86/pnacl_newlib/translator/x86-32-linux/lib/' +
        '{{unsandboxed_irt,irt_query_list}}.o ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt.c ' +
        '{root}/toolchain_build/src/subzero/runtime/szrt_i686.ll ' +
        '-lpthread -lrt'
        ).format(ld=linker, partial=obj_partial, exe=exe,
                 opt_level=opt_level_map[opt_level], root=nacl_root),
             echo=args.verbose)
    # Put the extra verbose printing at the end.
    if args.verbose:
        print('PATH={path}'.format(path=os.environ['PATH']))
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
277
# Run the translation only when this file is executed as a script, so
# that importing it for its helper functions has no side effects.
if __name__ == '__main__':
    main()