Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
| 2 | |
| 3 | import argparse |
| 4 | import itertools |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 5 | import re |
| 6 | |
| 7 | if __name__ == '__main__': |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 8 | """Compares a LLVM file with a subzero file for differences. |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 9 | |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 10 | Before comparing, the LLVM file is massaged to remove comments, |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 11 | blank lines, global variable definitions, external function |
| 12 | declarations, and possibly other patterns that llvm2ice does not |
| 13 | handle. |
| 14 | |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 15 | The subzero file and the massaged LLVM file are compared line by |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 16 | line for differences. However, there is a regex defined such that |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 17 | if the regex matches a line in the LLVM file, that line and the |
| 18 | corresponding line in the subzero file are ignored. This lets us |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 19 | ignore minor differences such as inttoptr and ptrtoint, and |
| 20 | printing of floating-point constants. |
| 21 | |
| 22 | On success, no output is produced. On failure, each mismatch is |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 23 | printed as two lines, one starting with 'SZ' (subzero) and one |
| 24 | starting with 'LL' (LLVM). |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 25 | """ |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 26 | desc = 'Compare LLVM and subzero bitcode files.' |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 27 | argparser = argparse.ArgumentParser(description=desc) |
| 28 | argparser.add_argument( |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 29 | 'llfile', nargs=1, |
| 30 | type=argparse.FileType('r'), metavar='LLVM_FILE', |
| 31 | help='LLVM bitcode file') |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 32 | argparser.add_argument( |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 33 | 'szfile', nargs='?', default='-', |
| 34 | type=argparse.FileType('r'), metavar='SUBZERO_FILE', |
| 35 | help='Subzero bitcode file [default stdin]') |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 36 | args = argparser.parse_args() |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 37 | bitcode = args.llfile[0].readlines() |
| 38 | sz_out = [ line.rstrip() for line in args.szfile.readlines()] |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 39 | |
| 40 | # Filter certain lines and patterns from the input, and collect |
| 41 | # the remainder into llc_out. |
| 42 | llc_out = [] |
| 43 | tail_call = re.compile(' tail call '); |
| 44 | trailing_comment = re.compile(';.*') |
Jim Stichnoth | de4ca71 | 2014-06-29 08:13:48 -0700 | [diff] [blame] | 45 | ignore_pattern = re.compile('|'.join([ |
| 46 | '^ *$', # all-whitespace lines |
| 47 | '^declare', # declarations without definitions |
| 48 | '^@.*\]$' # PNaCl global declarations like: |
| 49 | # @v = external global [4 x i8] |
| 50 | ])) |
Jan Voung | 3bd9f1a | 2014-06-18 10:50:57 -0700 | [diff] [blame] | 51 | prev_line = None |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 52 | for line in bitcode: |
Jan Voung | 3bd9f1a | 2014-06-18 10:50:57 -0700 | [diff] [blame] | 53 | if prev_line: |
| 54 | line = prev_line + line |
| 55 | prev_line = None |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 56 | # Convert tail call into regular (non-tail) call. |
| 57 | line = tail_call.sub(' call ', line) |
| 58 | # Remove trailing comments and spaces. |
| 59 | line = trailing_comment.sub('', line).rstrip() |
| 60 | # Ignore blanks lines, forward declarations, and variable definitions. |
Jan Voung | 3bd9f1a | 2014-06-18 10:50:57 -0700 | [diff] [blame] | 61 | if ignore_pattern.search(line): |
| 62 | continue |
| 63 | # SZ doesn't break up long lines, but LLVM does. Normalize to SZ. |
| 64 | if line.endswith(','): |
| 65 | prev_line = line |
| 66 | continue |
| 67 | llc_out.append(line) |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 68 | |
| 69 | # Compare sz_out and llc_out line by line, but ignore pairs of |
| 70 | # lines where the llc line matches a certain pattern. |
| 71 | return_code = 0 |
| 72 | lines_total = 0 |
| 73 | lines_diff = 0 |
| 74 | ignore_pattern = re.compile( |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 75 | '|'.join([' -[0-9]', # negative constants |
| 76 | ' (float|double) [-0-9]', # FP constants |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 77 | ' (float|double) %\w+, [-0-9]', |
Jan Voung | 3bd9f1a | 2014-06-18 10:50:57 -0700 | [diff] [blame] | 78 | ' @llvm\..*i\d+\*', # intrinsic calls w/ pointer args |
| 79 | ' i\d+\* @llvm\.', # intrinsic calls w/ pointer ret |
Karl Schimpf | a667fb8 | 2014-05-19 14:56:51 -0700 | [diff] [blame] | 80 | ' inttoptr ', # inttoptr pointer types |
| 81 | ' ptrtoint ', # ptrtoint pointer types |
| 82 | ' bitcast .*\* .* to .*\*' # bitcast pointer types |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 83 | ])) |
| 84 | for (sz_line, llc_line) in itertools.izip_longest(sz_out, llc_out): |
| 85 | lines_total += 1 |
| 86 | if sz_line == llc_line: |
| 87 | continue |
| 88 | if llc_line and ignore_pattern.search(llc_line): |
| 89 | lines_diff += 1 |
| 90 | continue |
Jan Voung | 3bd9f1a | 2014-06-18 10:50:57 -0700 | [diff] [blame] | 91 | if sz_line: print 'SZ (%d)> %s' % (lines_total, sz_line) |
| 92 | if llc_line: print 'LL (%d)> %s' % (lines_total, llc_line) |
Jim Stichnoth | f7c9a14 | 2014-04-29 10:52:43 -0700 | [diff] [blame] | 93 | return_code = 1 |
| 94 | |
| 95 | if return_code == 0: |
| 96 | message = 'Success (ignored %d diffs out of %d lines)' |
| 97 | print message % (lines_diff, lines_total) |
| 98 | exit(return_code) |