|  | #!/usr/bin/python | 
|  | # | 
|  | # Copyright 2013 Google Inc. All Rights Reserved. | 
|  | # | 
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | # you may not use this file except in compliance with the License. | 
|  | # You may obtain a copy of the License at | 
|  | # | 
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  |  | 
|  | import fileinput | 
|  | import operator | 
|  | import optparse | 
|  | import os | 
|  | import pprint | 
|  | import re | 
|  | import subprocess | 
|  | import sys | 
|  | import json | 
|  |  | 
def format_bytes(bytes):
    """Render a byte count as a short human-readable string.

    Counts strictly greater than 1e6 are shown in (decimal) megabytes with
    one decimal place and an 'm' suffix; counts strictly greater than 1e3
    are shown in kilobytes with a 'k' suffix; anything else is returned via
    str() unchanged.
    """
    # Largest unit first so that the first threshold exceeded wins.
    for threshold, unit in ((1.0e6, 'm'), (1.0e3, 'k')):
        if bytes > threshold:
            return '%.1f%s' % (bytes / threshold, unit)
    return str(bytes)
|  |  | 
|  |  | 
def symbol_type_to_human(type):
    """Translate a lowercase nm symbol-type letter into a readable label.

    Raises KeyError for any letter that is not one of b, d, r, t, u, v, w.
    """
    # 'u' (unique global), 'v' and 'w' are all reported as weak symbols.
    names = dict.fromkeys(('u', 'w', 'v'), 'weak symbol')
    names.update(b='bss', d='data', r='read-only data', t='code')
    return names[type]
|  |  | 
|  |  | 
def parse_nm(input):
    """Parse nm output.

    Argument: an iterable over lines of nm output.

    Yields: (symbol name, symbol type, symbol size, source file path).
    The type is the nm type letter lowercased, with 'u' and 'v' folded into
    'w'.  BSS symbols ('b') are skipped entirely.  Path may be None if nm
    couldn't figure out the source file.

    Lines that carry an address but no size, and address-less 'U'/'w'
    lines, are silently ignored.  Anything else is reported on stderr as
    "unparsed" and skipped.
    """

    # Match lines with size + symbol + optional filename.
    sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ + (.) (.*)$')

    for line in input:
        line = line.rstrip()

        match = sym_re.match(line)
        if match:
            size, sym_type, sym, path = match.groups()
            sym_type = sym_type.lower()
            if sym_type in ('u', 'v'):
                sym_type = 'w'  # just call them all weak
            if sym_type == 'b':
                continue  # skip all BSS for now
            yield sym, sym_type, int(size, 16), path
            continue

        if addr_re.match(line):
            # No size == we don't care.
            continue

        match = noaddr_re.match(line)
        if match and match.group(1) in ('U', 'w'):
            # external or weak symbol
            continue

        # Written with sys.stderr.write (not the Python 2 print statement)
        # so the same code runs on Python 2 and 3; the text is unchanged.
        sys.stderr.write('unparsed: %r\n' % (line,))
|  |  | 
def demangle(ident, cppfilt):
    """Demangle a C++ identifier with an external c++filt-style tool.

    Arguments:
      ident: the identifier to demangle.
      cppfilt: path of the demangler executable, or a false value to
          disable demangling.

    Returns the tool's stripped output when cppfilt is set and ident looks
    mangled (starts with '_Z'); otherwise returns ident unchanged.
    Errors from launching or running the tool propagate to the caller.
    """
    if cppfilt and ident.startswith('_Z'):
        # Demangle names when possible. Mangled names all start with _Z.
        # universal_newlines=True makes check_output return text rather than
        # bytes on Python 3, so the result can be concatenated with str.
        ident = subprocess.check_output(
            [cppfilt, ident], universal_newlines=True).strip()
    return ident
|  |  | 
|  |  | 
class Suffix:
    """A compiled matcher for one identifier suffix, plus its display label.

    The given regex fragment is wrapped between two catch-all groups, so a
    match exposes the text before the suffix as the first group and the
    text after it as the last group; any groups inside the fragment are
    numbered in between.
    """

    def __init__(self, suffix, replacement):
        # Label used when the suffix is re-attached in readable form.
        self.replacement = replacement
        self.pattern = ''.join(('^(.*)', suffix, '(.*)$'))
        self.re = re.compile(self.pattern)
|  |  | 
class SuffixCleanup:
    """Pre-compile suffix regular expressions."""

    def __init__(self):
        # Raw strings: '\.' in a normal string literal is an invalid escape
        # sequence that newer Python versions warn about.
        self.suffixes = [
            Suffix(r'\.part\.([0-9]+)',      'part'),
            Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
            Suffix(r'\.isra\.([0-9]+)',      'isra'),
        ]

    def cleanup(self, ident, cppfilt):
        """Cleanup identifiers that have suffixes preventing demangling,
        and demangle if possible.

        Each known suffix found in ident is cut out and remembered as
        ' [<label>.<number>]'.  If at least one suffix was removed, the
        remaining identifier is passed to demangle() and the remembered
        tags are appended to the result.  Identifiers without any known
        suffix are returned unchanged (and are not demangled).
        """
        to_append = []
        for s in self.suffixes:
            found = s.re.match(ident)
            if not found:
                continue
            to_append.append(' [%s.%s]' % (s.replacement, found.group(2)))
            ident = found.group(1) + found.group(3)
        if to_append:
            # Only try to demangle if there were suffixes.
            ident = demangle(ident, cppfilt) + ''.join(to_append)
        return ident


# Shared instance so the suffix patterns are compiled only once.
suffix_cleanup = SuffixCleanup()
|  |  | 
def parse_cpp_name(name, cppfilt):
    """Split a C++ symbol name into its nesting components.

    The name is first run through suffix_cleanup.cleanup().  Descriptive
    prefixes such as 'vtable for ' are moved to the end as a bracketed tag,
    and the leading return types 'bool ', 'int ' and 'void ' are dropped,
    so that the name starts with its namespace.  The result is then split
    on '::', keeping template argument lists attached to their component
    and stopping at the first function, template function or operator.

    Arguments:
      name: the symbol name as printed by nm.
      cppfilt: demangler path (or a false value), forwarded to the suffix
          cleanup.

    Returns a list of non-empty string components, outermost first.
    """
    name = suffix_cleanup.cleanup(name, cppfilt)

    # Turn prefixes into suffixes so namespacing works.
    prefixes = [
        ['bool ',                         ''],
        ['construction vtable for ',      ' [construction vtable]'],
        ['global constructors keyed to ', ' [global constructors]'],
        ['guard variable for ',           ' [guard variable]'],
        ['int ',                          ''],
        ['non-virtual thunk to ',         ' [non-virtual thunk]'],
        ['typeinfo for ',                 ' [typeinfo]'],
        ['typeinfo name for ',            ' [typeinfo name]'],
        ['virtual thunk to ',             ' [virtual thunk]'],
        ['void ',                         ''],
        ['vtable for ',                   ' [vtable]'],
        ['VTT for ',                      ' [VTT]'],
    ]
    for prefix, replacement in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):] + replacement

    # Simplify parenthesis parsing.
    name = name.replace('(anonymous namespace)', '[anonymous namespace]')

    def parse_one(val):
        """Returns (leftmost-part, remaining)."""
        if val.startswith('operator'):
            # Slicing (instead of indexing val[8]) yields '' when val is
            # exactly 'operator', so that case terminates instead of
            # raising IndexError.
            following = val[len('operator'):len('operator') + 1]
            if not (following.isalnum() or following == '_'):
                # Operator overload function, terminate.
                return (val, '')

        co = val.find('::')
        lt = val.find('<')
        pa = val.find('(')
        co = len(val) if co == -1 else co
        lt = len(val) if lt == -1 else lt
        pa = len(val) if pa == -1 else pa

        if co < lt and co < pa:
            # Namespace or type name.
            return (val[:co], val[co + 2:])

        if lt < pa:
            # Template. Make sure we capture nested templates too by
            # scanning for the '>' that closes the first '<'.
            open_tmpl = 0
            for gt in range(lt, len(val)):
                if val[gt] == '<':
                    open_tmpl += 1
                elif val[gt] == '>':
                    open_tmpl -= 1
                    if open_tmpl == 0:
                        break
            else:
                # Unbalanced angle brackets: treat the rest as a single
                # terminal component rather than running off the string.
                return (val, '')
            ret = val[gt + 1:]
            if ret.startswith('::'):
                ret = ret[2:]
            if ret.startswith('('):
                # Template function, terminate.
                return (val, '')
            return (val[:gt + 1], ret)

        # Terminate with any function name, identifier, or unmangled name.
        return (val, '')

    parts = []
    while len(name) > 0:
        (part, name) = parse_one(name)
        # An empty part would mean no progress is being made.
        assert len(part) > 0
        parts.append(part)
    return parts
|  |  | 
|  |  | 
def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
    """Group symbols into a nested dict keyed by namespace or source path.

    Arguments:
      symbols: iterable of (name, type, size, path) tuples, as yielded by
          parse_nm().
      strip_prefix: optional leading string removed from source paths.
      cppfilt: demangler path (or a false value), forwarded to
          parse_cpp_name() and demangle().

    Returns a dict tree.  Every nested dict carries a '$bloat_symbols'
    entry mapping symbol type to the number of symbols filed beneath it;
    leaves are (total size, {type: count}) tuples keyed by the last name
    component.  Symbols whose name has a single component are filed under
    '[path]/<source path components>' when a path is known, otherwise under
    '[ungrouped]' (with a sub-group for some compiler-generated locals).
    """
    # Prefixes of compiler-generated local symbols and the group they are
    # collected under when no source path is available.
    regroups = [
        ['.L.str',                 '[str]'],
        ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
        ['.L__func__.',            '[__func__]'],
        ['.Lswitch.table',         '[switch table]'],
    ]

    dirs = {}
    for sym, sym_type, size, path in symbols:
        if path:
            path = os.path.normpath(path)
            if strip_prefix and path.startswith(strip_prefix):
                path = path[len(strip_prefix):]
            # Drop leading slashes in every case: a prefix given without a
            # trailing '/' used to leave one behind, which produced an
            # empty path component and tripped the assertion below.
            path = ['[path]'] + path.lstrip('/').split('/')

        parts = parse_cpp_name(sym, cppfilt)
        if len(parts) == 1:
            if path:
                # No namespaces, group with path.
                parts = path + parts
            else:
                new_prefix = ['[ungrouped]']
                for prefix, group in regroups:
                    if parts[0].startswith(prefix):
                        parts[0] = demangle(parts[0][len(prefix):], cppfilt)
                        new_prefix.append(group)
                        break
                parts = new_prefix + parts

        key = parts.pop()
        tree = dirs
        try:
            for part in parts:
                assert part != '', path
                if part not in tree:
                    tree[part] = {'$bloat_symbols': {}}
                counts = tree[part]['$bloat_symbols']
                counts[sym_type] = counts.get(sym_type, 0) + 1
                tree = tree[part]
            old_size, old_symbols = tree.get(key, (0, {}))
            old_symbols[sym_type] = old_symbols.get(sym_type, 0) + 1
            tree[key] = (old_size + size, old_symbols)
        except Exception:
            # Say which symbol broke the tree before propagating the error.
            sys.stderr.write('sym `%s`\tparts `%s`\tkey `%s`\n'
                             % (sym, parts, key))
            raise
    return dirs
|  |  | 
|  |  | 
def jsonify_tree(tree, name):
    """Convert a tree built by treeify_syms() into nested treemap nodes.

    Arguments:
      tree: dict whose values are either sub-dicts or
          (size, {type: count}) leaf tuples; a '$bloat_symbols' entry, if
          present, maps symbol type to a count.
      name: label for the node representing this tree.

    Returns a dict with 'name' (label plus formatted total size), 'data'
    ('$area' = summed size, '$dominant_symbol' = readable name of the most
    frequent symbol type, or '' when the tree has no '$bloat_symbols') and
    'children', sorted by decreasing size.

    Raises AssertionError if a leaf was recorded with more than one symbol
    type.
    """
    children = []
    total = 0

    # items() rather than iteritems() so this runs on Python 2 and 3.
    for key, val in tree.items():
        if key == '$bloat_symbols':
            continue
        if isinstance(val, dict):
            subtree = jsonify_tree(val, key)
            total += subtree['data']['$area']
            children.append(subtree)
        else:
            (size, symbols) = val
            total += size
            assert len(symbols) == 1, (
                'symbol %r has more than one type: %r' % (key, symbols))
            symbol = symbol_type_to_human(next(iter(symbols)))
            children.append({
                'name': key + ' ' + format_bytes(size),
                'data': {
                    '$area': size,
                    '$symbol': symbol,
                }
            })

    children.sort(key=lambda child: -child['data']['$area'])
    dominant_symbol = ''
    if '$bloat_symbols' in tree:
        dominant_symbol = symbol_type_to_human(
            max(tree['$bloat_symbols'].items(),
                key=operator.itemgetter(1))[0])
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
            '$dominant_symbol': dominant_symbol,
        },
        'children': children,
    }
|  |  | 
|  |  | 
def dump_nm(nmfile, strip_prefix, cppfilt):
    """Print the symbol treemap for nm output as a JavaScript assignment.

    Parses nmfile, groups the symbols into a tree and writes
    'var kTree = <json>' (indented by two spaces) to stdout.
    """
    symbol_tree = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
    payload = json.dumps(jsonify_tree(symbol_tree, '[everything]'), indent=2)
    print('var kTree = ' + payload)
|  |  | 
|  |  | 
def parse_objdump(input):
    """Parse objdump -h output.

    Argument: an iterable over lines of objdump output.

    Returns (sections, debug_sections), two lists of (name, size) tuples in
    input order.  A leading '.' is removed from section names; sections
    whose name then starts with 'debug_' go into the second list with that
    prefix removed.  Sizes are parsed as hexadecimal.  Lines that do not
    look like a section row are ignored.
    """
    # Raw string: '\d' and '\S' are invalid escapes in a normal literal.
    sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
    sections = []
    debug_sections = []

    for line in input:
        match = sec_re.match(line.strip())
        if not match:
            continue
        name, size = match.groups()
        if name.startswith('.'):
            name = name[1:]
        if name.startswith('debug_'):
            debug_sections.append((name[len('debug_'):], int(size, 16)))
        else:
            sections.append((name, int(size, 16)))
    return sections, debug_sections
|  |  | 
|  |  | 
def jsonify_sections(name, sections):
    """Build a treemap node for a group of binary sections.

    Arguments:
      name: label for the group node.
      sections: iterable of (section name, size) pairs.

    Returns a dict with the labelled group ('name' carries the formatted
    total size, 'data' holds the total as '$area') and one child node per
    section, ordered by decreasing size.
    """
    children = [
        {
            'name': section + ' ' + format_bytes(size),
            'data': {'$area': size},
        }
        for section, size in sections
    ]
    # Sum from the built nodes so `sections` is only iterated once.
    total = sum(child['data']['$area'] for child in children)
    largest_first = sorted(children,
                           key=lambda child: child['data']['$area'],
                           reverse=True)
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {'$area': total},
        'children': largest_first,
    }
|  |  | 
|  |  | 
def dump_sections(objdump):
    """Print the section-size treemap as a JavaScript assignment.

    Argument: an iterable over lines of `objdump -h` output.

    Writes 'var kTree = <json>' to stdout, where the top node has two
    children: the debug sections group and the regular sections group.
    """
    sections, debug_sections = parse_objdump(objdump)
    sections = jsonify_sections('sections', sections)
    debug_sections = jsonify_sections('debug', debug_sections)
    size = sections['data']['$area'] + debug_sections['data']['$area']
    # Parenthesised single-argument print works on both Python 2 and 3,
    # matching dump_nm().
    print('var kTree = ' + json.dumps({
        'name': 'top ' + format_bytes(size),
        'data': {'$area': size},
        'children': [debug_sections, sections]}))
|  |  | 
|  |  | 
# Command-line entry point: parse the options, then run the requested mode.
usage = """%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should be from running a command
like the following (note, can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)

mode = args[0]
if mode == 'syms':
    with open(opts.nmpath, 'r') as nmfile:
        # Probe the demangler once with a name it must return unchanged;
        # if it is missing or misbehaves, continue without demangling.
        try:
            # universal_newlines=True: compare text, not bytes, on Python 3.
            res = subprocess.check_output([opts.cppfilt, 'main'],
                                          universal_newlines=True)
            if res.strip() != 'main':
                sys.stderr.write("%s failed demangling, "
                                 "output won't be demangled.\n" % opts.cppfilt)
                opts.cppfilt = None
        except (OSError, ValueError, subprocess.CalledProcessError):
            sys.stderr.write("Could not find c++filt at %s, "
                             "output won't be demangled.\n" % opts.cppfilt)
            opts.cppfilt = None
        dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
    with open(opts.objdumppath, 'r') as objdumpfile:
        dump_sections(objdumpfile)
elif mode == 'dump':
    with open(opts.nmpath, 'r') as nmfile:
        syms = list(parse_nm(nmfile))
    # a list of (sym, type, size, path); sort by size.
    syms.sort(key=lambda x: -x[2])
    total = 0
    for sym, sym_type, size, path in syms:
        if sym_type in ('b', 'w'):
            continue  # skip bss and weak symbols
        if path is None:
            path = ''
        if opts.filter and not (opts.filter in sym or opts.filter in path):
            continue
        print('%6s %s (%s) %s' % (format_bytes(size), sym,
                                  symbol_type_to_human(sym_type), path))
        total += size
    print('%6s %s' % (format_bytes(total), 'total'))
else:
    print('unknown mode')
    parser.print_usage()
    # Exit non-zero, as for a wrong argument count above.
    sys.exit(1)