# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Clustering for function call-graph.

See the Clustering class for a detailed description.
"""

import collections
import itertools
import logging

Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
CalleeInfo = collections.namedtuple('CalleeInfo',
                                    ('index', 'callee_symbol',
                                     'misses', 'caller_and_count'))
CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count'))


class Clustering:
  """Cluster symbols.

  We are given lists of the first function calls, ordered by
  time. There are multiple lists: different benchmarks run multiple
  times, as well as a list from startup and then a second list, started
  5 seconds after startup, that runs until the benchmark memory dump.

  We have evidence (see below) that this simple ordering of code from a
  single profiling run (a load of a website) improves performance,
  presumably by improving code locality. To reconstruct this ordering
  using profiling information from multiple files, we cluster. Doing
  this clustering over multiple runs on the speedometer benchmark
  recovered speedometer performance compared with the legacy benchmark.

  For each offset list, we record the distances between each symbol and
  its neighborhood of the following k symbols (k=19, chosen
  arbitrarily). For example, if we have an offset list of symbols
  'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1),
  (b->e, 3), etc. Then, for each neighbor pair, we average the
  distances over all seen symbol lists. If we see an inversion (for
  example, (b->a, 3)), we use it as a negative distance (-3). For each
  file in which a given pair does not appear, that is, if the pair is
  absent from that file or its symbols are separated by 20 or more
  positions, we use a large distance D (D=1000). If the average over
  all files is negative, the neighbor pair is inverted and the distance
  flipped. The idea is that if two symbols appear near each other in
  all profiling runs, there is high confidence that they are usually
  called together. If they don't appear near each other in some runs,
  there is less confidence that they should be colocated. Symbol
  distances are taken only as following distances to avoid confusing
  double-counting possibilities as well as to give a clear ordering to
  combining clusters.
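
  For example, with three symbol lists in which (a->b) is seen at
  distances 2 and 3 and the pair is missing from the third list, the
  average is (2 + 3 + 1000) / 3 = 335. If instead the third list showed
  (b->a, 1), the distances would be 2, 3 and -1, averaging to 4/3.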

  Neighbors are sorted, and starting with the shortest distance, symbols
  are coalesced into clusters. If the neighbor pair is (a->b), the
  clusters containing a and b are combined in that order. If a and b are
  already in the same cluster, nothing happens. After processing all
  neighbors there is usually only one cluster; if there are multiple
  clusters, they are combined in order from largest to smallest
  (although that choice may not matter).

  Cluster merging may optionally be halted when a merged cluster would
  exceed the size of an Android page. As of November 2018 this slightly
  reduces performance and should not be used (a 1.7% decline in
  speedometer2 and a 450K native library memory regression).
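
  A minimal illustration (with hypothetical one-letter symbols):

    Clustering.ClusteredSymbolLists([['a', 'b', 'c'], ['a', 'c', 'b']],
                                    size_map=None)

  Here (b->c) and (c->b) cancel to an average distance of 0, so b and c
  are coalesced first, and the result is ['a', 'c', 'b'].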
  """
  NEIGHBOR_DISTANCE = 20
  FAR_DISTANCE = 1000
  MAX_CLUSTER_SIZE = 4096  # Android page size (4KiB).

  class _Cluster:
    def __init__(self, syms, size):
      assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster'
      self._syms = syms
      self._size = size

    @property
    def syms(self):
      return self._syms

    @property
    def binary_size(self):
      return self._size

  @classmethod
  def ClusteredSymbolLists(cls, sym_lists, size_map):
    c = cls()
    c.AddSymbolLists(sym_lists)
    return c.ClusterToList(size_map)

  def __init__(self):
    self._num_lists = None
    self._neighbors = None
    self._cluster_map = {}
    self._symbol_size = lambda _: 0  # Maps a symbol to a size.

  def _MakeCluster(self, syms):
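    """Makes a cluster of syms, mapping each symbol to the new cluster."""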
    c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms))
    for s in syms:
      self._cluster_map[s] = c
    return c

  def ClusterOf(self, s):
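    """Returns the cluster containing s, creating a singleton if needed.

    Args:
      s: A symbol or a _Cluster.
    """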
    if isinstance(s, self._Cluster):
      assert self._cluster_map[s.syms[0]] == s
      return s
    if s in self._cluster_map:
      return self._cluster_map[s]
    return self._MakeCluster([s])

  def Combine(self, a, b):
    """Combine clusters.

    Args:
      a, b: Clusters or str. The canonical cluster (ClusterOf) will be
        used to do the combining.

    Returns:
      A merged cluster from a and b, or None if a and b are already in
      the same cluster.
    """
    canonical_a = self.ClusterOf(a)
    canonical_b = self.ClusterOf(b)
    if canonical_a == canonical_b:
      return None
    return self._MakeCluster(canonical_a.syms + canonical_b.syms)

  def AddSymbolLists(self, sym_lists):
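    """Computes coalesced neighbor distances from the given symbol lists."""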
    self._num_lists = len(sym_lists)
    self._neighbors = self._CoalesceNeighbors(
        self._ConstructNeighbors(sym_lists))

  def _ConstructNeighbors(self, sym_lists):
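    """Records a Neighbor for each symbol pair within NEIGHBOR_DISTANCE."""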
    neighbors = []
    for sym_list in sym_lists:
      for i, s in enumerate(sym_list):
        for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))):
          if s == sym_list[j]:
            # Free functions that are static inline seem to be the only
            # source of these duplicates.
            continue
          neighbors.append(Neighbor(s, sym_list[j], j - i))
    logging.info('Constructed %s symbol neighbors', len(neighbors))
    return neighbors

  def _CoalesceNeighbors(self, neighbors):
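    """Averages the distance of each unordered neighbor pair over all lists.

    Pairs missing from a list are penalized with FAR_DISTANCE, and pairs
    whose average distance is negative are inverted, as described in the
    class docstring.
    """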
    pairs = collections.defaultdict(list)
    for n in neighbors:
      pairs[(n.src, n.dst)].append(n.dist)
    coalesced = []
    logging.info('Will coalesce over %s neighbor pairs', len(pairs))
    count = 0
    for (s, t) in pairs:
      assert s != t, '{} != {}'.format(s, t)
      if (t, s) in pairs and t < s:
        # Only process each unordered pair once.
        continue
      count += 1
      if not (count % 1e6):
        logging.info('tick')
      distances = []
      if (s, t) in pairs:
        distances.extend(pairs[(s, t)])
      if (t, s) in pairs:
        distances.extend(-d for d in pairs[(t, s)])
      if distances:
        num_missing = self._num_lists - len(distances)
        avg_distance = (float(sum(distances)) +
                        self.FAR_DISTANCE * num_missing) / self._num_lists
        if avg_distance > 0:
          coalesced.append(Neighbor(s, t, avg_distance))
        else:
          # A negative average means t tends to precede s: invert the
          # pair and flip the distance, per the class docstring.
          coalesced.append(Neighbor(t, s, -avg_distance))
    return coalesced

  def ClusterToList(self, size_map=None):
    """Merge the clusters with the smallest distances.

    Args:
      size_map ({symbol: size} or None): Map symbol names to their size.
        Cluster growth will be stopped at MAX_CLUSTER_SIZE. If None,
        sizes are taken to be zero and cluster growth is not stopped.

    Returns:
      An ordered list of symbols from AddSymbolLists, appropriately
      clustered.
    """
    if size_map:
      self._symbol_size = lambda s: size_map[s]
    if not self._num_lists or not self._neighbors:
      # Some sort of trivial set of symbol lists, such as all being
      # length 1. Return an empty ordering.
      return []
    logging.info('Sorting %s neighbors', len(self._neighbors))
    self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst))
    logging.info('Clustering...')
    count = 0
    while self._neighbors:
      count += 1
      if not (count % 1e6):
        logging.info('tock')
      neighbor = self._neighbors.pop()
      src = self.ClusterOf(neighbor.src)
      dst = self.ClusterOf(neighbor.dst)
      if (src == dst or
          src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE):
        continue
      self.Combine(src, dst)
    if size_map:
      clusters_by_size = sorted(set(self._cluster_map.values()),
                                key=lambda c: -c.binary_size)
    else:
      clusters_by_size = sorted(set(self._cluster_map.values()),
                                key=lambda c: -len(c.syms))
    logging.info('Produced %s clusters', len(clusters_by_size))
    logging.info('Top sizes: %s',
                 ['{}/{}'.format(len(c.syms), c.binary_size)
                  for c in clusters_by_size[:4]])
    logging.info('Bottom sizes: %s',
                 ['{}/{}'.format(len(c.syms), c.binary_size)
                  for c in clusters_by_size[-4:]])
    ordered_syms = []
    for c in clusters_by_size:
      ordered_syms.extend(c.syms)
    assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
    return ordered_syms


def _GetOffsetSymbolName(processor, dump_offset):
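  """Maps a dump offset to the name of its primary symbol."""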
  dump_offset_to_symbol_info = (
      processor.GetDumpOffsetToSymboInfolIncludingWhitelist())
  offset_to_primary = processor.OffsetToPrimaryMap()
  # The symbol info list has one entry per 2 bytes of binary, hence the
  # halving of the offset.
  idx = dump_offset // 2
  assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), (
      'Dump offset out of binary range')
  symbol_info = dump_offset_to_symbol_info[idx]
  assert symbol_info, ('A return address (offset = 0x{:08x}) does not map '
                       'to any symbol'.format(dump_offset))
  assert symbol_info.offset in offset_to_primary, (
      'Offset not found in primary map!')
  return offset_to_primary[symbol_info.offset].name


def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False):
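  """Clusters each process's symbol lists and merges the orderings.

  Renderer symbols are ordered first to favor rendering performance,
  followed by browser symbols and then those of any other processes,
  with duplicates removed.
  """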
  raw_offsets = profiles.GetProcessOffsetLists()
  process_symbols = collections.defaultdict(list)
  seen_symbols = set()
  for p in raw_offsets:
    for offsets in raw_offsets[p]:
      symbol_names = processor.GetOrderedSymbols(
          processor.GetReachedOffsetsFromDump(offsets))
      process_symbols[p].append(symbol_names)
      seen_symbols |= set(symbol_names)
  if limit_cluster_size:
    name_map = processor.NameToSymbolMap()
    size_map = {name: name_map[name].size for name in seen_symbols}
  else:
    size_map = None

  # Process names from the profile dumps that are treated specially.
  _RENDERER = 'renderer'
  _BROWSER = 'browser'

  assert _RENDERER in process_symbols
  assert _BROWSER in process_symbols

  renderer_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_RENDERER], size_map)
  browser_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_BROWSER], size_map)
  other_lists = []
  for process, syms in process_symbols.items():
    if process not in (_RENDERER, _BROWSER):
      other_lists.extend(syms)
  if other_lists:
    other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map)
  else:
    other_clustering = []

  # Start with the renderer cluster to favor rendering performance.
  final_ordering = list(renderer_clustering)
  seen = set(final_ordering)
  final_ordering.extend(s for s in browser_clustering if s not in seen)
  seen |= set(browser_clustering)
  final_ordering.extend(s for s in other_clustering if s not in seen)

  return final_ordering


def ClusterOffsets(profiles, processor, limit_cluster_size=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) Whether to stop cluster growth at
      MAX_CLUSTER_SIZE.

  Returns:
    A list of clustered symbol names.
  """
  return _ClusterOffsetsLists(profiles, processor, limit_cluster_size)
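

# A sketch of intended use (the ProfileManager and SymbolOffsetProcessor
# instances are assumed to be constructed by the surrounding profiling
# tooling, and the output file name is hypothetical):
#
#   ordered_names = ClusterOffsets(profiles, processor,
#                                  limit_cluster_size=False)
#   with open('ordered_symbols.txt', 'w') as f:
#     f.write('\n'.join(ordered_names))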