Avi Drissman | dfd88085 | 2022-09-15 20:11:09 | [diff] [blame] | 1 | # Copyright 2018 The Chromium Authors |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Clustering for function call-graph. |
| 6 | |
| 7 | See the Clustering class for a detailed description. |
| 8 | """ |
| 9 | |
| 10 | import collections |
| 11 | import itertools |
| 12 | import logging |
| 13 | |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 14 | Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist')) |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 15 | CalleeInfo = collections.namedtuple('CalleeInfo', |
| 16 | ('index', 'callee_symbol', |
| 17 | 'misses', 'caller_and_count')) |
| 18 | CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count')) |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 19 | |
| 20 | |
Jesse McKenna | c0b694b7 | 2022-06-17 17:46:14 | [diff] [blame] | 21 | class Clustering: |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 22 | """Cluster symbols. |
| 23 | |
| 24 | We are given a list of the first function calls, ordered by |
| 25 | time. There are multiple lists: different benchmarks run multiple |
| 26 | times, as well as list from startup and then a second list after |
| 27 | startup (5 seconds) that runs until the benchmark memory dump. |
| 28 | |
| 29 | We have evidence (see below) that this simple ordering of code from a |
| 30 | single profiling run (a load of a website) improves performance, |
| 31 | presumably by improving code locality. To reconstruct this ordering |
| 32 | using profiling information from multiple files, we cluster. Doing |
| 33 | this clustering over multiple runs on the speedometer benchmark |
| 34 | recovered speedometer performance compared with the legacy benchmark. |
| 35 | |
| 36 | For each offset list, we record the distances between each symbol and |
| 37 | its neighborhood of the following k symbols (k=19, chosen |
| 38 | arbitrarily). For example, if we have an offset list of symbols |
| 39 | 'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->e, |
| 40 | 3), etc. Then we average distances of a given neighbor pair over all |
| 41 | seen symbol lists. If we see an inversion (for example, (b->a, 3), we |
| 42 | use this as a distance of -3). For each file that a given pair does |
| 43 | not appear, that is, if the pair does not appear in that file or they |
| 44 | are separated by 20 symbols, we use a large distance D (D=1000). The |
| 45 | distances are then averages over all files. If the average is |
| 46 | negative, the neighbor pair is inverted and the distance flipped. The |
| 47 | idea is that if two symbols appear near each other in all profiling |
| 48 | runs, there is high confidence that they are usually called |
| 49 | together. If they don't appear near in some runs, there is less |
| 50 | confidence that they should be colocated. Symbol distances are taken |
| 51 | only as following distances to avoid confusing double-counting |
| 52 | possibilities as well as to give a clear ordering to combining |
| 53 | clusters. |
| 54 | |
| 55 | Neighbors are sorted, and starting with the shortest distance, symbols |
| 56 | are coalesced into clusters. If the neighbor pair is (a->b), the |
| 57 | clusters containing a and b are combined in that order. If a and b are |
| 58 | already in the same cluster, nothing happens. After processing all |
| 59 | neighbors there is usually only one cluster; if there are multiple |
| 60 | clusters they are combined in order from largest to smallest (although |
| 61 | that choice may not matter). |
| 62 | |
| 63 | Cluster merging may optionally be halted if they get above the size |
| 64 | of an android page. As of November 2018 this slightly reduces |
| 65 | performance and should not be used (1.7% decline in speedometer2, |
| 66 | 450K native library memory regression). |
| 67 | """ |
| 68 | NEIGHBOR_DISTANCE = 20 |
| 69 | FAR_DISTANCE = 1000 |
| 70 | MAX_CLUSTER_SIZE = 4096 # 4k pages on android. |
| 71 | |
Jesse McKenna | c0b694b7 | 2022-06-17 17:46:14 | [diff] [blame] | 72 | class _Cluster: |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 73 | def __init__(self, syms, size): |
| 74 | assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster' |
| 75 | self._syms = syms |
| 76 | self._size = size |
| 77 | |
| 78 | @property |
| 79 | def syms(self): |
| 80 | return self._syms |
| 81 | |
| 82 | @property |
| 83 | def binary_size(self): |
| 84 | return self._size |
| 85 | |
| 86 | @classmethod |
| 87 | def ClusteredSymbolLists(cls, sym_lists, size_map): |
| 88 | c = cls() |
| 89 | c.AddSymbolLists(sym_lists) |
| 90 | return c.ClusterToList(size_map) |
| 91 | |
| 92 | def __init__(self): |
| 93 | self._num_lists = None |
| 94 | self._neighbors = None |
| 95 | self._cluster_map = {} |
| 96 | self._symbol_size = lambda _: 0 # Maps a symbol to a size. |
| 97 | |
| 98 | def _MakeCluster(self, syms): |
| 99 | c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms)) |
| 100 | for s in syms: |
| 101 | self._cluster_map[s] = c |
| 102 | return c |
| 103 | |
| 104 | def ClusterOf(self, s): |
| 105 | if isinstance(s, self._Cluster): |
| 106 | assert self._cluster_map[s.syms[0]] == s |
| 107 | return s |
| 108 | if s in self._cluster_map: |
| 109 | return self._cluster_map[s] |
| 110 | return self._MakeCluster([s]) |
| 111 | |
| 112 | def Combine(self, a, b): |
| 113 | """Combine clusters. |
| 114 | |
| 115 | Args: |
| 116 | a, b: Clusters or str. The canonical cluster (ClusterOf) will be |
| 117 | used to do the combining. |
| 118 | |
| 119 | Returns: |
| 120 | A merged cluster from a and b, or None if a and b are in the same cluster. |
| 121 | """ |
| 122 | canonical_a = self.ClusterOf(a) |
| 123 | canonical_b = self.ClusterOf(b) |
| 124 | if canonical_a == canonical_b: |
| 125 | return None |
| 126 | return self._MakeCluster(canonical_a._syms + canonical_b._syms) |
| 127 | |
| 128 | def AddSymbolLists(self, sym_lists): |
| 129 | self._num_lists = len(sym_lists) |
| 130 | self._neighbors = self._CoalesceNeighbors( |
| 131 | self._ConstructNeighbors(sym_lists)) |
| 132 | |
| 133 | def _ConstructNeighbors(self, sym_lists): |
| 134 | neighbors = [] |
| 135 | for sym_list in sym_lists: |
| 136 | for i, s in enumerate(sym_list): |
Benoit Lize | 0c36952 | 2021-08-27 14:52:19 | [diff] [blame] | 137 | for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))): |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 138 | if s == sym_list[j]: |
| 139 | # Free functions that are static inline seem to be the only |
| 140 | # source of these duplicates. |
| 141 | continue |
| 142 | neighbors.append(Neighbor(s, sym_list[j], j - i)) |
| 143 | logging.info('Constructed %s symbol neighbors', len(neighbors)) |
| 144 | return neighbors |
| 145 | |
| 146 | def _CoalesceNeighbors(self, neighbors): |
| 147 | pairs = collections.defaultdict(list) |
| 148 | for n in neighbors: |
| 149 | pairs[(n.src, n.dst)].append(n.dist) |
| 150 | coalesced = [] |
| 151 | logging.info('Will coalesce over %s neighbor pairs', len(pairs)) |
| 152 | count = 0 |
| 153 | for (s, t) in pairs: |
| 154 | assert s != t, '{} != {}'.format(s, t) |
| 155 | if (t, s) in pairs and t < s: |
| 156 | # Only process each unordered pair once. |
| 157 | continue |
| 158 | count += 1 |
| 159 | if not (count % 1e6): |
| 160 | logging.info('tick') |
| 161 | distances = [] |
| 162 | if (s, t) in pairs: |
| 163 | distances.extend(pairs[(s, t)]) |
| 164 | if (t, s) in pairs: |
| 165 | distances.extend(-d for d in pairs[(t, s)]) |
| 166 | if distances: |
| 167 | num_missing = self._num_lists - len(distances) |
| 168 | avg_distance = (float(sum(distances)) + |
| 169 | self.FAR_DISTANCE * num_missing) / self._num_lists |
| 170 | if avg_distance > 0: |
| 171 | coalesced.append(Neighbor(s, t, avg_distance)) |
| 172 | else: |
| 173 | coalesced.append(Neighbor(t, s, avg_distance)) |
| 174 | return coalesced |
| 175 | |
| 176 | def ClusterToList(self, size_map=None): |
| 177 | """Merge the clusters with the smallest distances. |
| 178 | |
| 179 | Args: |
| 180 | size_map ({symbol: size} or None): Map symbol names to their size. Cluster |
| 181 | growth will be stopped at MAX_CLUSTER_SIZE. If None, sizes are taken to |
| 182 | be zero and cluster growth is not stopped. |
| 183 | |
| 184 | Returns: |
| 185 | An ordered list of symbols from AddSymbolLists, appropriately clustered. |
| 186 | """ |
| 187 | if size_map: |
| 188 | self._symbol_size = lambda s: size_map[s] |
| 189 | if not self._num_lists or not self._neighbors: |
| 190 | # Some sort of trivial set of symbol lists, such as all being |
| 191 | # length 1. Return an empty ordering. |
| 192 | return [] |
| 193 | logging.info('Sorting %s neighbors', len(self._neighbors)) |
| 194 | self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst)) |
| 195 | logging.info('Clustering...') |
| 196 | count = 0 |
| 197 | while self._neighbors: |
| 198 | count += 1 |
| 199 | if not (count % 1e6): |
| 200 | logging.info('tock') |
| 201 | neighbor = self._neighbors.pop() |
| 202 | src = self.ClusterOf(neighbor.src) |
| 203 | dst = self.ClusterOf(neighbor.dst) |
| 204 | if (src == dst or |
| 205 | src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE): |
| 206 | continue |
| 207 | self.Combine(src, dst) |
| 208 | if size_map: |
| 209 | clusters_by_size = sorted(list(set(self._cluster_map.values())), |
| 210 | key=lambda c: -c.binary_size) |
| 211 | else: |
| 212 | clusters_by_size = sorted(list(set(self._cluster_map.values())), |
| 213 | key=lambda c: -len(c.syms)) |
| 214 | logging.info('Produced %s clusters', len(clusters_by_size)) |
| 215 | logging.info('Top sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size) |
| 216 | for c in clusters_by_size[:4]]) |
| 217 | logging.info('Bottom sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size) |
| 218 | for c in clusters_by_size[-4:]]) |
| 219 | ordered_syms = [] |
| 220 | for c in clusters_by_size: |
| 221 | ordered_syms.extend(c.syms) |
| 222 | assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!' |
| 223 | return ordered_syms |
| 224 | |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 225 | def _GetOffsetSymbolName(processor, dump_offset): |
| 226 | dump_offset_to_symbol_info = \ |
Christopher Grant | dfe1bac | 2019-07-05 13:34:10 | [diff] [blame] | 227 | processor.GetDumpOffsetToSymboInfolIncludingWhitelist() |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 228 | offset_to_primary = processor.OffsetToPrimaryMap() |
Benoit Lize | 0c36952 | 2021-08-27 14:52:19 | [diff] [blame] | 229 | idx = dump_offset // 2 |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 230 | assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), ( |
| 231 | 'Dump offset out of binary range') |
| 232 | symbol_info = dump_offset_to_symbol_info[idx] |
| 233 | assert symbol_info, ('A return address (offset = 0x{:08x}) does not map ' |
| 234 | 'to any symbol'.format(dump_offset)) |
| 235 | assert symbol_info.offset in offset_to_primary, ( |
| 236 | 'Offset not found in primary map!') |
| 237 | return offset_to_primary[symbol_info.offset].name |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 238 | |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 239 | def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False): |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 240 | raw_offsets = profiles.GetProcessOffsetLists() |
| 241 | process_symbols = collections.defaultdict(list) |
| 242 | seen_symbols = set() |
| 243 | for p in raw_offsets: |
| 244 | for offsets in raw_offsets[p]: |
| 245 | symbol_names = processor.GetOrderedSymbols( |
| 246 | processor.GetReachedOffsetsFromDump(offsets)) |
| 247 | process_symbols[p].append(symbol_names) |
| 248 | seen_symbols |= set(symbol_names) |
| 249 | if limit_cluster_size: |
| 250 | name_map = processor.NameToSymbolMap() |
| 251 | size_map = {name: name_map[name].size for name in seen_symbols} |
| 252 | else: |
| 253 | size_map = None |
| 254 | |
| 255 | # Process names from the profile dumps that are treated specially. |
| 256 | _RENDERER = 'renderer' |
| 257 | _BROWSER = 'browser' |
| 258 | |
| 259 | assert _RENDERER in process_symbols |
| 260 | assert _BROWSER in process_symbols |
| 261 | |
| 262 | renderer_clustering = Clustering.ClusteredSymbolLists( |
| 263 | process_symbols[_RENDERER], size_map) |
| 264 | browser_clustering = Clustering.ClusteredSymbolLists( |
| 265 | process_symbols[_BROWSER], size_map) |
| 266 | other_lists = [] |
| 267 | for process, syms in process_symbols.items(): |
| 268 | if process not in (_RENDERER, _BROWSER): |
| 269 | other_lists.extend(syms) |
| 270 | if other_lists: |
| 271 | other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map) |
| 272 | else: |
| 273 | other_clustering = [] |
| 274 | |
| 275 | # Start with the renderer cluster to favor rendering performance. |
Alice Wang | 689ad13 | 2022-06-23 08:57:49 | [diff] [blame] | 276 | final_ordering = list(renderer_clustering) |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 277 | seen = set(final_ordering) |
| 278 | final_ordering.extend(s for s in browser_clustering if s not in seen) |
| 279 | seen |= set(browser_clustering) |
| 280 | final_ordering.extend(s for s in other_clustering if s not in seen) |
| 281 | |
| 282 | return final_ordering |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 283 | |
Rasika Navarange | b039ea18 | 2024-03-05 15:58:10 | [diff] [blame] | 284 | |
def ClusterOffsets(profiles, processor, limit_cluster_size=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) If True, stop growing clusters once they
      reach Clustering.MAX_CLUSTER_SIZE bytes.

  Returns:
    A list of clustered symbol offsets.
  """
  return _ClusterOffsetsLists(profiles, processor,
                              limit_cluster_size=limit_cluster_size)