# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Clustering for function call-graph.

See the Clustering class for a detailed description.
"""

import collections
import itertools
import logging
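
# A Neighbor records that symbol |dst| was observed |dist| positions after
# symbol |src| in an offset list (for call-graph input, |dist| is a negated
# call count). CalleeInfo describes one call-graph node: its dump index, the
# callee symbol, the count of calls whose caller offset could not be resolved
# (|misses|), and the resolved CallerInfo (caller symbol, call count) pairs.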
Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
CalleeInfo = collections.namedtuple('CalleeInfo',
                                    ('index', 'callee_symbol',
                                     'misses', 'caller_and_count'))
CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count'))


class Clustering:
  """Cluster symbols.

  We are given a list of the first function calls, ordered by time. There
  are multiple lists: different benchmarks run multiple times, as well as a
  list from startup and then a second list, started 5 seconds after startup,
  that runs until the benchmark memory dump.

  We have evidence (see below) that this simple ordering of code from a
  single profiling run (a load of a website) improves performance,
  presumably by improving code locality. To reconstruct this ordering
  using profiling information from multiple files, we cluster. Doing
  this clustering over multiple runs on the speedometer benchmark
  recovered speedometer performance compared with the legacy benchmark.

  For each offset list, we record the distances between each symbol and
  its neighborhood of the following k symbols (k=19, chosen
  arbitrarily). For example, if we have an offset list of symbols
  'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->e,
  3), etc. Then we average the distances of a given neighbor pair over
  all seen symbol lists. If we see an inversion (for example, (b->a, 3)),
  we use it as a distance of -3. For each file in which a given pair does
  not appear, that is, if the pair is absent from that file or the two
  symbols are separated by 20 or more symbols, we use a large distance D
  (D=1000). The distances are then averaged over all files. If the average
  is negative, the neighbor pair is inverted and the distance flipped. The
  idea is that if two symbols appear near each other in all profiling
  runs, there is high confidence that they are usually called together. If
  they don't appear near each other in some runs, there is less confidence
  that they should be colocated. Symbol distances are taken only as
  following distances to avoid confusing double-counting possibilities as
  well as to give a clear ordering when combining clusters.
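
  For example, with four offset lists in which the pair (a, b) is seen at
  distances 2 and 3 in two of the lists and not at all in the other two, the
  average distance is (2 + 3 + 2 * 1000) / 4 = 501.25, and the pair is kept
  as (a->b, 501.25) because the average is positive.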

  Neighbors are sorted, and starting with the shortest distance, symbols
  are coalesced into clusters. If the neighbor pair is (a->b), the
  clusters containing a and b are combined in that order. If a and b are
  already in the same cluster, nothing happens. After processing all
  neighbors there is usually only one cluster; if there are multiple
  clusters they are combined in order from largest to smallest (although
  that choice may not matter).
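
  For example, given the sorted neighbors (a->b, 1) and (c->a, 2), the
  cluster {a} is first combined with {b} to give the ordering [a, b]; then
  the cluster containing c is combined with the cluster containing a,
  yielding [c, a, b].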

  Cluster merging may optionally be halted when a merged cluster would
  exceed the size of an Android page. As of November 2018 this slightly
  reduces performance and should not be used (1.7% decline in speedometer2,
  450K native library memory regression).
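
  A minimal usage sketch (illustrative only; the symbol names and lists are
  made up):
    symbol_lists = [['a', 'b', 'c', 'd'],
                    ['a', 'c', 'b', 'd'],
                    ['b', 'a', 'd', 'c']]
    ordered = Clustering.ClusteredSymbolLists(symbol_lists, size_map=None)
  The result is a single ordering of 'a', 'b', 'c' and 'd' that tends to keep
  symbols appearing close together in most lists adjacent.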
67 """
68 NEIGHBOR_DISTANCE = 20
69 FAR_DISTANCE = 1000
70 MAX_CLUSTER_SIZE = 4096 # 4k pages on android.
71
  class _Cluster:
    def __init__(self, syms, size):
      assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster'
      self._syms = syms
      self._size = size

    @property
    def syms(self):
      return self._syms

    @property
    def binary_size(self):
      return self._size

  @classmethod
  def ClusteredSymbolLists(cls, sym_lists, size_map):
    c = cls()
    c.AddSymbolLists(sym_lists)
    return c.ClusterToList(size_map)

  @classmethod
  def ClusterSymbolCallGraph(cls, call_graph, whitelist):
    c = cls()
    c.AddSymbolCallGraph(call_graph, whitelist)
    return c.ClusterToList()

  def __init__(self):
    self._num_lists = None
    self._neighbors = None
    self._cluster_map = {}
    self._symbol_size = lambda _: 0  # Maps a symbol to a size.

  def _MakeCluster(self, syms):
    c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms))
    for s in syms:
      self._cluster_map[s] = c
    return c

  def ClusterOf(self, s):
    if isinstance(s, self._Cluster):
      assert self._cluster_map[s.syms[0]] == s
      return s
    if s in self._cluster_map:
      return self._cluster_map[s]
    return self._MakeCluster([s])

  def Combine(self, a, b):
    """Combine clusters.

    Args:
      a, b: Clusters or str. The canonical cluster (ClusterOf) will be
        used to do the combining.

    Returns:
      A merged cluster from a and b, or None if a and b are in the same
      cluster.
    """
    canonical_a = self.ClusterOf(a)
    canonical_b = self.ClusterOf(b)
    if canonical_a == canonical_b:
      return None
    return self._MakeCluster(canonical_a._syms + canonical_b._syms)

  def AddSymbolLists(self, sym_lists):
    self._num_lists = len(sym_lists)
    self._neighbors = self._CoalesceNeighbors(
        self._ConstructNeighbors(sym_lists))

  def AddSymbolCallGraph(self, call_graph, whitelist):
    self._num_lists = len(call_graph)
    self._neighbors = self._ConstructNeighborsFromGraph(call_graph, whitelist)

  def _ConstructNeighborsFromGraph(self, call_graph, whitelist):
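    """Builds Neighbor tuples from symbolized call-graph data.

    Call counts for each (caller, callee) pair are accumulated over all
    processes and negated, so that more frequently called pairs get smaller
    (more negative) distances.

    Args:
      call_graph: A list of processes, each a list of CalleeInfo.
      whitelist: Symbols to ignore when they appear as callers.

    Returns:
      A list of Neighbor tuples, one per unordered symbol pair seen.
    """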
    neighbors = []
    pairs = {}
    # Each item in |call_graph| is one process's list of CalleeInfo entries.
    for process_items in call_graph:
      for callee_info in process_items:
        callee = callee_info.callee_symbol
        for caller_info in callee_info.caller_and_count:
          caller = caller_info.caller_symbol
          if caller in whitelist or callee == caller:
            continue

          # Multiply by -1: the bigger the count, the smaller the distance
          # should be.
          dist = caller_info.count * -1
          if (caller, callee) in pairs:
            pairs[(caller, callee)] += dist
          elif (callee, caller) in pairs:
            pairs[(callee, caller)] += dist
          else:
            pairs[(caller, callee)] = dist

    for (s, t) in pairs:
      assert s != t and (t, s) not in pairs, ('Unexpected shuffled pair:'
                                              ' ({}, {})'.format(s, t))
      neighbors.append(Neighbor(s, t, pairs[(s, t)]))

    return neighbors

  def _ConstructNeighbors(self, sym_lists):
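    """Records distances from each symbol to the symbols that follow it.

    Each symbol is paired with up to NEIGHBOR_DISTANCE - 1 following symbols.
    For example, the list ['a', 'b', 'c'] produces the neighbors
    (a->b, 1), (a->c, 2) and (b->c, 1).
    """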
    neighbors = []
    for sym_list in sym_lists:
      for i, s in enumerate(sym_list):
        for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))):
          if s == sym_list[j]:
            # Free functions that are static inline seem to be the only
            # source of these duplicates.
            continue
          neighbors.append(Neighbor(s, sym_list[j], j - i))
    logging.info('Constructed %s symbol neighbors', len(neighbors))
    return neighbors

  def _CoalesceNeighbors(self, neighbors):
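    """Averages neighbor distances for each symbol pair over all lists.

    Observations of the reversed pair (t->s) count negatively towards (s->t),
    and every list in which a pair is not observed contributes FAR_DISTANCE.
    If the average is negative, the direction of the pair is flipped.
    """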
    pairs = collections.defaultdict(list)
    for n in neighbors:
      pairs[(n.src, n.dst)].append(n.dist)
    coalesced = []
    logging.info('Will coalesce over %s neighbor pairs', len(pairs))
    count = 0
    for (s, t) in pairs:
      assert s != t, '{} != {}'.format(s, t)
      if (t, s) in pairs and t < s:
        # Only process each unordered pair once.
        continue
      count += 1
      if not (count % 1e6):
        logging.info('tick')
      distances = []
      if (s, t) in pairs:
        distances.extend(pairs[(s, t)])
      if (t, s) in pairs:
        distances.extend(-d for d in pairs[(t, s)])
      if distances:
        num_missing = self._num_lists - len(distances)
        avg_distance = (float(sum(distances)) +
                        self.FAR_DISTANCE * num_missing) / self._num_lists
        if avg_distance > 0:
          coalesced.append(Neighbor(s, t, avg_distance))
        else:
          coalesced.append(Neighbor(t, s, avg_distance))
    return coalesced

  def ClusterToList(self, size_map=None):
    """Merge the clusters with the smallest distances.

    Args:
      size_map ({symbol: size} or None): Map symbol names to their size.
        Cluster growth will be stopped at MAX_CLUSTER_SIZE. If None, sizes
        are taken to be zero and cluster growth is not stopped.

    Returns:
      An ordered list of symbols from AddSymbolLists, appropriately clustered.
    """
    if size_map:
      self._symbol_size = lambda s: size_map[s]
    if not self._num_lists or not self._neighbors:
      # Some sort of trivial set of symbol lists, such as all being
      # length 1. Return an empty ordering.
      return []
    logging.info('Sorting %s neighbors', len(self._neighbors))
    self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst))
    logging.info('Clustering...')
    count = 0
    while self._neighbors:
      count += 1
      if not (count % 1e6):
        logging.info('tock')
      neighbor = self._neighbors.pop()
      src = self.ClusterOf(neighbor.src)
      dst = self.ClusterOf(neighbor.dst)
      if (src == dst or
          src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE):
        continue
      self.Combine(src, dst)
    if size_map:
      clusters_by_size = sorted(list(set(self._cluster_map.values())),
                                key=lambda c: -c.binary_size)
    else:
      clusters_by_size = sorted(list(set(self._cluster_map.values())),
                                key=lambda c: -len(c.syms))
    logging.info('Produced %s clusters', len(clusters_by_size))
    logging.info('Top sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
                                   for c in clusters_by_size[:4]])
    logging.info('Bottom sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
                                      for c in clusters_by_size[-4:]])
    ordered_syms = []
    for c in clusters_by_size:
      ordered_syms.extend(c.syms)
    assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
    return ordered_syms

def _GetOffsetSymbolName(processor, dump_offset):
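  """Maps a dump offset to the name of its primary symbol.

  The offset is resolved through the processor's dump-offset-to-symbol table
  (indexed by dump_offset // 2) and then through its offset-to-primary map.
  Asserts if the offset cannot be resolved.
  """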
  dump_offset_to_symbol_info = (
      processor.GetDumpOffsetToSymboInfolIncludingWhitelist())
  offset_to_primary = processor.OffsetToPrimaryMap()
  idx = dump_offset // 2
  assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), (
      'Dump offset out of binary range')
  symbol_info = dump_offset_to_symbol_info[idx]
  assert symbol_info, ('A return address (offset = 0x{:08x}) does not map '
                       'to any symbol'.format(dump_offset))
  assert symbol_info.offset in offset_to_primary, (
      'Offset not found in primary map!')
  return offset_to_primary[symbol_info.offset].name

def _GetSymbolsCallGraph(profiles, processor):
  """Maps each offset in the call graph to the corresponding symbol name.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.

  Returns:
    A dict that maps each process type (ex: browser, renderer, etc.) to a list
    of processes of that type. Each process is a list that contains the call
    graph information, represented as a list of CalleeInfo entries holding the
    callee symbol, its miss count and its (caller symbol, count) pairs.
  """
  offsets_graph = profiles.GetProcessOffsetGraph()
  process_symbols_graph = collections.defaultdict(list)

  # |process_type| can be: browser, renderer, gpu-process, etc.
  for process_type in offsets_graph:
    for process in offsets_graph[process_type]:
      process = sorted(process, key=lambda k: int(k['index']))
      graph_list = []
      for el in process:
        index = int(el['index'])
        callee_symbol = _GetOffsetSymbolName(processor,
                                             int(el['callee_offset']))
        misses = 0
        caller_and_count = []
        for bucket in el['caller_and_count']:
          caller_offset = int(bucket['caller_offset'])
          count = int(bucket['count'])
          if caller_offset == 0:
            misses += count
            continue

          caller_symbol_name = _GetOffsetSymbolName(processor, caller_offset)
          caller_info = CallerInfo(caller_symbol=caller_symbol_name,
                                   count=count)
          caller_and_count.append(caller_info)

        callee_info = CalleeInfo(index=index,
                                 callee_symbol=callee_symbol,
                                 misses=misses,
                                 caller_and_count=caller_and_count)
        graph_list.append(callee_info)
      process_symbols_graph[process_type].append(graph_list)
  return process_symbols_graph

def _ClusterOffsetsFromCallGraph(profiles, processor):
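  """Clusters symbols from call-graph profile dumps.

  Renderer and browser processes are clustered separately, and all remaining
  process types are clustered together. The final ordering is renderer first,
  then browser, then the rest, with duplicates removed.
  """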
  symbols_call_graph = _GetSymbolsCallGraph(profiles, processor)
  # Process names from the profile dumps that are treated specially.
  _RENDERER = 'renderer'
  _BROWSER = 'browser'

  assert _RENDERER in symbols_call_graph
  assert _BROWSER in symbols_call_graph
  whitelist = processor.GetWhitelistSymbols()
  renderer_clustering = Clustering.ClusterSymbolCallGraph(
      symbols_call_graph[_RENDERER], whitelist)
  browser_clustering = Clustering.ClusterSymbolCallGraph(
      symbols_call_graph[_BROWSER], whitelist)
  other_lists = []
  for process in symbols_call_graph:
    if process not in (_RENDERER, _BROWSER):
      other_lists.extend(symbols_call_graph[process])
  if other_lists:
    other_clustering = Clustering.ClusterSymbolCallGraph(other_lists, whitelist)
  else:
    other_clustering = []

  # Start with the renderer cluster to favor rendering performance.
  final_ordering = list(renderer_clustering)
  seen = set(final_ordering)
  final_ordering.extend(s for s in browser_clustering if s not in seen)
  seen |= set(browser_clustering)
  final_ordering.extend(s for s in other_clustering if s not in seen)

  return final_ordering

def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False):
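  """Clusters symbols from plain offset-list profile dumps.

  Offsets are translated to symbol names per process, clustered separately
  for renderer and browser processes (and together for all other process
  types), then merged into one ordering with the renderer clusters first. If
  limit_cluster_size is True, clusters stop growing at MAX_CLUSTER_SIZE.
  """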
  raw_offsets = profiles.GetProcessOffsetLists()
  process_symbols = collections.defaultdict(list)
  seen_symbols = set()
  for p in raw_offsets:
    for offsets in raw_offsets[p]:
      symbol_names = processor.GetOrderedSymbols(
          processor.GetReachedOffsetsFromDump(offsets))
      process_symbols[p].append(symbol_names)
      seen_symbols |= set(symbol_names)
  if limit_cluster_size:
    name_map = processor.NameToSymbolMap()
    size_map = {name: name_map[name].size for name in seen_symbols}
  else:
    size_map = None

  # Process names from the profile dumps that are treated specially.
  _RENDERER = 'renderer'
  _BROWSER = 'browser'

  assert _RENDERER in process_symbols
  assert _BROWSER in process_symbols

  renderer_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_RENDERER], size_map)
  browser_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_BROWSER], size_map)
  other_lists = []
  for process, syms in process_symbols.items():
    if process not in (_RENDERER, _BROWSER):
      other_lists.extend(syms)
  if other_lists:
    other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map)
  else:
    other_clustering = []

  # Start with the renderer cluster to favor rendering performance.
  final_ordering = list(renderer_clustering)
  seen = set(final_ordering)
  final_ordering.extend(s for s in browser_clustering if s not in seen)
  seen |= set(browser_clustering)
  final_ordering.extend(s for s in other_clustering if s not in seen)

  return final_ordering

def ClusterOffsets(profiles, processor, limit_cluster_size=False,
                   call_graph=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) Whether to stop cluster growth at
      MAX_CLUSTER_SIZE. Only used when call_graph is False.
    call_graph (bool) Whether the call graph instrumentation was used.

  Returns:
    A list of clustered symbol names.
  """
  if not call_graph:
    return _ClusterOffsetsLists(profiles, processor, limit_cluster_size)
  return _ClusterOffsetsFromCallGraph(profiles, processor)