Blame - tools/cygprofile/cluster.py - chromium/src.git

blob: 53bad551af0c1e98f026b6dd2a5986548b59339b [file] [log] [blame]

Avi Drissman	dfd88085	2022-09-15 20:11:09	[diff] [blame^]	1	# Copyright 2018 The Chromium Authors
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	2	# Use of this source code is governed by a BSD-style license that can be
				3	# found in the LICENSE file.
				4
				5	"""Clustering for function call-graph.
				6
				7	See the Clustering class for a detailed description.
				8	"""
				9
				10	import collections
				11	import itertools
				12	import logging
				13
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	14	Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	15	CalleeInfo = collections.namedtuple('CalleeInfo',
				16	('index', 'callee_symbol',
				17	'misses', 'caller_and_count'))
				18	CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count'))
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	19
				20
Jesse McKenna	c0b694b7	2022-06-17 17:46:14	[diff] [blame]	21	class Clustering:
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	22	"""Cluster symbols.
				23
				24	We are given a list of the first function calls, ordered by
				25	time. There are multiple lists: different benchmarks run multiple
				26	times, as well as list from startup and then a second list after
				27	startup (5 seconds) that runs until the benchmark memory dump.
				28
				29	We have evidence (see below) that this simple ordering of code from a
				30	single profiling run (a load of a website) improves performance,
				31	presumably by improving code locality. To reconstruct this ordering
				32	using profiling information from multiple files, we cluster. Doing
				33	this clustering over multiple runs on the speedometer benchmark
				34	recovered speedometer performance compared with the legacy benchmark.
				35
				36	For each offset list, we record the distances between each symbol and
				37	its neighborhood of the following k symbols (k=19, chosen
				38	arbitrarily). For example, if we have an offset list of symbols
				39	'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->e,
				40	3), etc. Then we average distances of a given neighbor pair over all
				41	seen symbol lists. If we see an inversion (for example, (b->a, 3), we
				42	use this as a distance of -3). For each file that a given pair does
				43	not appear, that is, if the pair does not appear in that file or they
				44	are separated by 20 symbols, we use a large distance D (D=1000). The
				45	distances are then averages over all files. If the average is
				46	negative, the neighbor pair is inverted and the distance flipped. The
				47	idea is that if two symbols appear near each other in all profiling
				48	runs, there is high confidence that they are usually called
				49	together. If they don't appear near in some runs, there is less
				50	confidence that they should be colocated. Symbol distances are taken
				51	only as following distances to avoid confusing double-counting
				52	possibilities as well as to give a clear ordering to combining
				53	clusters.
				54
				55	Neighbors are sorted, and starting with the shortest distance, symbols
				56	are coalesced into clusters. If the neighbor pair is (a->b), the
				57	clusters containing a and b are combined in that order. If a and b are
				58	already in the same cluster, nothing happens. After processing all
				59	neighbors there is usually only one cluster; if there are multiple
				60	clusters they are combined in order from largest to smallest (although
				61	that choice may not matter).
				62
				63	Cluster merging may optionally be halted if they get above the size
				64	of an android page. As of November 2018 this slightly reduces
				65	performance and should not be used (1.7% decline in speedometer2,
				66	450K native library memory regression).
				67	"""
				68	NEIGHBOR_DISTANCE = 20
				69	FAR_DISTANCE = 1000
				70	MAX_CLUSTER_SIZE = 4096 # 4k pages on android.
				71
Jesse McKenna	c0b694b7	2022-06-17 17:46:14	[diff] [blame]	72	class _Cluster:
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	73	def __init__(self, syms, size):
				74	assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster'
				75	self._syms = syms
				76	self._size = size
				77
				78	@property
				79	def syms(self):
				80	return self._syms
				81
				82	@property
				83	def binary_size(self):
				84	return self._size
				85
				86	@classmethod
				87	def ClusteredSymbolLists(cls, sym_lists, size_map):
				88	c = cls()
				89	c.AddSymbolLists(sym_lists)
				90	return c.ClusterToList(size_map)
				91
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	92	@classmethod
				93	def ClusterSymbolCallGraph(cls, call_graph, whitelist):
				94	c = cls()
				95	c.AddSymbolCallGraph(call_graph, whitelist)
				96	return c.ClusterToList()
				97
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	98	def __init__(self):
				99	self._num_lists = None
				100	self._neighbors = None
				101	self._cluster_map = {}
				102	self._symbol_size = lambda _: 0 # Maps a symbol to a size.
				103
				104	def _MakeCluster(self, syms):
				105	c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms))
				106	for s in syms:
				107	self._cluster_map[s] = c
				108	return c
				109
				110	def ClusterOf(self, s):
				111	if isinstance(s, self._Cluster):
				112	assert self._cluster_map[s.syms[0]] == s
				113	return s
				114	if s in self._cluster_map:
				115	return self._cluster_map[s]
				116	return self._MakeCluster([s])
				117
				118	def Combine(self, a, b):
				119	"""Combine clusters.
				120
				121	Args:
				122	a, b: Clusters or str. The canonical cluster (ClusterOf) will be
				123	used to do the combining.
				124
				125	Returns:
				126	A merged cluster from a and b, or None if a and b are in the same cluster.
				127	"""
				128	canonical_a = self.ClusterOf(a)
				129	canonical_b = self.ClusterOf(b)
				130	if canonical_a == canonical_b:
				131	return None
				132	return self._MakeCluster(canonical_a._syms + canonical_b._syms)
				133
				134	def AddSymbolLists(self, sym_lists):
				135	self._num_lists = len(sym_lists)
				136	self._neighbors = self._CoalesceNeighbors(
				137	self._ConstructNeighbors(sym_lists))
				138
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	139	def AddSymbolCallGraph(self, call_graph, whitelist):
				140	self._num_lists = len(call_graph)
				141	self._neighbors = self._ConstructNeighborsFromGraph(call_graph, whitelist)
				142
				143	def _ConstructNeighborsFromGraph(self, call_graph, whitelist):
				144	neighbors = []
				145	pairs = collections.defaultdict()
				146	# Each list item is a list of dict.
				147	for process_items in call_graph:
				148	for callee_info in process_items:
				149	callee = callee_info.callee_symbol
				150	for caller_info in callee_info.caller_and_count:
				151	caller = caller_info.caller_symbol
				152	if caller in whitelist or callee == caller:
				153	continue
				154
Christopher Grant	dfe1bac	2019-07-05 13:34:10	[diff] [blame]	155	# Multiply by -1, the bigger the count the smaller the distance
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	156	# should be.
				157	dist = caller_info.count * -1
				158	if (caller, callee) in pairs:
				159	pairs[(caller, callee)] += dist
				160	elif (callee, caller) in pairs:
				161	pairs[(callee, caller)] += dist
				162	else:
				163	pairs[(caller, callee)] = dist
				164
				165	for (s, t) in pairs:
				166	assert s != t and (t, s) not in pairs, ('Unexpected shuffled pair:'
				167	' ({}, {})'.format(s, t))
				168	neighbors.append(Neighbor(s, t, pairs[(s, t)]))
				169
				170	return neighbors
				171
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	172	def _ConstructNeighbors(self, sym_lists):
				173	neighbors = []
				174	for sym_list in sym_lists:
				175	for i, s in enumerate(sym_list):
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	176	for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))):
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	177	if s == sym_list[j]:
				178	# Free functions that are static inline seem to be the only
				179	# source of these duplicates.
				180	continue
				181	neighbors.append(Neighbor(s, sym_list[j], j - i))
				182	logging.info('Constructed %s symbol neighbors', len(neighbors))
				183	return neighbors
				184
				185	def _CoalesceNeighbors(self, neighbors):
				186	pairs = collections.defaultdict(list)
				187	for n in neighbors:
				188	pairs[(n.src, n.dst)].append(n.dist)
				189	coalesced = []
				190	logging.info('Will coalesce over %s neighbor pairs', len(pairs))
				191	count = 0
				192	for (s, t) in pairs:
				193	assert s != t, '{} != {}'.format(s, t)
				194	if (t, s) in pairs and t < s:
				195	# Only process each unordered pair once.
				196	continue
				197	count += 1
				198	if not (count % 1e6):
				199	logging.info('tick')
				200	distances = []
				201	if (s, t) in pairs:
				202	distances.extend(pairs[(s, t)])
				203	if (t, s) in pairs:
				204	distances.extend(-d for d in pairs[(t, s)])
				205	if distances:
				206	num_missing = self._num_lists - len(distances)
				207	avg_distance = (float(sum(distances)) +
				208	self.FAR_DISTANCE * num_missing) / self._num_lists
				209	if avg_distance > 0:
				210	coalesced.append(Neighbor(s, t, avg_distance))
				211	else:
				212	coalesced.append(Neighbor(t, s, avg_distance))
				213	return coalesced
				214
				215	def ClusterToList(self, size_map=None):
				216	"""Merge the clusters with the smallest distances.
				217
				218	Args:
				219	size_map ({symbol: size} or None): Map symbol names to their size. Cluster
				220	growth will be stopped at MAX_CLUSTER_SIZE. If None, sizes are taken to
				221	be zero and cluster growth is not stopped.
				222
				223	Returns:
				224	An ordered list of symbols from AddSymbolLists, appropriately clustered.
				225	"""
				226	if size_map:
				227	self._symbol_size = lambda s: size_map[s]
				228	if not self._num_lists or not self._neighbors:
				229	# Some sort of trivial set of symbol lists, such as all being
				230	# length 1. Return an empty ordering.
				231	return []
				232	logging.info('Sorting %s neighbors', len(self._neighbors))
				233	self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst))
				234	logging.info('Clustering...')
				235	count = 0
				236	while self._neighbors:
				237	count += 1
				238	if not (count % 1e6):
				239	logging.info('tock')
				240	neighbor = self._neighbors.pop()
				241	src = self.ClusterOf(neighbor.src)
				242	dst = self.ClusterOf(neighbor.dst)
				243	if (src == dst or
				244	src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE):
				245	continue
				246	self.Combine(src, dst)
				247	if size_map:
				248	clusters_by_size = sorted(list(set(self._cluster_map.values())),
				249	key=lambda c: -c.binary_size)
				250	else:
				251	clusters_by_size = sorted(list(set(self._cluster_map.values())),
				252	key=lambda c: -len(c.syms))
				253	logging.info('Produced %s clusters', len(clusters_by_size))
				254	logging.info('Top sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
				255	for c in clusters_by_size[:4]])
				256	logging.info('Bottom sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
				257	for c in clusters_by_size[-4:]])
				258	ordered_syms = []
				259	for c in clusters_by_size:
				260	ordered_syms.extend(c.syms)
				261	assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
				262	return ordered_syms
				263
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	264	def _GetOffsetSymbolName(processor, dump_offset):
				265	dump_offset_to_symbol_info = \
Christopher Grant	dfe1bac	2019-07-05 13:34:10	[diff] [blame]	266	processor.GetDumpOffsetToSymboInfolIncludingWhitelist()
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	267	offset_to_primary = processor.OffsetToPrimaryMap()
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	268	idx = dump_offset // 2
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	269	assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), (
				270	'Dump offset out of binary range')
				271	symbol_info = dump_offset_to_symbol_info[idx]
				272	assert symbol_info, ('A return address (offset = 0x{:08x}) does not map '
				273	'to any symbol'.format(dump_offset))
				274	assert symbol_info.offset in offset_to_primary, (
				275	'Offset not found in primary map!')
				276	return offset_to_primary[symbol_info.offset].name
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	277
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	278	def _GetSymbolsCallGraph(profiles, processor):
				279	"""Maps each offset in the call graph to the corresponding symbol name.
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	280
				281	Args:
				282	profiles (ProfileManager) Manager of the profile dump files.
				283	processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
				284
				285	Returns:
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	286	A dict that maps each process type (ex: browser, renderer, etc.) to a list
				287	of processes of that type. Each process is a list that contains the
				288	call graph information. The call graph is represented by a list where each
				289	item is a dict that contains: callee, 3 caller-count pairs, misses.
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	290	"""
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	291	offsets_graph = profiles.GetProcessOffsetGraph();
				292	process_symbols_graph = collections.defaultdict(list)
				293
Christopher Grant	dfe1bac	2019-07-05 13:34:10	[diff] [blame]	294	# \|process_type\| can be : browser, renderer, gpu-process, etc.
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	295	for process_type in offsets_graph:
				296	for process in offsets_graph[process_type]:
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	297	process = sorted(process, key=lambda k: int(k['index']))
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	298	graph_list = []
				299	for el in process:
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	300	index = int(el['index'])
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	301	callee_symbol = _GetOffsetSymbolName(processor,
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	302	int(el['callee_offset']))
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	303	misses = 0
				304	caller_and_count = []
				305	for bucket in el['caller_and_count']:
Benoit Lize	0c36952	2021-08-27 14:52:19	[diff] [blame]	306	caller_offset = int(bucket['caller_offset'])
				307	count = int(bucket['count'])
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	308	if caller_offset == 0:
Christopher Grant	dfe1bac	2019-07-05 13:34:10	[diff] [blame]	309	misses += count
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	310	continue
				311
				312	caller_symbol_name = _GetOffsetSymbolName(processor, caller_offset)
				313	caller_info = CallerInfo(caller_symbol=caller_symbol_name,
				314	count=count)
				315	caller_and_count.append(caller_info)
				316
				317	callee_info = CalleeInfo(index=index,
				318	callee_symbol=callee_symbol,
				319	misses=misses,
				320	caller_and_count=caller_and_count)
				321	graph_list.append(callee_info)
				322	process_symbols_graph[process_type].append(graph_list)
				323	return process_symbols_graph
				324
				325	def _ClusterOffsetsFromCallGraph(profiles, processor):
				326	symbols_call_graph = _GetSymbolsCallGraph(profiles, processor)
				327	# Process names from the profile dumps that are treated specially.
				328	_RENDERER = 'renderer'
				329	_BROWSER = 'browser'
				330
				331	assert _RENDERER in symbols_call_graph
				332	assert _BROWSER in symbols_call_graph
				333	whitelist = processor.GetWhitelistSymbols()
				334	renderer_clustering = Clustering.ClusterSymbolCallGraph(
				335	symbols_call_graph[_RENDERER], whitelist)
				336	browser_clustering = Clustering.ClusterSymbolCallGraph(
				337	symbols_call_graph[_BROWSER], whitelist)
				338	other_lists = []
				339	for process in symbols_call_graph:
				340	if process not in (_RENDERER, _BROWSER):
				341	other_lists.extend(symbols_call_graph[process])
				342	if other_lists:
				343	other_clustering = Clustering.ClusterSymbolCallGraph(other_lists, whitelist)
				344	else:
				345	other_clustering = []
				346
				347	# Start with the renderer cluster to favor rendering performance.
Alice Wang	689ad13	2022-06-23 08:57:49	[diff] [blame]	348	final_ordering = list(renderer_clustering)
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	349	seen = set(final_ordering)
				350	final_ordering.extend(s for s in browser_clustering if s not in seen)
				351	seen \|= set(browser_clustering)
				352	final_ordering.extend(s for s in other_clustering if s not in seen)
				353
				354	return final_ordering
				355
				356	def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False):
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	357	raw_offsets = profiles.GetProcessOffsetLists()
				358	process_symbols = collections.defaultdict(list)
				359	seen_symbols = set()
				360	for p in raw_offsets:
				361	for offsets in raw_offsets[p]:
				362	symbol_names = processor.GetOrderedSymbols(
				363	processor.GetReachedOffsetsFromDump(offsets))
				364	process_symbols[p].append(symbol_names)
				365	seen_symbols \|= set(symbol_names)
				366	if limit_cluster_size:
				367	name_map = processor.NameToSymbolMap()
				368	size_map = {name: name_map[name].size for name in seen_symbols}
				369	else:
				370	size_map = None
				371
				372	# Process names from the profile dumps that are treated specially.
				373	_RENDERER = 'renderer'
				374	_BROWSER = 'browser'
				375
				376	assert _RENDERER in process_symbols
				377	assert _BROWSER in process_symbols
				378
				379	renderer_clustering = Clustering.ClusteredSymbolLists(
				380	process_symbols[_RENDERER], size_map)
				381	browser_clustering = Clustering.ClusteredSymbolLists(
				382	process_symbols[_BROWSER], size_map)
				383	other_lists = []
				384	for process, syms in process_symbols.items():
				385	if process not in (_RENDERER, _BROWSER):
				386	other_lists.extend(syms)
				387	if other_lists:
				388	other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map)
				389	else:
				390	other_clustering = []
				391
				392	# Start with the renderer cluster to favor rendering performance.
Alice Wang	689ad13	2022-06-23 08:57:49	[diff] [blame]	393	final_ordering = list(renderer_clustering)
Matthew Cary	91df979	2018-11-30 14:35:15	[diff] [blame]	394	seen = set(final_ordering)
				395	final_ordering.extend(s for s in browser_clustering if s not in seen)
				396	seen \|= set(browser_clustering)
				397	final_ordering.extend(s for s in other_clustering if s not in seen)
				398
				399	return final_ordering
Monica Basta	99c101fa	2019-05-21 13:50:05	[diff] [blame]	400
				401	def ClusterOffsets(profiles, processor, limit_cluster_size=False,
				402	call_graph=False):
				403	"""Cluster profile offsets.
				404
				405	Args:
				406	profiles (ProfileManager) Manager of the profile dump files.
				407	processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
				408	call_graph (bool) whether the call graph instrumentation was used.
				409
				410	Returns:
				411	A list of clustered symbol offsets.
				412	"""
				413	if not call_graph:
				414	return _ClusterOffsetsLists(profiles, processor, limit_cluster_size)
Jesse McKenna	c0b694b7	2022-06-17 17:46:14	[diff] [blame]	415	return _ClusterOffsetsFromCallGraph(profiles, processor)