Avi Drissman | dfd88085 | 2022-09-15 20:11:09 | [diff] [blame] | 1 | # Copyright 2018 The Chromium Authors |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Clustering for function call-graph. |
| 6 | |
| 7 | See the Clustering class for a detailed description. |
| 8 | """ |
| 9 | |
| 10 | import collections |
| 11 | import itertools |
| 12 | import logging |
| 13 | |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 14 | Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist')) |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 15 | CalleeInfo = collections.namedtuple('CalleeInfo', |
| 16 | ('index', 'callee_symbol', |
| 17 | 'misses', 'caller_and_count')) |
| 18 | CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count')) |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 19 | |
| 20 | |
Jesse McKenna | c0b694b7 | 2022-06-17 17:46:14 | [diff] [blame] | 21 | class Clustering: |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 22 | """Cluster symbols. |
| 23 | |
| 24 | We are given a list of the first function calls, ordered by |
| 25 | time. There are multiple lists: different benchmarks run multiple |
| 26 | times, as well as list from startup and then a second list after |
| 27 | startup (5 seconds) that runs until the benchmark memory dump. |
| 28 | |
| 29 | We have evidence (see below) that this simple ordering of code from a |
| 30 | single profiling run (a load of a website) improves performance, |
| 31 | presumably by improving code locality. To reconstruct this ordering |
| 32 | using profiling information from multiple files, we cluster. Doing |
| 33 | this clustering over multiple runs on the speedometer benchmark |
| 34 | recovered speedometer performance compared with the legacy benchmark. |
| 35 | |
| 36 | For each offset list, we record the distances between each symbol and |
| 37 | its neighborhood of the following k symbols (k=19, chosen |
| 38 | arbitrarily). For example, if we have an offset list of symbols |
| 39 | 'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->e, |
| 40 | 3), etc. Then we average distances of a given neighbor pair over all |
| 41 | seen symbol lists. If we see an inversion (for example, (b->a, 3), we |
| 42 | use this as a distance of -3). For each file that a given pair does |
| 43 | not appear, that is, if the pair does not appear in that file or they |
| 44 | are separated by 20 symbols, we use a large distance D (D=1000). The |
| 45 | distances are then averages over all files. If the average is |
| 46 | negative, the neighbor pair is inverted and the distance flipped. The |
| 47 | idea is that if two symbols appear near each other in all profiling |
| 48 | runs, there is high confidence that they are usually called |
| 49 | together. If they don't appear near in some runs, there is less |
| 50 | confidence that they should be colocated. Symbol distances are taken |
| 51 | only as following distances to avoid confusing double-counting |
| 52 | possibilities as well as to give a clear ordering to combining |
| 53 | clusters. |
| 54 | |
| 55 | Neighbors are sorted, and starting with the shortest distance, symbols |
| 56 | are coalesced into clusters. If the neighbor pair is (a->b), the |
| 57 | clusters containing a and b are combined in that order. If a and b are |
| 58 | already in the same cluster, nothing happens. After processing all |
| 59 | neighbors there is usually only one cluster; if there are multiple |
| 60 | clusters they are combined in order from largest to smallest (although |
| 61 | that choice may not matter). |
| 62 | |
| 63 | Cluster merging may optionally be halted if they get above the size |
| 64 | of an android page. As of November 2018 this slightly reduces |
| 65 | performance and should not be used (1.7% decline in speedometer2, |
| 66 | 450K native library memory regression). |
| 67 | """ |
| 68 | NEIGHBOR_DISTANCE = 20 |
| 69 | FAR_DISTANCE = 1000 |
| 70 | MAX_CLUSTER_SIZE = 4096 # 4k pages on android. |
| 71 | |
Jesse McKenna | c0b694b7 | 2022-06-17 17:46:14 | [diff] [blame] | 72 | class _Cluster: |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 73 | def __init__(self, syms, size): |
| 74 | assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster' |
| 75 | self._syms = syms |
| 76 | self._size = size |
| 77 | |
| 78 | @property |
| 79 | def syms(self): |
| 80 | return self._syms |
| 81 | |
| 82 | @property |
| 83 | def binary_size(self): |
| 84 | return self._size |
| 85 | |
| 86 | @classmethod |
| 87 | def ClusteredSymbolLists(cls, sym_lists, size_map): |
| 88 | c = cls() |
| 89 | c.AddSymbolLists(sym_lists) |
| 90 | return c.ClusterToList(size_map) |
| 91 | |
| 92 | def __init__(self): |
| 93 | self._num_lists = None |
| 94 | self._neighbors = None |
| 95 | self._cluster_map = {} |
| 96 | self._symbol_size = lambda _: 0 # Maps a symbol to a size. |
| 97 | |
| 98 | def _MakeCluster(self, syms): |
| 99 | c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms)) |
| 100 | for s in syms: |
| 101 | self._cluster_map[s] = c |
| 102 | return c |
| 103 | |
| 104 | def ClusterOf(self, s): |
| 105 | if isinstance(s, self._Cluster): |
| 106 | assert self._cluster_map[s.syms[0]] == s |
| 107 | return s |
| 108 | if s in self._cluster_map: |
| 109 | return self._cluster_map[s] |
| 110 | return self._MakeCluster([s]) |
| 111 | |
| 112 | def Combine(self, a, b): |
| 113 | """Combine clusters. |
| 114 | |
| 115 | Args: |
| 116 | a, b: Clusters or str. The canonical cluster (ClusterOf) will be |
| 117 | used to do the combining. |
| 118 | |
| 119 | Returns: |
| 120 | A merged cluster from a and b, or None if a and b are in the same cluster. |
| 121 | """ |
| 122 | canonical_a = self.ClusterOf(a) |
| 123 | canonical_b = self.ClusterOf(b) |
| 124 | if canonical_a == canonical_b: |
| 125 | return None |
| 126 | return self._MakeCluster(canonical_a._syms + canonical_b._syms) |
| 127 | |
| 128 | def AddSymbolLists(self, sym_lists): |
| 129 | self._num_lists = len(sym_lists) |
| 130 | self._neighbors = self._CoalesceNeighbors( |
| 131 | self._ConstructNeighbors(sym_lists)) |
| 132 | |
| 133 | def _ConstructNeighbors(self, sym_lists): |
| 134 | neighbors = [] |
| 135 | for sym_list in sym_lists: |
| 136 | for i, s in enumerate(sym_list): |
Benoit Lize | 0c36952 | 2021-08-27 14:52:19 | [diff] [blame] | 137 | for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))): |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 138 | if s == sym_list[j]: |
| 139 | # Free functions that are static inline seem to be the only |
| 140 | # source of these duplicates. |
| 141 | continue |
| 142 | neighbors.append(Neighbor(s, sym_list[j], j - i)) |
| 143 | logging.info('Constructed %s symbol neighbors', len(neighbors)) |
| 144 | return neighbors |
| 145 | |
| 146 | def _CoalesceNeighbors(self, neighbors): |
| 147 | pairs = collections.defaultdict(list) |
| 148 | for n in neighbors: |
| 149 | pairs[(n.src, n.dst)].append(n.dist) |
| 150 | coalesced = [] |
| 151 | logging.info('Will coalesce over %s neighbor pairs', len(pairs)) |
| 152 | count = 0 |
| 153 | for (s, t) in pairs: |
| 154 | assert s != t, '{} != {}'.format(s, t) |
| 155 | if (t, s) in pairs and t < s: |
| 156 | # Only process each unordered pair once. |
| 157 | continue |
| 158 | count += 1 |
| 159 | if not (count % 1e6): |
| 160 | logging.info('tick') |
| 161 | distances = [] |
| 162 | if (s, t) in pairs: |
| 163 | distances.extend(pairs[(s, t)]) |
| 164 | if (t, s) in pairs: |
| 165 | distances.extend(-d for d in pairs[(t, s)]) |
| 166 | if distances: |
| 167 | num_missing = self._num_lists - len(distances) |
| 168 | avg_distance = (float(sum(distances)) + |
| 169 | self.FAR_DISTANCE * num_missing) / self._num_lists |
| 170 | if avg_distance > 0: |
| 171 | coalesced.append(Neighbor(s, t, avg_distance)) |
| 172 | else: |
| 173 | coalesced.append(Neighbor(t, s, avg_distance)) |
| 174 | return coalesced |
| 175 | |
| 176 | def ClusterToList(self, size_map=None): |
| 177 | """Merge the clusters with the smallest distances. |
| 178 | |
| 179 | Args: |
| 180 | size_map ({symbol: size} or None): Map symbol names to their size. Cluster |
| 181 | growth will be stopped at MAX_CLUSTER_SIZE. If None, sizes are taken to |
| 182 | be zero and cluster growth is not stopped. |
| 183 | |
| 184 | Returns: |
| 185 | An ordered list of symbols from AddSymbolLists, appropriately clustered. |
| 186 | """ |
| 187 | if size_map: |
| 188 | self._symbol_size = lambda s: size_map[s] |
| 189 | if not self._num_lists or not self._neighbors: |
| 190 | # Some sort of trivial set of symbol lists, such as all being |
| 191 | # length 1. Return an empty ordering. |
| 192 | return [] |
| 193 | logging.info('Sorting %s neighbors', len(self._neighbors)) |
| 194 | self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst)) |
| 195 | logging.info('Clustering...') |
| 196 | count = 0 |
| 197 | while self._neighbors: |
| 198 | count += 1 |
| 199 | if not (count % 1e6): |
| 200 | logging.info('tock') |
| 201 | neighbor = self._neighbors.pop() |
| 202 | src = self.ClusterOf(neighbor.src) |
| 203 | dst = self.ClusterOf(neighbor.dst) |
| 204 | if (src == dst or |
| 205 | src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE): |
| 206 | continue |
| 207 | self.Combine(src, dst) |
| 208 | if size_map: |
| 209 | clusters_by_size = sorted(list(set(self._cluster_map.values())), |
| 210 | key=lambda c: -c.binary_size) |
| 211 | else: |
| 212 | clusters_by_size = sorted(list(set(self._cluster_map.values())), |
| 213 | key=lambda c: -len(c.syms)) |
| 214 | logging.info('Produced %s clusters', len(clusters_by_size)) |
| 215 | logging.info('Top sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size) |
| 216 | for c in clusters_by_size[:4]]) |
| 217 | logging.info('Bottom sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size) |
| 218 | for c in clusters_by_size[-4:]]) |
| 219 | ordered_syms = [] |
| 220 | for c in clusters_by_size: |
| 221 | ordered_syms.extend(c.syms) |
| 222 | assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!' |
| 223 | return ordered_syms |
| 224 | |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 225 | def _GetOffsetSymbolName(processor, dump_offset): |
| 226 | dump_offset_to_symbol_info = \ |
Christopher Grant | dfe1bac | 2019-07-05 13:34:10 | [diff] [blame] | 227 | processor.GetDumpOffsetToSymboInfolIncludingWhitelist() |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 228 | offset_to_primary = processor.OffsetToPrimaryMap() |
Benoit Lize | 0c36952 | 2021-08-27 14:52:19 | [diff] [blame] | 229 | idx = dump_offset // 2 |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 230 | assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), ( |
| 231 | 'Dump offset out of binary range') |
| 232 | symbol_info = dump_offset_to_symbol_info[idx] |
| 233 | assert symbol_info, ('A return address (offset = 0x{:08x}) does not map ' |
| 234 | 'to any symbol'.format(dump_offset)) |
| 235 | assert symbol_info.offset in offset_to_primary, ( |
| 236 | 'Offset not found in primary map!') |
| 237 | return offset_to_primary[symbol_info.offset].name |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 238 | |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 239 | def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False): |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 240 | raw_offsets = profiles.GetProcessOffsetLists() |
| 241 | process_symbols = collections.defaultdict(list) |
| 242 | seen_symbols = set() |
| 243 | for p in raw_offsets: |
| 244 | for offsets in raw_offsets[p]: |
| 245 | symbol_names = processor.GetOrderedSymbols( |
| 246 | processor.GetReachedOffsetsFromDump(offsets)) |
| 247 | process_symbols[p].append(symbol_names) |
| 248 | seen_symbols |= set(symbol_names) |
| 249 | if limit_cluster_size: |
| 250 | name_map = processor.NameToSymbolMap() |
| 251 | size_map = {name: name_map[name].size for name in seen_symbols} |
| 252 | else: |
| 253 | size_map = None |
| 254 | |
| 255 | # Process names from the profile dumps that are treated specially. |
| 256 | _RENDERER = 'renderer' |
| 257 | _BROWSER = 'browser' |
| 258 | |
| 259 | assert _RENDERER in process_symbols |
| 260 | assert _BROWSER in process_symbols |
| 261 | |
| 262 | renderer_clustering = Clustering.ClusteredSymbolLists( |
| 263 | process_symbols[_RENDERER], size_map) |
| 264 | browser_clustering = Clustering.ClusteredSymbolLists( |
| 265 | process_symbols[_BROWSER], size_map) |
| 266 | other_lists = [] |
| 267 | for process, syms in process_symbols.items(): |
| 268 | if process not in (_RENDERER, _BROWSER): |
| 269 | other_lists.extend(syms) |
| 270 | if other_lists: |
| 271 | other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map) |
| 272 | else: |
| 273 | other_clustering = [] |
| 274 | |
| 275 | # Start with the renderer cluster to favor rendering performance. |
Alice Wang | 689ad13 | 2022-06-23 08:57:49 | [diff] [blame] | 276 | final_ordering = list(renderer_clustering) |
Matthew Cary | 91df979 | 2018-11-30 14:35:15 | [diff] [blame] | 277 | seen = set(final_ordering) |
| 278 | final_ordering.extend(s for s in browser_clustering if s not in seen) |
| 279 | seen |= set(browser_clustering) |
| 280 | final_ordering.extend(s for s in other_clustering if s not in seen) |
| 281 | |
| 282 | return final_ordering |
Monica Basta | 99c101fa | 2019-05-21 13:50:05 | [diff] [blame] | 283 | |
Rasika Navarange | b039ea18 | 2024-03-05 15:58:10 | [diff] [blame] | 284 | |
def ClusterOffsets(profiles, processor, limit_cluster_size=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) If True, stop growing clusters once they
      reach Clustering.MAX_CLUSTER_SIZE bytes.

  Returns:
    A list of clustered symbol offsets.
  """
  return _ClusterOffsetsLists(profiles, processor,
                              limit_cluster_size=limit_cluster_size)