# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Clustering for function call-graph.

See the Clustering class for a detailed description.
"""

import collections
import itertools
import logging

Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
CalleeInfo = collections.namedtuple('CalleeInfo',
                                    ('index', 'callee_symbol',
                                     'misses', 'caller_and_count'))
CallerInfo = collections.namedtuple('CallerInfo', ('caller_symbol', 'count'))


class Clustering:
  """Cluster symbols.

  We are given lists of the first function calls, ordered by
  time. There are multiple lists: different benchmarks run multiple
  times, as well as a list from startup and then a second list, started
  5 seconds after startup, that runs until the benchmark memory dump.

  We have evidence (see below) that this simple ordering of code from a
  single profiling run (a load of a website) improves performance,
  presumably by improving code locality. To reconstruct this ordering
  using profiling information from multiple files, we cluster. Doing
  this clustering over multiple runs on the speedometer benchmark
  recovered speedometer performance compared with the legacy benchmark.

  For each offset list, we record the distances between each symbol and
  its neighborhood of the following k symbols (k=19, chosen
  arbitrarily). For example, if we have an offset list of symbols
  'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1),
  (b->e, 3), etc. Then, for each neighbor pair, we average the
  distances over all seen symbol lists. If we see an inversion (for
  example, (b->a, 3)), we use it as a negative distance (-3). For each
  file in which a given pair does not appear, that is, if the pair is
  absent from that file or its symbols are separated by 20 or more
  positions, we use a large distance D (D=1000). If the average over
  all files is negative, the neighbor pair is inverted and the distance
  flipped. The idea is that if two symbols appear near each other in
  all profiling runs, there is high confidence that they are usually
  called together. If they don't appear near each other in some runs,
  there is less confidence that they should be colocated. Symbol
  distances are taken only as following distances to avoid confusing
  double-counting possibilities as well as to give a clear ordering to
  combining clusters.
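
  For example, with three symbol lists in which (a->b) is seen at
  distances 2 and 3 and the pair is missing from the third list, the
  average is (2 + 3 + 1000) / 3 = 335. If instead the third list showed
  (b->a, 1), the distances would be 2, 3 and -1, averaging to 4/3.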

  Neighbors are sorted, and starting with the shortest distance, symbols
  are coalesced into clusters. If the neighbor pair is (a->b), the
  clusters containing a and b are combined in that order. If a and b are
  already in the same cluster, nothing happens. After processing all
  neighbors there is usually only one cluster; if there are multiple
  clusters, they are combined in order from largest to smallest
  (although that choice may not matter).

  Cluster merging may optionally be halted when a merged cluster would
  exceed the size of an Android page. As of November 2018 this slightly
  reduces performance and should not be used (a 1.7% decline in
  speedometer2 and a 450K native library memory regression).
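
  A minimal illustration (with hypothetical one-letter symbols):

    Clustering.ClusteredSymbolLists([['a', 'b', 'c'], ['a', 'c', 'b']],
                                    size_map=None)

  Here (b->c) and (c->b) cancel to an average distance of 0, so b and c
  are coalesced first, and the result is ['a', 'c', 'b'].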
  """
  NEIGHBOR_DISTANCE = 20
  FAR_DISTANCE = 1000
  MAX_CLUSTER_SIZE = 4096  # Android page size (4KiB).

  class _Cluster:
    def __init__(self, syms, size):
      assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster'
      self._syms = syms
      self._size = size

    @property
    def syms(self):
      return self._syms

    @property
    def binary_size(self):
      return self._size

  @classmethod
  def ClusteredSymbolLists(cls, sym_lists, size_map):
    c = cls()
    c.AddSymbolLists(sym_lists)
    return c.ClusterToList(size_map)

  def __init__(self):
    self._num_lists = None
    self._neighbors = None
    self._cluster_map = {}
    self._symbol_size = lambda _: 0  # Maps a symbol to a size.

  def _MakeCluster(self, syms):
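    """Makes a cluster of syms, mapping each symbol to the new cluster."""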
    c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms))
    for s in syms:
      self._cluster_map[s] = c
    return c

  def ClusterOf(self, s):
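    """Returns the cluster containing s, creating a singleton if needed.

    Args:
      s: A symbol or a _Cluster.
    """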
    if isinstance(s, self._Cluster):
      assert self._cluster_map[s.syms[0]] == s
      return s
    if s in self._cluster_map:
      return self._cluster_map[s]
    return self._MakeCluster([s])

  def Combine(self, a, b):
    """Combine clusters.

    Args:
      a, b: Clusters or str. The canonical cluster (ClusterOf) will be
        used to do the combining.

    Returns:
      A merged cluster from a and b, or None if a and b are already in
      the same cluster.
    """
    canonical_a = self.ClusterOf(a)
    canonical_b = self.ClusterOf(b)
    if canonical_a == canonical_b:
      return None
    return self._MakeCluster(canonical_a.syms + canonical_b.syms)

  def AddSymbolLists(self, sym_lists):
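    """Computes coalesced neighbor distances from the given symbol lists."""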
    self._num_lists = len(sym_lists)
    self._neighbors = self._CoalesceNeighbors(
        self._ConstructNeighbors(sym_lists))

  def _ConstructNeighbors(self, sym_lists):
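    """Records a Neighbor for each symbol pair within NEIGHBOR_DISTANCE."""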
    neighbors = []
    for sym_list in sym_lists:
      for i, s in enumerate(sym_list):
        for j in range(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))):
          if s == sym_list[j]:
            # Free functions that are static inline seem to be the only
            # source of these duplicates.
            continue
          neighbors.append(Neighbor(s, sym_list[j], j - i))
    logging.info('Constructed %s symbol neighbors', len(neighbors))
    return neighbors

  def _CoalesceNeighbors(self, neighbors):
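    """Averages the distance of each unordered neighbor pair over all lists.

    Pairs missing from a list are penalized with FAR_DISTANCE, and pairs
    whose average distance is negative are inverted, as described in the
    class docstring.
    """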
    pairs = collections.defaultdict(list)
    for n in neighbors:
      pairs[(n.src, n.dst)].append(n.dist)
    coalesced = []
    logging.info('Will coalesce over %s neighbor pairs', len(pairs))
    count = 0
    for (s, t) in pairs:
      assert s != t, '{} != {}'.format(s, t)
      if (t, s) in pairs and t < s:
        # Only process each unordered pair once.
        continue
      count += 1
      if not (count % 1e6):
        logging.info('tick')
      distances = []
      if (s, t) in pairs:
        distances.extend(pairs[(s, t)])
      if (t, s) in pairs:
        distances.extend(-d for d in pairs[(t, s)])
      if distances:
        num_missing = self._num_lists - len(distances)
        avg_distance = (float(sum(distances)) +
                        self.FAR_DISTANCE * num_missing) / self._num_lists
        if avg_distance > 0:
          coalesced.append(Neighbor(s, t, avg_distance))
        else:
          # A negative average means t tends to precede s: invert the
          # pair and flip the distance, per the class docstring.
          coalesced.append(Neighbor(t, s, -avg_distance))
    return coalesced

  def ClusterToList(self, size_map=None):
    """Merge the clusters with the smallest distances.

    Args:
      size_map ({symbol: size} or None): Map symbol names to their size.
        Cluster growth will be stopped at MAX_CLUSTER_SIZE. If None,
        sizes are taken to be zero and cluster growth is not stopped.

    Returns:
      An ordered list of symbols from AddSymbolLists, appropriately
      clustered.
    """
    if size_map:
      self._symbol_size = lambda s: size_map[s]
    if not self._num_lists or not self._neighbors:
      # Some sort of trivial set of symbol lists, such as all being
      # length 1. Return an empty ordering.
      return []
    logging.info('Sorting %s neighbors', len(self._neighbors))
    self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst))
    logging.info('Clustering...')
    count = 0
    while self._neighbors:
      count += 1
      if not (count % 1e6):
        logging.info('tock')
      neighbor = self._neighbors.pop()
      src = self.ClusterOf(neighbor.src)
      dst = self.ClusterOf(neighbor.dst)
      if (src == dst or
          src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE):
        continue
      self.Combine(src, dst)
    if size_map:
      clusters_by_size = sorted(set(self._cluster_map.values()),
                                key=lambda c: -c.binary_size)
    else:
      clusters_by_size = sorted(set(self._cluster_map.values()),
                                key=lambda c: -len(c.syms))
    logging.info('Produced %s clusters', len(clusters_by_size))
    logging.info('Top sizes: %s',
                 ['{}/{}'.format(len(c.syms), c.binary_size)
                  for c in clusters_by_size[:4]])
    logging.info('Bottom sizes: %s',
                 ['{}/{}'.format(len(c.syms), c.binary_size)
                  for c in clusters_by_size[-4:]])
    ordered_syms = []
    for c in clusters_by_size:
      ordered_syms.extend(c.syms)
    assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
    return ordered_syms


def _GetOffsetSymbolName(processor, dump_offset):
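  """Maps a dump offset to the name of its primary symbol."""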
  dump_offset_to_symbol_info = (
      processor.GetDumpOffsetToSymboInfolIncludingWhitelist())
  offset_to_primary = processor.OffsetToPrimaryMap()
  # The symbol info list has one entry per 2 bytes of binary, hence the
  # halving of the offset.
  idx = dump_offset // 2
  assert dump_offset >= 0 and idx < len(dump_offset_to_symbol_info), (
      'Dump offset out of binary range')
  symbol_info = dump_offset_to_symbol_info[idx]
  assert symbol_info, ('A return address (offset = 0x{:08x}) does not map '
                       'to any symbol'.format(dump_offset))
  assert symbol_info.offset in offset_to_primary, (
      'Offset not found in primary map!')
  return offset_to_primary[symbol_info.offset].name


def _ClusterOffsetsLists(profiles, processor, limit_cluster_size=False):
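  """Clusters each process's symbol lists and merges the orderings.

  Renderer symbols are ordered first to favor rendering performance,
  followed by browser symbols and then those of any other processes,
  with duplicates removed.
  """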
  raw_offsets = profiles.GetProcessOffsetLists()
  process_symbols = collections.defaultdict(list)
  seen_symbols = set()
  for p in raw_offsets:
    for offsets in raw_offsets[p]:
      symbol_names = processor.GetOrderedSymbols(
          processor.GetReachedOffsetsFromDump(offsets))
      process_symbols[p].append(symbol_names)
      seen_symbols |= set(symbol_names)
  if limit_cluster_size:
    name_map = processor.NameToSymbolMap()
    size_map = {name: name_map[name].size for name in seen_symbols}
  else:
    size_map = None

  # Process names from the profile dumps that are treated specially.
  _RENDERER = 'renderer'
  _BROWSER = 'browser'

  assert _RENDERER in process_symbols
  assert _BROWSER in process_symbols

  renderer_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_RENDERER], size_map)
  browser_clustering = Clustering.ClusteredSymbolLists(
      process_symbols[_BROWSER], size_map)
  other_lists = []
  for process, syms in process_symbols.items():
    if process not in (_RENDERER, _BROWSER):
      other_lists.extend(syms)
  if other_lists:
    other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map)
  else:
    other_clustering = []

  # Start with the renderer cluster to favor rendering performance.
  final_ordering = list(renderer_clustering)
  seen = set(final_ordering)
  final_ordering.extend(s for s in browser_clustering if s not in seen)
  seen |= set(browser_clustering)
  final_ordering.extend(s for s in other_clustering if s not in seen)

  return final_ordering


def ClusterOffsets(profiles, processor, limit_cluster_size=False):
  """Cluster profile offsets.

  Args:
    profiles (ProfileManager) Manager of the profile dump files.
    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
    limit_cluster_size (bool) Whether to stop cluster growth at
      MAX_CLUSTER_SIZE.

  Returns:
    A list of clustered symbol names.
  """
  return _ClusterOffsetsLists(profiles, processor, limit_cluster_size)
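

# A sketch of intended use (the ProfileManager and SymbolOffsetProcessor
# instances are assumed to be constructed by the surrounding profiling
# tooling, and the output file name is hypothetical):
#
#   ordered_names = ClusterOffsets(profiles, processor,
#                                  limit_cluster_size=False)
#   with open('ordered_symbols.txt', 'w') as f:
#     f.write('\n'.join(ordered_names))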