blob: 5b3243c246374e1a3ebb6e5285fb0e0cbf830e22 [file] [log] [blame]
etiennebaaabc6a2017-06-28 17:37:101#!/usr/bin/env python
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import gzip
8import json
9import os
10import shutil
11
# Directory (relative to the current working directory) where leak reports
# are written; graph/flame-graph artifacts go into its 'graph' subdirectory.
_OUTPUT_DIR = 'output'
_OUTPUT_GRAPH_DIR = os.path.join(_OUTPUT_DIR, 'graph')
14
15
class Process(object):
  """Heap-dump data collected for one process out of a trace file."""

  def __init__(self):
    # Process identity, filled from trace metadata events.
    self.pid = None
    self.name = None
    self.labels = None
    # Raw allocator dumps and the heap-dump format version (1 or 2).
    self.version = None
    self.allocators = None
    # Lookup tables mapping ids to resolved values.
    self.strings = {}
    self.types = {}
    self.stackframes = {}
27
28
class Entry(object):
  """A single allocation record inside a heap dump."""

  def __init__(self):
    # Number of live objects and their total size in bytes.
    self.count = None
    self.size = None
    # Type and stack frame, stored as ids until resolved later
    # (see ResolveMemoryDumpFields in this file).
    self.type = None
    self.stackframe = None
36
37
class GraphDump(object):
  """Diff result for a single heap of a single process."""

  def __init__(self):
    # Identity of the process this dump belongs to.
    self.pid = None
    self.name = None
    self.labels = None
    # Name of the heap (allocator) that was diffed.
    self.heap = None
    # Tree of remaining allocated objects (later replaced by a filename).
    self.root = ''
    # Potential leaks and their aggregate statistics.
    self.leaks = ''
    self.leak_stackframes = 0
    self.leak_objects = 0
49
def OpenTraceFile(file_path, mode):
  """Opens a trace file, transparently handling gzip compression.

  Gzip-compressed traces (suffix '.gz') are opened in binary mode; plain
  traces are opened in text mode.
  """
  is_compressed = file_path.endswith('.gz')
  opener = gzip.open if is_compressed else open
  return opener(file_path, mode + ('b' if is_compressed else 't'))
55
def FindMemoryDumps(filename):
  """Parses a trace file and extracts per-process detailed memory dumps.

  Args:
    filename: Path to a trace file (plain or gzip-compressed JSON).

  Returns:
    A dict mapping pid to Process, keeping only processes that have a
    complete memory dump (allocators, stackframes and types all present).
  """
  processes = {}

  with OpenTraceFile(filename, 'r') as f:
    raw = f.read()
    # Gzip traces are read as bytes while plain text files yield str on
    # Python 3 (where str has no decode()); normalize before parsing.
    if isinstance(raw, bytes):
      raw = raw.decode('ascii')
    data = json.loads(raw)

  for event in data['traceEvents']:
    pid = event['pid']
    if pid not in processes:
      processes[pid] = Process()
      processes[pid].pid = pid
    process = processes[pid]

    # Retrieve process information.
    if event['ph'] == 'M':
      if event['name'] == 'process_name':
        process.name = event['args']['name']
      if event['name'] == 'process_labels':
        process.labels = event['args']['labels']

    # items() instead of the Python 2-only iteritems() keeps this working
    # on both Python 2 and Python 3.
    if event['name'] == 'typeNames':
      process.types = {}
      for type_id, t in event['args']['typeNames'].items():
        process.types[int(type_id)] = t

    if event['name'] == 'stackFrames':
      process.stackframes = {}
      for stack_id, s in event['args']['stackFrames'].items():
        new_stackframe = {}
        new_stackframe['name'] = s['name']
        if 'parent' in s:
          new_stackframe['parent'] = int(s['parent'])
        process.stackframes[int(stack_id)] = new_stackframe

    # Look for a detailed memory dump event.
    if not ((event['name'] == 'periodic_interval' or
             event['name'] == 'explicitly_triggered') and
            event['args']['dumps']['level_of_detail'] == 'detailed'):
      continue

    # Check for a memory dump V1.
    if u'heaps' in event['args']['dumps']:
      # Get the first memory dump.
      if not process.allocators:
        process.version = 1
        process.allocators = event['args']['dumps']['heaps']

    # Check for a memory dump V2.
    # See format: [chromium] src/base/trace_event/heap_profiler_event_writer.h
    if u'heaps_v2' in event['args']['dumps']:
      # Memory dump format V2 is dumping information incrementally. Update
      # the cumulated indexes.
      maps = event['args']['dumps']['heaps_v2']['maps']
      for string in maps['strings']:
        process.strings[string['id']] = string['string']

      for node in maps['nodes']:
        node_v1 = {}
        node_v1['name'] = process.strings[node['name_sid']]
        if 'parent' in node:
          node_v1['parent'] = node['parent']
        process.stackframes[node['id']] = node_v1

      for t in maps['types']:
        process.types[t['id']] = process.strings[t['name_sid']]

      # Get the first memory dump.
      if not process.allocators:
        dump = event['args']['dumps']
        process.version = 2
        process.allocators = dump['heaps_v2']['allocators']

  # Keep only processes with a complete memory dump. Building a new dict
  # avoids deleting entries while iterating, which raises on Python 3.
  complete_processes = {}
  for pid, process in processes.items():
    if process.allocators and process.stackframes and process.types:
      complete_processes[pid] = process

  return complete_processes
134
135
def ResolveMemoryDumpFields(entries, stackframes, types):
  """Resolves, in place, the id-based fields of each entry.

  Each entry's stackframe id becomes a tuple of frame names (leaf first),
  with CR/LF stripped from names, and its type id becomes the type name.
  """

  def _BuildStackTrace(stack_id):
    # Walk parent links recursively, accumulating cleaned frame names.
    frame = stackframes[stack_id]
    parent_trace = ()
    if 'parent' in frame:
      parent_trace = _BuildStackTrace(frame['parent'])
    cleaned_name = frame['name'].replace('\r', '').replace('\n', '')
    return (cleaned_name,) + parent_trace

  for entry in entries:
    entry.stackframe = _BuildStackTrace(entry.stackframe)
    entry.type = types[entry.type]
151
152
def IncrementHeapEntry(stack, count, size, typename, root):
  """Adds |count| objects of |size| bytes and type |typename| to the tree
  node addressed by |stack| (a sequence of frame names, consumed from the
  end), creating intermediate nodes as needed."""
  if not stack:
    # Reached the addressed node: accumulate totals here.
    root['count'] += count
    root['size'] += size
    per_type = root['count_by_type']
    if typename not in per_type:
      per_type[typename] = 0
    per_type[typename] += count
    return

  frame = stack[-1]
  remaining = stack[:-1]

  if frame not in root['children']:
    root['children'][frame] = {
        'count': 0,
        'size': 0,
        'children': {},
        'count_by_type': {},
    }

  IncrementHeapEntry(remaining, count, size, typename,
                     root['children'][frame])
173
174
def CanonicalHeapEntries(root):
  """Converts cumulative counts/sizes into self-only values, in place.

  On input each node's count/size/count_by_type include its children's
  totals; on output each node holds only its own contribution. Recurses
  over the whole tree.
  """
  # values() instead of the Python 2-only itervalues() keeps this working
  # on both Python 2 and Python 3.
  total_count = 0
  total_size = 0
  for child in root['children'].values():
    total_count += child['count']
    total_size += child['size']
  root['count'] -= total_count
  root['size'] -= total_size

  for typename in root['count_by_type']:
    total_count_for_type = 0
    for child in root['children'].values():
      if typename in child['count_by_type']:
        total_count_for_type += child['count_by_type'][typename]
    root['count_by_type'][typename] -= total_count_for_type

  for child in root['children'].values():
    CanonicalHeapEntries(child)
193
194
def FindLeaks(root, stack, leaks, threshold, size_threshold):
  """Depth-first walk appending to |leaks| every node whose count and size
  both exceed the thresholds. |stack| accumulates frame names, deepest
  frame first."""
  for frame_name, child in root['children'].items():
    FindLeaks(child, [frame_name] + stack, leaks, threshold, size_threshold)

  if root['count'] > threshold and root['size'] > size_threshold:
    leaks.append({
        'count': root['count'],
        'size': root['size'],
        'count_by_type': root['count_by_type'],
        'stackframes': stack,
    })
etiennebaaabc6a2017-06-28 17:37:10205
def DumpTree(root, frame, output, threshold, size_threshold):
  """Writes the allocated-objects tree rooted at |root| as JSON to |output|.

  Args:
    root: Heap entry node ({'count', 'size', 'children', ...}).
    frame: Stack frame name for this node.
    output: File-like object supporting write().
    threshold: Minimum object count for emitting size/count details.
    size_threshold: Minimum total size for emitting size/count details.
  """
  output.write('\n{ \"name\": \"%s\",' % frame)
  # Bug fix: the second comparison previously tested |count| against
  # |size_threshold|; it must test |size| (see the matching condition in
  # FindLeaks).
  if root['count'] > threshold and root['size'] > size_threshold:
    output.write(' \"size\": \"%s\",' % root['size'])
    output.write(' \"count\": \"%s\",' % root['count'])
  output.write(' \"children\": [')
  is_first = True
  # Renamed the loop variable so it no longer shadows the |frame| parameter.
  for child_frame, child in root['children'].items():
    if is_first:
      is_first = False
    else:
      output.write(',')

    DumpTree(child, child_frame, output, threshold, size_threshold)
  output.write(']')
  output.write('}')
222
223
def GetEntries(heap, process):
  """Extracts and resolves all allocation entries of |heap| in |process|.

  Unknown entries are filtered out, and the id-based fields (type,
  stackframe) are resolved to symbolic values. Returns a list of Entry
  objects; an empty list when |process| is None.
  """
  if not process:
    return []

  entries = []
  if process.version == 1:
    for raw_entry in process.allocators[heap]['entries']:
      # Cumulative sizes and types are skipped. see:
      # https://chromium.googlesource.com/chromium/src/+/a990af190304be5bf38b120799c594df5a293518/base/trace_event/heap_profiler_heap_dump_writer.cc#294
      if 'type' not in raw_entry or not raw_entry['bt']:
        continue

      entry = Entry()
      # V1 dumps encode counts and sizes as hexadecimal strings.
      entry.count = int(raw_entry['count'], 16)
      entry.size = int(raw_entry['size'], 16)
      entry.type = int(raw_entry['type'])
      entry.stackframe = int(raw_entry['bt'])
      entries.append(entry)

  elif process.version == 2:
    # V2 dumps store entries column-wise; zip them back into records.
    allocator = process.allocators[heap]
    packed = zip(allocator['counts'], allocator['sizes'],
                 allocator['types'], allocator['nodes'])
    for (entry_count, entry_size, type_id, stackframe_id) in packed:
      entry = Entry()
      entry.count = entry_count
      entry.size = entry_size
      entry.type = type_id
      entry.stackframe = stackframe_id
      entries.append(entry)

  # Resolve fields by looking into indexes
  ResolveMemoryDumpFields(entries, process.stackframes, process.types)

  return entries
264
265
def FilterProcesses(processes, filter_by_name, filter_by_labels):
  """Returns the subset of |processes| matching the name/labels filters.

  Args:
    processes: Dict of pid -> Process.
    filter_by_name: If set, keep only processes with this exact name.
    filter_by_labels: If set, keep only processes whose labels contain it.

  Returns:
    A new dict of pid -> Process with non-matching processes removed.
  """
  remaining_processes = {}
  # items() instead of the Python 2-only iteritems() keeps this working on
  # both Python 2 and Python 3.
  for pid, process in processes.items():
    if filter_by_name and process.name != filter_by_name:
      continue
    if (filter_by_labels and
        (not process.labels or filter_by_labels not in process.labels)):
      continue
    remaining_processes[pid] = process

  return remaining_processes
277
278
def FindRelevantProcesses(start_trace, end_trace,
                          filter_by_name,
                          filter_by_labels,
                          match_by_labels):
  """Loads both traces and pairs up the processes to be diffed.

  Args:
    start_trace: Path to the baseline trace, or None for a single-trace run.
    end_trace: Path to the final trace.
    filter_by_name: Optional process-name filter (see FilterProcesses).
    filter_by_labels: Optional labels filter (see FilterProcesses).
    match_by_labels: If True, pair processes by name/labels instead of PID.

  Returns:
    A list of (start_process_or_None, end_process) tuples.
  """
  # Retrieve the processes and the associated memory dump.
  end_processes = FindMemoryDumps(end_trace)
  end_processes = FilterProcesses(end_processes, filter_by_name,
                                  filter_by_labels)

  start_processes = None
  if start_trace:
    start_processes = FindMemoryDumps(start_trace)
    start_processes = FilterProcesses(start_processes, filter_by_name,
                                      filter_by_labels)

  # Build a sequence of pairs of processes to be compared.
  # values()/items() instead of the Python 2-only iteritems() keeps this
  # working on both Python 2 and Python 3; the inner loop also no longer
  # shadows the outer iteration variable.
  processes = []
  if not start_processes:
    # Only keep end-processes.
    for end_process in end_processes.values():
      processes.append((None, end_process))
  elif match_by_labels:
    # Processes are paired based on name/labels. Browser and GPU processes
    # are singletons, so a name match is sufficient for them.
    for end_process in end_processes.values():
      matching_start_process = None
      for start_process in start_processes.values():
        if (start_process.name == end_process.name and
            (start_process.name in ['Browser', 'GPU'] or
             start_process.labels == end_process.labels)):
          matching_start_process = start_process

      if matching_start_process:
        processes.append((matching_start_process, end_process))
  else:
    # Processes are paired based on their PID.
    relevant_pids = set(end_processes.keys()) & set(start_processes.keys())
    for pid in relevant_pids:
      start_process = start_processes[pid]
      end_process = end_processes[pid]
      processes.append((start_process, end_process))

  return processes
321
322
def BuildGraphDumps(processes, threshold, size_threshold):
  """
  Build graph for a sequence of pair of processes.
  If start_process is None, counts objects in end_trace.
  Otherwise, counts objects present in end_trace, but not in start_process.
  """

  graph_dumps = []

  for (start_process, end_process) in processes:
    pid = end_process.pid
    name = end_process.name if end_process.name else ''
    labels = end_process.labels if end_process.labels else ''
    # print as a function (single pre-formatted argument) works on both
    # Python 2 and Python 3, unlike the former print statement.
    print('Process[%d] %s: %s' % (pid, name, labels))

    for heap in end_process.allocators:
      start_entries = GetEntries(heap, start_process)
      end_entries = GetEntries(heap, end_process)

      graph = GraphDump()
      graph.pid = pid
      graph.name = name
      graph.labels = labels
      graph.heap = heap
      graph_dumps.append(graph)

      # Do the math: diffing start and end memory dumps. Start entries are
      # subtracted and end entries added, so what remains was allocated
      # between the two dumps.
      root = {}
      root['count'] = 0
      root['size'] = 0
      root['children'] = {}
      root['count_by_type'] = {}

      for entry in start_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, - entry.count, - entry.size,
                             entry.type, root)
      for entry in end_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, entry.count, entry.size,
                             entry.type, root)

      CanonicalHeapEntries(root)

      graph.root = root

      # Find leaks
      leaks = []
      FindLeaks(root, [], leaks, threshold, size_threshold)
      leaks.sort(reverse=True, key=lambda k: k['size'])

      if leaks:
        print('  %s: %d potential leaks found.' % (heap, len(leaks)))
        graph.leaks = leaks
        graph.leak_stackframes = len(leaks)
        for leak in leaks:
          graph.leak_objects += leak['count']

  return graph_dumps
382
383
def WritePotentialLeaks(graph_dumps):
  """Writes one JSON leak report per graph that has leaks into _OUTPUT_DIR."""
  for graph in graph_dumps:
    if not graph.leaks:
      continue
    leak_filename = 'process_%d_%s-leaks.json' % (graph.pid, graph.heap)
    leak_path = os.path.join(_OUTPUT_DIR, leak_filename)
    with open(leak_path, 'w') as out_file:
      json.dump(graph.leaks, out_file)
391
392
def WriteGrahDumps(graph_dumps, threshold, size_threshold):
  """Writes each graph's object tree into _OUTPUT_GRAPH_DIR and replaces
  graph.root with the written file's basename.

  (The function name's typo is kept for caller compatibility.)
  """
  for graph in graph_dumps:
    # Dump the remaining allocated objects tree.
    tree_filename = 'process_%d_%s-objects.json' % (graph.pid, graph.heap)
    tree_path = os.path.join(_OUTPUT_GRAPH_DIR, tree_filename)
    if graph.root:
      with open(tree_path, 'w') as out_file:
        DumpTree(graph.root, '.', out_file, threshold, size_threshold)
      graph.root = tree_filename
402
403
def WriteIndex(graph_dumps):
  """Writes _OUTPUT_GRAPH_DIR/index.json summarizing every graph dump."""
  index_path = os.path.join(_OUTPUT_GRAPH_DIR, 'index.json')
  summaries = []
  for graph in graph_dumps:
    summaries.append({
        'pid': graph.pid,
        'heap': graph.heap,
        'name': graph.name,
        'labels': graph.labels,
        'objects': graph.root,
        'potential leaks': graph.leak_stackframes,
        'objects leaked': graph.leak_objects,
    })
  with open(index_path, 'w') as out_file:
    json.dump(summaries, out_file)
417
418
def WriteHTML():
  """Copies the viewer HTML page and the D3 library into _OUTPUT_GRAPH_DIR."""
  script_dir = os.path.dirname(os.path.abspath(__file__))

  # Copy the HTML page.
  shutil.copyfile(
      os.path.join(script_dir, 'diff_heap_profiler.html'),
      os.path.join(_OUTPUT_GRAPH_DIR, 'index.html'))

  # Copy the D3 library file from the catapult tracing tree.
  d3_source = os.path.join(script_dir,
                           os.path.pardir,
                           os.path.pardir,
                           os.path.pardir,
                           'tracing',
                           'third_party',
                           'd3',
                           'd3.min.js')
  shutil.copyfile(d3_source, os.path.join(_OUTPUT_GRAPH_DIR, 'd3.min.js'))
437
438
def Main():
  """Command-line entry point.

  Parses the command line, extracts memory dumps from the given trace
  file(s), diffs them, and writes leak reports (and optionally flame-graph
  data) under the output directory.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--flame-graph',
      action='store_true',
      help='Output a flame graph based on stackframe allocations')
  parser.add_argument(
      '--threshold',
      type=int,
      default=0,
      help='Objects threshold for being a potential memory leak')
  parser.add_argument(
      '--size-threshold',
      type=int,
      default=0,
      help='Size threshold for being a potential memory leak')
  parser.add_argument(
      '--filter-by-name',
      type=str,
      help='Only keep processes with name (i.e. Browser, Renderer, ...)')
  parser.add_argument(
      '--filter-by-labels',
      type=str,
      help='Only keep processes with matching labels')
  parser.add_argument(
      '--match-by-labels',
      action='store_true',
      help='Match processes between runs by labels')
  parser.add_argument(
      'trace',
      nargs='+',
      help='Trace files to be processed')
  options = parser.parse_args()

  # When no threshold at all is given, default to reporting 1000+ objects.
  if options.threshold == 0 and options.size_threshold == 0:
    options.threshold = 1000

  # A single trace means "count objects"; two traces mean "diff them".
  if len(options.trace) == 1:
    start_trace, end_trace = None, options.trace[0]
  else:
    start_trace, end_trace = options.trace[0], options.trace[1]

  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Find relevant processes to be processed.
  processes = FindRelevantProcesses(start_trace, end_trace,
                                    options.filter_by_name,
                                    options.filter_by_labels,
                                    options.match_by_labels)

  graph_dumps = BuildGraphDumps(processes, options.threshold,
                                options.size_threshold)

  WritePotentialLeaks(graph_dumps)

  if options.flame_graph:
    if not os.path.exists(_OUTPUT_GRAPH_DIR):
      os.makedirs(_OUTPUT_GRAPH_DIR)
    WriteGrahDumps(graph_dumps, options.threshold, options.size_threshold)
    WriteIndex(graph_dumps)
    WriteHTML()


if __name__ == '__main__':
  Main()
505 Main()