#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
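"""Diff heap dumps between two Chromium traces.

Reads detailed memory-infra dumps from one or two gzipped JSON trace files,
diffs the objects allocated between the start and the end trace, and writes
the potential leaks found for each process and allocator heap under the
'output' directory. With --flame-graph, data for a browsable flame graph is
written as well.

Typical invocation (the script file name is assumed here from the sibling
diff_heap_profiler.html page):
  python diff_heap_profiler.py --flame-graph start_trace.json.gz end_trace.json.gz
"""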
import argparse
import gzip
import json
import os
import shutil

_OUTPUT_DIR = 'output'
_OUTPUT_GRAPH_DIR = os.path.join(_OUTPUT_DIR, 'graph')


class Process(object):
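  """Heap dumps and metadata collected from the trace for a single process."""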

  def __init__(self):
    self.pid = None
    self.name = None
    self.labels = None
    self.types = {}
    self.strings = {}
    self.stackframes = {}
    self.allocators = None
    self.version = None


class Entry(object):
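  """A single heap entry: allocation count, size, type and stackframe."""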

  def __init__(self):
    self.count = None
    self.size = None
    self.type = None
    self.stackframe = None


class GraphDump(object):
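  """Diff results for one allocator heap of one process."""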

  def __init__(self):
    self.pid = None
    self.name = None
    self.labels = None
    self.heap = None
    self.root = ''
    self.leaks = ''
    self.leak_stackframes = 0
    self.leak_objects = 0


def FindMemoryDumps(filename):
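  """Parses a gzipped JSON trace and returns a dict mapping pid to Process.

  Only processes with a complete detailed memory dump (allocators, stackframes
  and types) are kept.
  """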
  processes = {}

  with gzip.open(filename, 'rb') as f:
    data = json.loads(f.read().decode('ascii'))

  for event in data['traceEvents']:
    pid = event['pid']
    if pid not in processes:
      processes[pid] = Process()
      processes[pid].pid = pid
    process = processes[pid]

    # Retrieve process information.
    if event['ph'] == 'M':
      if event['name'] == 'process_name':
        process.name = event['args']['name']
      if event['name'] == 'process_labels':
        process.labels = event['args']['labels']

    if event['name'] == 'typeNames':
      process.types = {}
      for type_id, t in event['args']['typeNames'].iteritems():
        process.types[int(type_id)] = t

    if event['name'] == 'stackFrames':
      process.stackframes = {}
      for stack_id, s in event['args']['stackFrames'].iteritems():
        new_stackframe = {}
        new_stackframe['name'] = s['name']
        if 'parent' in s:
          new_stackframe['parent'] = int(s['parent'])
        process.stackframes[int(stack_id)] = new_stackframe

    # Look for a detailed memory dump event.
    if not ((event['name'] == 'periodic_interval' or
             event['name'] == 'explicitly_triggered') and
            event['args']['dumps']['level_of_detail'] == 'detailed'):
      continue

    # Check for a memory dump V1.
    if u'heaps' in event['args']['dumps']:
      # Get the first memory dump.
      if not process.allocators:
        process.version = 1
        process.allocators = event['args']['dumps']['heaps']

    # Check for a memory dump V2.
    # See format: [chromium] src/base/trace_event/heap_profiler_event_writer.h
    if u'heaps_v2' in event['args']['dumps']:
      # Memory dump format V2 dumps information incrementally. Update the
      # cumulative indexes.
      maps = event['args']['dumps']['heaps_v2']['maps']
      for string in maps['strings']:
        process.strings[string['id']] = string['string']

      for node in maps['nodes']:
        node_v1 = {}
        node_v1['name'] = process.strings[node['name_sid']]
        if 'parent' in node:
          node_v1['parent'] = node['parent']
        process.stackframes[node['id']] = node_v1

      for t in maps['types']:
        process.types[t['id']] = process.strings[t['name_sid']]

      # Get the first memory dump.
      if not process.allocators:
        dump = event['args']['dumps']
        process.version = 2
        process.allocators = dump['heaps_v2']['allocators']

  # Remove processes with an incomplete memory dump.
  for pid, process in processes.items():
    if not (process.allocators and process.stackframes and process.types):
      del processes[pid]

  return processes


def ResolveMemoryDumpFields(entries, stackframes, types):
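  """Replaces each entry's stackframe id with a tuple of frame names and its
  type id with a type name, using the given lookup tables.
  """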
  def ResolveStackTrace(stack_id, stackframes):
    stackframe = stackframes[stack_id]
    tail = ()
    if 'parent' in stackframe:
      tail = ResolveStackTrace(stackframe['parent'], stackframes)
    name = stackframe['name'].replace('\r', '').replace('\n', '')
    return (name,) + tail

  def ResolveType(type_id, types):
    return types[type_id]

  for entry in entries:
    entry.stackframe = ResolveStackTrace(entry.stackframe, stackframes)
    entry.type = ResolveType(entry.type, types)


def IncrementHeapEntry(stack, count, size, typename, root):
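  """Adds count and size (negative when subtracting a start dump) to the node
  reached by walking the stack from its outermost frame, creating intermediate
  nodes as needed.
  """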
  if not stack:
    root['count'] += count
    root['size'] += size
    if typename not in root['count_by_type']:
      root['count_by_type'][typename] = 0
    root['count_by_type'][typename] += count
  else:
    top = stack[-1]
    tail = stack[:-1]

    if top not in root['children']:
      new_node = {}
      new_node['count'] = 0
      new_node['size'] = 0
      new_node['children'] = {}
      new_node['count_by_type'] = {}
      root['children'][top] = new_node

    IncrementHeapEntry(tail, count, size, typename, root['children'][top])


def CanonicalHeapEntries(root):
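  """Turns cumulative counts and sizes into self-only values by subtracting
  each node's children, recursively.
  """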
  total_count = 0
  total_size = 0
  for child in root['children'].itervalues():
    total_count += child['count']
    total_size += child['size']
  root['count'] -= total_count
  root['size'] -= total_size

  for typename in root['count_by_type']:
    total_count_for_type = 0
    for child in root['children'].itervalues():
      if typename in child['count_by_type']:
        total_count_for_type += child['count_by_type'][typename]
    root['count_by_type'][typename] -= total_count_for_type

  for child in root['children'].itervalues():
    CanonicalHeapEntries(child)


def FindLeaks(root, stack, leaks, threshold, size_threshold):
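  """Recursively collects nodes whose self count and size both exceed the
  thresholds, appending (count, size, count_by_type, stack) tuples to leaks.
  """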
  for frame in root['children']:
    FindLeaks(root['children'][frame], [frame] + stack, leaks, threshold,
              size_threshold)

  if root['count'] > threshold and root['size'] > size_threshold:
    leaks.append((root['count'], root['size'], root['count_by_type'], stack))


def DumpTree(root, frame, output, threshold, size_threshold):
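  """Writes the tree rooted at |root| to |output| as JSON for the flame graph.

  Size and count are only emitted for nodes above the thresholds.
  """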
  output.write('\n{ \"name\": \"%s\",' % frame)
  if root['count'] > threshold and root['size'] > size_threshold:
    output.write(' \"size\": \"%s\",' % root['size'])
    output.write(' \"count\": \"%s\",' % root['count'])
  output.write(' \"children\": [')
  is_first = True
  for frame, child in root['children'].items():
    if is_first:
      is_first = False
    else:
      output.write(',')

    DumpTree(child, frame, output, threshold, size_threshold)
  output.write(']')
  output.write('}')


def GetEntries(heap, process):
  """Returns all entries in a heap.

  Unknown entries are filtered out and some post-processing is done to extract
  the relevant fields.
  """
  if not process:
    return []

  entries = []
  if process.version == 1:
    for raw_entry in process.allocators[heap]['entries']:
      # Cumulative sizes and types are skipped. See:
      # https://chromium.googlesource.com/chromium/src/+/a990af190304be5bf38b120799c594df5a293518/base/trace_event/heap_profiler_heap_dump_writer.cc#294
      if 'type' not in raw_entry or not raw_entry['bt']:
        continue

      entry = Entry()
      entry.count = int(raw_entry['count'], 16)
      entry.size = int(raw_entry['size'], 16)
      entry.type = int(raw_entry['type'])
      entry.stackframe = int(raw_entry['bt'])
      entries.append(entry)

  elif process.version == 2:
    raw_entries = zip(process.allocators[heap]['counts'],
                      process.allocators[heap]['sizes'],
                      process.allocators[heap]['types'],
                      process.allocators[heap]['nodes'])
    for (raw_count, raw_size, raw_type, raw_stackframe) in raw_entries:
      entry = Entry()
      entry.count = raw_count
      entry.size = raw_size
      entry.type = raw_type
      entry.stackframe = raw_stackframe
      entries.append(entry)

  # Resolve fields by looking them up in the stackframe and type indexes.
  ResolveMemoryDumpFields(entries, process.stackframes, process.types)

  return entries


def FilterProcesses(processes, filter_by_name, filter_by_labels):
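  """Returns the subset of processes matching the name and labels filters."""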
  remaining_processes = {}
  for pid, process in processes.iteritems():
    if filter_by_name and process.name != filter_by_name:
      continue
    if (filter_by_labels and
        (not process.labels or filter_by_labels not in process.labels)):
      continue
    remaining_processes[pid] = process

  return remaining_processes


def FindRelevantProcesses(start_trace, end_trace,
                          filter_by_name,
                          filter_by_labels,
                          match_by_labels):
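  """Returns a list of (start_process, end_process) pairs to compare.

  start_process is None when only one trace is provided. Pairing is done by
  PID, or by name/labels when match_by_labels is set.
  """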
  # Retrieve the processes and the associated memory dump.
  end_processes = FindMemoryDumps(end_trace)
  end_processes = FilterProcesses(end_processes, filter_by_name,
                                  filter_by_labels)

  start_processes = None
  if start_trace:
    start_processes = FindMemoryDumps(start_trace)
    start_processes = FilterProcesses(start_processes, filter_by_name,
                                      filter_by_labels)

  # Build a sequence of pairs of processes to be compared.
  processes = []
  if not start_processes:
    # Only keep end-processes.
    for pid, end_process in end_processes.iteritems():
      processes.append((None, end_process))
  elif match_by_labels:
    # Processes are paired based on name/labels.
    for pid, end_process in end_processes.iteritems():
      matching_start_process = None
      for pid, start_process in start_processes.iteritems():
        if (start_process.name == end_process.name and
            (start_process.name in ['Browser', 'GPU'] or
             start_process.labels == end_process.labels)):
          matching_start_process = start_process

      if matching_start_process:
        processes.append((matching_start_process, end_process))
  else:
    # Processes are paired based on their PID.
    relevant_pids = set(end_processes.keys()) & set(start_processes.keys())
    for pid in relevant_pids:
      start_process = start_processes[pid]
      end_process = end_processes[pid]
      processes.append((start_process, end_process))

  return processes


def BuildGraphDumps(processes, threshold, size_threshold):
  """Builds a graph dump for each heap of each pair of processes.

  If start_process is None, counts the objects in end_trace. Otherwise, counts
  the objects present in end_trace but not in start_process.
  """

  graph_dumps = []

  for (start_process, end_process) in processes:
    pid = end_process.pid
    name = end_process.name if end_process.name else ''
    labels = end_process.labels if end_process.labels else ''
    print 'Process[%d] %s: %s' % (pid, name, labels)

    for heap in end_process.allocators:
      start_entries = GetEntries(heap, start_process)
      end_entries = GetEntries(heap, end_process)

      graph = GraphDump()
      graph.pid = pid
      graph.name = name
      graph.labels = labels
      graph.heap = heap
      graph_dumps.append(graph)

      # Do the math: diff the start and end memory dumps.
      root = {}
      root['count'] = 0
      root['size'] = 0
      root['children'] = {}
      root['count_by_type'] = {}

      for entry in start_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, - entry.count, - entry.size,
                             entry.type, root)
      for entry in end_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, entry.count, entry.size,
                             entry.type, root)

      CanonicalHeapEntries(root)

      graph.root = root

      # Find leaks.
      leaks = []
      FindLeaks(root, [], leaks, threshold, size_threshold)
      leaks.sort(reverse=True)

      if leaks:
        print ' %s: %d potential leaks found.' % (heap, len(leaks))
        graph.leaks = leaks
        graph.leak_stackframes = len(leaks)
        for leak in leaks:
          graph.leak_objects += leak[0]

  return graph_dumps


def WritePotentialLeaks(graph_dumps):
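  """Writes each graph dump's potential leaks to a JSON file under 'output'."""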
  for graph in graph_dumps:
    if graph.leaks:
      filename = 'process_%d_%s-leaks.json' % (graph.pid, graph.heap)
      output_filename = os.path.join(_OUTPUT_DIR, filename)
      with open(output_filename, 'w') as output:
        json.dump(graph.leaks, output)


def WriteGraphDumps(graph_dumps, threshold, size_threshold):
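  """Writes the tree of remaining allocated objects of each graph dump to a
  JSON file used by the flame graph page.
  """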
  for graph in graph_dumps:
    # Dump the remaining allocated objects tree.
    filename = 'process_%d_%s-objects.json' % (graph.pid, graph.heap)
    output_filename = os.path.join(_OUTPUT_GRAPH_DIR, filename)
    if graph.root:
      with open(output_filename, 'w') as output:
        DumpTree(graph.root, '.', output, threshold, size_threshold)
      graph.root = filename


def WriteIndex(graph_dumps):
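  """Writes an index.json summarizing every graph dump for the HTML page."""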
  output_filename = os.path.join(_OUTPUT_GRAPH_DIR, 'index.json')
  with open(output_filename, 'w') as output:
    json.dump([
        {'pid': graph.pid,
         'heap': graph.heap,
         'name': graph.name,
         'labels': graph.labels,
         'objects': graph.root,
         'potential leaks': graph.leak_stackframes,
         'objects leaked': graph.leak_objects,
        }
        for graph in graph_dumps], output)


def WriteHTML():
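  """Copies the flame graph HTML page and the D3 library next to the data."""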
  # Copy the HTML page.
  source = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'diff_heap_profiler.html')
  destination = os.path.join(_OUTPUT_GRAPH_DIR, 'index.html')
  shutil.copyfile(source, destination)

  # Copy the D3 library file.
  source = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        os.path.pardir,
                        os.path.pardir,
                        os.path.pardir,
                        'tracing',
                        'third_party',
                        'd3',
                        'd3.min.js')
  destination = os.path.join(_OUTPUT_GRAPH_DIR, 'd3.min.js')
  shutil.copyfile(source, destination)


def Main():
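  """Parses the command line, diffs the traces and writes the reports."""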
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--flame-graph',
      action='store_true',
      help='Output a flame graph based on stackframe allocations')
  parser.add_argument(
      '--threshold',
      type=int,
      default=0,
      help='Objects threshold for being a potential memory leak')
  parser.add_argument(
      '--size-threshold',
      type=int,
      default=0,
      help='Size threshold for being a potential memory leak')
  parser.add_argument(
      '--filter-by-name',
      type=str,
      help='Only keep processes with a matching name (e.g. Browser, Renderer, ...)')
  parser.add_argument(
      '--filter-by-labels',
      type=str,
      help='Only keep processes with matching labels')
  parser.add_argument(
      '--match-by-labels',
      action='store_true',
      help='Match processes between runs by labels')
  parser.add_argument(
      'trace',
      nargs='+',
      help='Trace files to be processed')
  options = parser.parse_args()

  if options.threshold == 0 and options.size_threshold == 0:
    options.threshold = 1000

  if len(options.trace) == 1:
    end_trace = options.trace[0]
    start_trace = None
  else:
    start_trace = options.trace[0]
    end_trace = options.trace[1]

  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Find the relevant processes and pair them for comparison.
  processes = FindRelevantProcesses(start_trace, end_trace,
                                    options.filter_by_name,
                                    options.filter_by_labels,
                                    options.match_by_labels)

  graph_dumps = BuildGraphDumps(processes, options.threshold,
                                options.size_threshold)

  WritePotentialLeaks(graph_dumps)

  if options.flame_graph:
    if not os.path.exists(_OUTPUT_GRAPH_DIR):
      os.makedirs(_OUTPUT_GRAPH_DIR)
    WriteGraphDumps(graph_dumps, options.threshold, options.size_threshold)
    WriteIndex(graph_dumps)
    WriteHTML()

if __name__ == '__main__':
  Main()