blob: 5b3243c246374e1a3ebb6e5285fb0e0cbf830e22 [file] [log] [blame]
etiennebaaabc6a2017-06-28 17:37:101#!/usr/bin/env python
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import gzip
8import json
9import os
10import shutil
11
# Directory (relative to the current working directory) where leak reports
# are written; graph/flame-graph artifacts go into its 'graph' subdirectory.
_OUTPUT_DIR = 'output'
_OUTPUT_GRAPH_DIR = os.path.join(_OUTPUT_DIR, 'graph')
14
15
class Process(object):
  """Heap-dump data collected for one process out of a trace file."""

  def __init__(self):
    # Process identity, filled from trace metadata events.
    self.pid = None
    self.name = None
    self.labels = None
    # Raw allocator dumps and the heap-dump format version (1 or 2).
    self.version = None
    self.allocators = None
    # Lookup tables mapping ids to resolved values.
    self.strings = {}
    self.types = {}
    self.stackframes = {}
27
28
class Entry(object):
  """A single allocation record inside a heap dump."""

  def __init__(self):
    # Number of live objects and their total size in bytes.
    self.count = None
    self.size = None
    # Type and stack frame, stored as ids until resolved later
    # (see ResolveMemoryDumpFields in this file).
    self.type = None
    self.stackframe = None
36
37
class GraphDump(object):
  """Diff result for a single heap of a single process."""

  def __init__(self):
    # Identity of the process this dump belongs to.
    self.pid = None
    self.name = None
    self.labels = None
    # Name of the heap (allocator) that was diffed.
    self.heap = None
    # Tree of remaining allocated objects (later replaced by a filename).
    self.root = ''
    # Potential leaks and their aggregate statistics.
    self.leaks = ''
    self.leak_stackframes = 0
    self.leak_objects = 0
49
def OpenTraceFile(file_path, mode):
  """Opens a trace file, transparently handling gzip compression.

  Gzip-compressed traces (suffix '.gz') are opened in binary mode; plain
  traces are opened in text mode.
  """
  is_compressed = file_path.endswith('.gz')
  opener = gzip.open if is_compressed else open
  return opener(file_path, mode + ('b' if is_compressed else 't'))
55
def FindMemoryDumps(filename):
  """Parses a trace file and extracts per-process detailed memory dumps.

  Args:
    filename: Path to a trace file (plain or gzip-compressed JSON).

  Returns:
    A dict mapping pid to Process, keeping only processes that have a
    complete memory dump (allocators, stackframes and types all present).
  """
  processes = {}

  with OpenTraceFile(filename, 'r') as f:
    raw = f.read()
    # Gzip traces are read as bytes while plain text files yield str on
    # Python 3 (where str has no decode()); normalize before parsing.
    if isinstance(raw, bytes):
      raw = raw.decode('ascii')
    data = json.loads(raw)

  for event in data['traceEvents']:
    pid = event['pid']
    if pid not in processes:
      processes[pid] = Process()
      processes[pid].pid = pid
    process = processes[pid]

    # Retrieve process information.
    if event['ph'] == 'M':
      if event['name'] == 'process_name':
        process.name = event['args']['name']
      if event['name'] == 'process_labels':
        process.labels = event['args']['labels']

    # items() instead of the Python 2-only iteritems() keeps this working
    # on both Python 2 and Python 3.
    if event['name'] == 'typeNames':
      process.types = {}
      for type_id, t in event['args']['typeNames'].items():
        process.types[int(type_id)] = t

    if event['name'] == 'stackFrames':
      process.stackframes = {}
      for stack_id, s in event['args']['stackFrames'].items():
        new_stackframe = {}
        new_stackframe['name'] = s['name']
        if 'parent' in s:
          new_stackframe['parent'] = int(s['parent'])
        process.stackframes[int(stack_id)] = new_stackframe

    # Look for a detailed memory dump event.
    if not ((event['name'] == 'periodic_interval' or
             event['name'] == 'explicitly_triggered') and
            event['args']['dumps']['level_of_detail'] == 'detailed'):
      continue

    # Check for a memory dump V1.
    if u'heaps' in event['args']['dumps']:
      # Get the first memory dump.
      if not process.allocators:
        process.version = 1
        process.allocators = event['args']['dumps']['heaps']

    # Check for a memory dump V2.
    # See format: [chromium] src/base/trace_event/heap_profiler_event_writer.h
    if u'heaps_v2' in event['args']['dumps']:
      # Memory dump format V2 is dumping information incrementally. Update
      # the cumulated indexes.
      maps = event['args']['dumps']['heaps_v2']['maps']
      for string in maps['strings']:
        process.strings[string['id']] = string['string']

      for node in maps['nodes']:
        node_v1 = {}
        node_v1['name'] = process.strings[node['name_sid']]
        if 'parent' in node:
          node_v1['parent'] = node['parent']
        process.stackframes[node['id']] = node_v1

      for t in maps['types']:
        process.types[t['id']] = process.strings[t['name_sid']]

      # Get the first memory dump.
      if not process.allocators:
        dump = event['args']['dumps']
        process.version = 2
        process.allocators = dump['heaps_v2']['allocators']

  # Keep only processes with a complete memory dump. Building a new dict
  # avoids deleting entries while iterating, which raises on Python 3.
  complete_processes = {}
  for pid, process in processes.items():
    if process.allocators and process.stackframes and process.types:
      complete_processes[pid] = process

  return complete_processes
134
135
def ResolveMemoryDumpFields(entries, stackframes, types):
  """Resolves, in place, the id-based fields of each entry.

  Each entry's stackframe id becomes a tuple of frame names (leaf first),
  with CR/LF stripped from names, and its type id becomes the type name.
  """

  def _BuildStackTrace(stack_id):
    # Walk parent links recursively, accumulating cleaned frame names.
    frame = stackframes[stack_id]
    parent_trace = ()
    if 'parent' in frame:
      parent_trace = _BuildStackTrace(frame['parent'])
    cleaned_name = frame['name'].replace('\r', '').replace('\n', '')
    return (cleaned_name,) + parent_trace

  for entry in entries:
    entry.stackframe = _BuildStackTrace(entry.stackframe)
    entry.type = types[entry.type]
151
152
def IncrementHeapEntry(stack, count, size, typename, root):
  """Adds |count| objects of |size| bytes and type |typename| to the tree
  node addressed by |stack| (a sequence of frame names, consumed from the
  end), creating intermediate nodes as needed."""
  if not stack:
    # Reached the addressed node: accumulate totals here.
    root['count'] += count
    root['size'] += size
    per_type = root['count_by_type']
    if typename not in per_type:
      per_type[typename] = 0
    per_type[typename] += count
    return

  frame = stack[-1]
  remaining = stack[:-1]

  if frame not in root['children']:
    root['children'][frame] = {
        'count': 0,
        'size': 0,
        'children': {},
        'count_by_type': {},
    }

  IncrementHeapEntry(remaining, count, size, typename,
                     root['children'][frame])
173
174
def CanonicalHeapEntries(root):
  """Converts cumulative counts/sizes into self-only values, in place.

  On input each node's count/size/count_by_type include its children's
  totals; on output each node holds only its own contribution. Recurses
  over the whole tree.
  """
  # values() instead of the Python 2-only itervalues() keeps this working
  # on both Python 2 and Python 3.
  total_count = 0
  total_size = 0
  for child in root['children'].values():
    total_count += child['count']
    total_size += child['size']
  root['count'] -= total_count
  root['size'] -= total_size

  for typename in root['count_by_type']:
    total_count_for_type = 0
    for child in root['children'].values():
      if typename in child['count_by_type']:
        total_count_for_type += child['count_by_type'][typename]
    root['count_by_type'][typename] -= total_count_for_type

  for child in root['children'].values():
    CanonicalHeapEntries(child)
193
194
def FindLeaks(root, stack, leaks, threshold, size_threshold):
  """Depth-first walk appending to |leaks| every node whose count and size
  both exceed the thresholds. |stack| accumulates frame names, deepest
  frame first."""
  for frame_name, child in root['children'].items():
    FindLeaks(child, [frame_name] + stack, leaks, threshold, size_threshold)

  if root['count'] > threshold and root['size'] > size_threshold:
    leaks.append({
        'count': root['count'],
        'size': root['size'],
        'count_by_type': root['count_by_type'],
        'stackframes': stack,
    })
etiennebaaabc6a2017-06-28 17:37:10205
def DumpTree(root, frame, output, threshold, size_threshold):
  """Writes the allocated-objects tree rooted at |root| as JSON to |output|.

  Args:
    root: Heap entry node ({'count', 'size', 'children', ...}).
    frame: Stack frame name for this node.
    output: File-like object supporting write().
    threshold: Minimum object count for emitting size/count details.
    size_threshold: Minimum total size for emitting size/count details.
  """
  output.write('\n{ \"name\": \"%s\",' % frame)
  # Bug fix: the second comparison previously tested |count| against
  # |size_threshold|; it must test |size| (see the matching condition in
  # FindLeaks).
  if root['count'] > threshold and root['size'] > size_threshold:
    output.write(' \"size\": \"%s\",' % root['size'])
    output.write(' \"count\": \"%s\",' % root['count'])
  output.write(' \"children\": [')
  is_first = True
  # Renamed the loop variable so it no longer shadows the |frame| parameter.
  for child_frame, child in root['children'].items():
    if is_first:
      is_first = False
    else:
      output.write(',')

    DumpTree(child, child_frame, output, threshold, size_threshold)
  output.write(']')
  output.write('}')
222
223
def GetEntries(heap, process):
  """Extracts and resolves all allocation entries of |heap| in |process|.

  Unknown entries are filtered out, and the id-based fields (type,
  stackframe) are resolved to symbolic values. Returns a list of Entry
  objects; an empty list when |process| is None.
  """
  if not process:
    return []

  entries = []
  if process.version == 1:
    for raw_entry in process.allocators[heap]['entries']:
      # Cumulative sizes and types are skipped. see:
      # https://chromium.googlesource.com/chromium/src/+/a990af190304be5bf38b120799c594df5a293518/base/trace_event/heap_profiler_heap_dump_writer.cc#294
      if 'type' not in raw_entry or not raw_entry['bt']:
        continue

      entry = Entry()
      # V1 dumps encode counts and sizes as hexadecimal strings.
      entry.count = int(raw_entry['count'], 16)
      entry.size = int(raw_entry['size'], 16)
      entry.type = int(raw_entry['type'])
      entry.stackframe = int(raw_entry['bt'])
      entries.append(entry)

  elif process.version == 2:
    # V2 dumps store entries column-wise; zip them back into records.
    allocator = process.allocators[heap]
    packed = zip(allocator['counts'], allocator['sizes'],
                 allocator['types'], allocator['nodes'])
    for (entry_count, entry_size, type_id, stackframe_id) in packed:
      entry = Entry()
      entry.count = entry_count
      entry.size = entry_size
      entry.type = type_id
      entry.stackframe = stackframe_id
      entries.append(entry)

  # Resolve fields by looking into indexes
  ResolveMemoryDumpFields(entries, process.stackframes, process.types)

  return entries
264
265
def FilterProcesses(processes, filter_by_name, filter_by_labels):
  """Returns the subset of |processes| matching the name/labels filters.

  Args:
    processes: Dict of pid -> Process.
    filter_by_name: If set, keep only processes with this exact name.
    filter_by_labels: If set, keep only processes whose labels contain it.

  Returns:
    A new dict of pid -> Process with non-matching processes removed.
  """
  remaining_processes = {}
  # items() instead of the Python 2-only iteritems() keeps this working on
  # both Python 2 and Python 3.
  for pid, process in processes.items():
    if filter_by_name and process.name != filter_by_name:
      continue
    if (filter_by_labels and
        (not process.labels or filter_by_labels not in process.labels)):
      continue
    remaining_processes[pid] = process

  return remaining_processes
277
278
def FindRelevantProcesses(start_trace, end_trace,
                          filter_by_name,
                          filter_by_labels,
                          match_by_labels):
  """Loads both traces and pairs up the processes to be diffed.

  Args:
    start_trace: Path to the baseline trace, or None for a single-trace run.
    end_trace: Path to the final trace.
    filter_by_name: Optional process-name filter (see FilterProcesses).
    filter_by_labels: Optional labels filter (see FilterProcesses).
    match_by_labels: If True, pair processes by name/labels instead of PID.

  Returns:
    A list of (start_process_or_None, end_process) tuples.
  """
  # Retrieve the processes and the associated memory dump.
  end_processes = FindMemoryDumps(end_trace)
  end_processes = FilterProcesses(end_processes, filter_by_name,
                                  filter_by_labels)

  start_processes = None
  if start_trace:
    start_processes = FindMemoryDumps(start_trace)
    start_processes = FilterProcesses(start_processes, filter_by_name,
                                      filter_by_labels)

  # Build a sequence of pairs of processes to be compared.
  # values()/items() instead of the Python 2-only iteritems() keeps this
  # working on both Python 2 and Python 3; the inner loop also no longer
  # shadows the outer iteration variable.
  processes = []
  if not start_processes:
    # Only keep end-processes.
    for end_process in end_processes.values():
      processes.append((None, end_process))
  elif match_by_labels:
    # Processes are paired based on name/labels. Browser and GPU processes
    # are singletons, so a name match is sufficient for them.
    for end_process in end_processes.values():
      matching_start_process = None
      for start_process in start_processes.values():
        if (start_process.name == end_process.name and
            (start_process.name in ['Browser', 'GPU'] or
             start_process.labels == end_process.labels)):
          matching_start_process = start_process

      if matching_start_process:
        processes.append((matching_start_process, end_process))
  else:
    # Processes are paired based on their PID.
    relevant_pids = set(end_processes.keys()) & set(start_processes.keys())
    for pid in relevant_pids:
      start_process = start_processes[pid]
      end_process = end_processes[pid]
      processes.append((start_process, end_process))

  return processes
321
322
def BuildGraphDumps(processes, threshold, size_threshold):
  """
  Build graph for a sequence of pair of processes.
  If start_process is None, counts objects in end_trace.
  Otherwise, counts objects present in end_trace, but not in start_process.
  """

  graph_dumps = []

  for (start_process, end_process) in processes:
    pid = end_process.pid
    name = end_process.name if end_process.name else ''
    labels = end_process.labels if end_process.labels else ''
    # print as a function (single pre-formatted argument) works on both
    # Python 2 and Python 3, unlike the former print statement.
    print('Process[%d] %s: %s' % (pid, name, labels))

    for heap in end_process.allocators:
      start_entries = GetEntries(heap, start_process)
      end_entries = GetEntries(heap, end_process)

      graph = GraphDump()
      graph.pid = pid
      graph.name = name
      graph.labels = labels
      graph.heap = heap
      graph_dumps.append(graph)

      # Do the math: diffing start and end memory dumps. Start entries are
      # subtracted and end entries added, so what remains was allocated
      # between the two dumps.
      root = {}
      root['count'] = 0
      root['size'] = 0
      root['children'] = {}
      root['count_by_type'] = {}

      for entry in start_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, - entry.count, - entry.size,
                             entry.type, root)
      for entry in end_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, entry.count, entry.size,
                             entry.type, root)

      CanonicalHeapEntries(root)

      graph.root = root

      # Find leaks
      leaks = []
      FindLeaks(root, [], leaks, threshold, size_threshold)
      leaks.sort(reverse=True, key=lambda k: k['size'])

      if leaks:
        print('  %s: %d potential leaks found.' % (heap, len(leaks)))
        graph.leaks = leaks
        graph.leak_stackframes = len(leaks)
        for leak in leaks:
          graph.leak_objects += leak['count']

  return graph_dumps
382
383
def WritePotentialLeaks(graph_dumps):
  """Writes one JSON leak report per graph that has leaks into _OUTPUT_DIR."""
  for graph in graph_dumps:
    if not graph.leaks:
      continue
    leak_filename = 'process_%d_%s-leaks.json' % (graph.pid, graph.heap)
    leak_path = os.path.join(_OUTPUT_DIR, leak_filename)
    with open(leak_path, 'w') as out_file:
      json.dump(graph.leaks, out_file)
391
392
def WriteGrahDumps(graph_dumps, threshold, size_threshold):
  """Writes each graph's object tree into _OUTPUT_GRAPH_DIR and replaces
  graph.root with the written file's basename.

  (The function name's typo is kept for caller compatibility.)
  """
  for graph in graph_dumps:
    # Dump the remaining allocated objects tree.
    tree_filename = 'process_%d_%s-objects.json' % (graph.pid, graph.heap)
    tree_path = os.path.join(_OUTPUT_GRAPH_DIR, tree_filename)
    if graph.root:
      with open(tree_path, 'w') as out_file:
        DumpTree(graph.root, '.', out_file, threshold, size_threshold)
      graph.root = tree_filename
402
403
def WriteIndex(graph_dumps):
  """Writes _OUTPUT_GRAPH_DIR/index.json summarizing every graph dump."""
  index_path = os.path.join(_OUTPUT_GRAPH_DIR, 'index.json')
  summaries = []
  for graph in graph_dumps:
    summaries.append({
        'pid': graph.pid,
        'heap': graph.heap,
        'name': graph.name,
        'labels': graph.labels,
        'objects': graph.root,
        'potential leaks': graph.leak_stackframes,
        'objects leaked': graph.leak_objects,
    })
  with open(index_path, 'w') as out_file:
    json.dump(summaries, out_file)
417
418
def WriteHTML():
  """Copies the viewer HTML page and the D3 library into _OUTPUT_GRAPH_DIR."""
  script_dir = os.path.dirname(os.path.abspath(__file__))

  # Copy the HTML page.
  shutil.copyfile(
      os.path.join(script_dir, 'diff_heap_profiler.html'),
      os.path.join(_OUTPUT_GRAPH_DIR, 'index.html'))

  # Copy the D3 library file from the catapult tracing tree.
  d3_source = os.path.join(script_dir,
                           os.path.pardir,
                           os.path.pardir,
                           os.path.pardir,
                           'tracing',
                           'third_party',
                           'd3',
                           'd3.min.js')
  shutil.copyfile(d3_source, os.path.join(_OUTPUT_GRAPH_DIR, 'd3.min.js'))
437
438
def Main():
  """Command-line entry point.

  Parses the command line, extracts memory dumps from the given trace
  file(s), diffs them, and writes leak reports (and optionally flame-graph
  data) under the output directory.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--flame-graph',
      action='store_true',
      help='Output a flame graph based on stackframe allocations')
  parser.add_argument(
      '--threshold',
      type=int,
      default=0,
      help='Objects threshold for being a potential memory leak')
  parser.add_argument(
      '--size-threshold',
      type=int,
      default=0,
      help='Size threshold for being a potential memory leak')
  parser.add_argument(
      '--filter-by-name',
      type=str,
      help='Only keep processes with name (i.e. Browser, Renderer, ...)')
  parser.add_argument(
      '--filter-by-labels',
      type=str,
      help='Only keep processes with matching labels')
  parser.add_argument(
      '--match-by-labels',
      action='store_true',
      help='Match processes between runs by labels')
  parser.add_argument(
      'trace',
      nargs='+',
      help='Trace files to be processed')
  options = parser.parse_args()

  # When no threshold at all is given, default to reporting 1000+ objects.
  if options.threshold == 0 and options.size_threshold == 0:
    options.threshold = 1000

  # A single trace means "count objects"; two traces mean "diff them".
  if len(options.trace) == 1:
    start_trace, end_trace = None, options.trace[0]
  else:
    start_trace, end_trace = options.trace[0], options.trace[1]

  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Find relevant processes to be processed.
  processes = FindRelevantProcesses(start_trace, end_trace,
                                    options.filter_by_name,
                                    options.filter_by_labels,
                                    options.match_by_labels)

  graph_dumps = BuildGraphDumps(processes, options.threshold,
                                options.size_threshold)

  WritePotentialLeaks(graph_dumps)

  if options.flame_graph:
    if not os.path.exists(_OUTPUT_GRAPH_DIR):
      os.makedirs(_OUTPUT_GRAPH_DIR)
    WriteGrahDumps(graph_dumps, options.threshold, options.size_threshold)
    WriteIndex(graph_dumps)
    WriteHTML()


if __name__ == '__main__':
  Main()
505 Main()