#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

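"""Diffs heap profiler dumps between one or two Chrome traces.

Parses gzipped JSON traces containing detailed memory-infra heap dumps and
reports potential leaks per process and per heap. With two traces, objects
present in the end trace but not in the start trace are counted. Results are
written as JSON files under the 'output' directory; with --flame-graph, a
browsable objects tree is also emitted under 'output/graph'.

Example invocation (trace file names are illustrative):
  python diff_heap_profiler.py --flame-graph start_trace.json.gz \
      end_trace.json.gz
"""
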
import argparse
import gzip
import json
import os
import shutil

_OUTPUT_DIR = 'output'
_OUTPUT_GRAPH_DIR = os.path.join(_OUTPUT_DIR, 'graph')


class Process(object):
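  """Holds the name, labels and memory-dump data of one traced process."""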

  def __init__(self):
    self.pid = None
    self.name = None
    self.labels = None
    self.types = {}
    self.strings = {}
    self.stackframes = {}
    self.allocators = None
    self.version = None


class Entry(object):
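  """One heap entry: allocation count, size, type and stack frame."""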

  def __init__(self):
    self.count = None
    self.size = None
    self.type = None
    self.stackframe = None


class GraphDump(object):
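  """Diff results for one heap of one process: objects tree and leaks."""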

  def __init__(self):
    self.pid = None
    self.name = None
    self.labels = None
    self.heap = None
    self.root = ''
    self.leaks = ''
    self.leak_stackframes = 0
    self.leak_objects = 0


def FindMemoryDumps(filename):
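  """Parses a gzipped JSON trace and extracts the per-process memory dumps.

  Both the V1 ('heaps') and V2 ('heaps_v2') heap dump formats are handled.
  Returns a dict mapping pid to Process, keeping only processes with a
  complete dump.
  """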
  processes = {}

  with gzip.open(filename, 'rb') as f:
    data = json.loads(f.read().decode('ascii'))

  for event in data['traceEvents']:
    pid = event['pid']
    if pid not in processes:
      processes[pid] = Process()
      processes[pid].pid = pid
    process = processes[pid]

    # Retrieve process information.
    if event['ph'] == 'M':
      if event['name'] == 'process_name':
        process.name = event['args']['name']
      if event['name'] == 'process_labels':
        process.labels = event['args']['labels']

    if event['name'] == 'typeNames':
      process.types = {}
      for type_id, t in event['args']['typeNames'].iteritems():
        process.types[int(type_id)] = t

    if event['name'] == 'stackFrames':
      process.stackframes = {}
      for stack_id, s in event['args']['stackFrames'].iteritems():
        new_stackframe = {}
        new_stackframe['name'] = s['name']
        if 'parent' in s:
          new_stackframe['parent'] = int(s['parent'])
        process.stackframes[int(stack_id)] = new_stackframe

    # Look for a detailed memory dump event.
    if not ((event['name'] == 'periodic_interval' or
             event['name'] == 'explicitly_triggered') and
            event['args']['dumps']['level_of_detail'] == 'detailed'):
      continue

    # Check for a memory dump V1.
    if u'heaps' in event['args']['dumps']:
      # Get the first memory dump.
      if not process.allocators:
        process.version = 1
        process.allocators = event['args']['dumps']['heaps']

    # Check for a memory dump V2.
    # See format: [chromium] src/base/trace_event/heap_profiler_event_writer.h
    if u'heaps_v2' in event['args']['dumps']:
      # Memory dump format V2 dumps information incrementally. Update the
      # accumulated indexes.
      maps = event['args']['dumps']['heaps_v2']['maps']
      for string in maps['strings']:
        process.strings[string['id']] = string['string']

      for node in maps['nodes']:
        node_v1 = {}
        node_v1['name'] = process.strings[node['name_sid']]
        if 'parent' in node:
          node_v1['parent'] = node['parent']
        process.stackframes[node['id']] = node_v1

      for t in maps['types']:
        process.types[t['id']] = process.strings[t['name_sid']]

      # Get the first memory dump.
      if not process.allocators:
        dump = event['args']['dumps']
        process.version = 2
        process.allocators = dump['heaps_v2']['allocators']

  # Remove processes with an incomplete memory dump.
  for pid, process in processes.items():
    if not (process.allocators and process.stackframes and process.types):
      del processes[pid]

  return processes


def ResolveMemoryDumpFields(entries, stackframes, types):
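  """Resolves the stackframe and type ids of |entries| into names."""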
  def ResolveStackTrace(stack_id, stackframes):
    stackframe = stackframes[stack_id]
    tail = ()
    if 'parent' in stackframe:
      tail = ResolveStackTrace(stackframe['parent'], stackframes)
    name = stackframe['name'].replace('\r', '').replace('\n', '')
    return (name,) + tail

  def ResolveType(type_id, types):
    return types[type_id]

  for entry in entries:
    entry.stackframe = ResolveStackTrace(entry.stackframe, stackframes)
    entry.type = ResolveType(entry.type, types)


def IncrementHeapEntry(stack, count, size, typename, root):
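  """Adds |count| and |size| to the tree node addressed by |stack|.

  |stack| is ordered from the innermost frame to the outermost one; the tree
  is walked from the outermost frame down, creating nodes on demand.
  """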
  if not stack:
    root['count'] += count
    root['size'] += size
    if typename not in root['count_by_type']:
      root['count_by_type'][typename] = 0
    root['count_by_type'][typename] += count
  else:
    top = stack[-1]
    tail = stack[:-1]

    if top not in root['children']:
      new_node = {}
      new_node['count'] = 0
      new_node['size'] = 0
      new_node['children'] = {}
      new_node['count_by_type'] = {}
      root['children'][top] = new_node

    IncrementHeapEntry(tail, count, size, typename, root['children'][top])


def CanonicalHeapEntries(root):
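  """Converts the cumulative counts and sizes of a tree into self values.

  Recursively subtracts from each node the totals of its children, so each
  node only reports what was allocated at that exact stack frame.
  """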
  total_count = 0
  total_size = 0
  for child in root['children'].itervalues():
    total_count += child['count']
    total_size += child['size']
  root['count'] -= total_count
  root['size'] -= total_size

  for typename in root['count_by_type']:
    total_count_for_type = 0
    for child in root['children'].itervalues():
      if typename in child['count_by_type']:
        total_count_for_type += child['count_by_type'][typename]
    root['count_by_type'][typename] -= total_count_for_type

  for child in root['children'].itervalues():
    CanonicalHeapEntries(child)


def FindLeaks(root, stack, leaks, threshold, size_threshold):
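  """Recursively collects nodes exceeding both thresholds into |leaks|."""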
  for frame in root['children']:
    FindLeaks(root['children'][frame], [frame] + stack, leaks, threshold,
              size_threshold)

  if root['count'] > threshold and root['size'] > size_threshold:
    leaks.append((root['count'], root['size'], root['count_by_type'], stack))


def DumpTree(root, frame, output, threshold, size_threshold):
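  """Writes the tree rooted at |root| to |output| as nested JSON objects."""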
  output.write('\n{ "name": "%s",' % frame)
  if root['count'] > threshold and root['size'] > size_threshold:
    output.write(' "size": "%s",' % root['size'])
    output.write(' "count": "%s",' % root['count'])
  output.write(' "children": [')
  is_first = True
  for frame, child in root['children'].items():
    if is_first:
      is_first = False
    else:
      output.write(',')

    DumpTree(child, frame, output, threshold, size_threshold)
  output.write(']')
  output.write('}')


def GetEntries(heap, process):
218 """
219 Returns all entries in a heap, after filtering out unknown entries, and doing
220 some post processing to extract the relevant fields.
221 """
222 if not process:
223 return []
224
225 entries = []
226 if process.version == 1:
227 for raw_entry in process.allocators[heap]['entries']:
228 # Cumulative sizes and types are skipped. see:
229 # https://chromium.googlesource.com/chromium/src/+/a990af190304be5bf38b120799c594df5a293518/base/trace_event/heap_profiler_heap_dump_writer.cc#294
etienneb9d1bd9e2017-06-28 19:35:18230 if 'type' not in raw_entry or not raw_entry['bt']:
etiennebaaabc6a2017-06-28 17:37:10231 continue
232
233 entry = Entry()
234 entry.count = int(raw_entry['count'], 16)
235 entry.size = int(raw_entry['size'], 16)
236 entry.type = int(raw_entry['type'])
237 entry.stackframe = int(raw_entry['bt'])
238 entries.append(entry)
239
240 elif process.version == 2:
241 raw_entries = zip(process.allocators[heap]['counts'],
242 process.allocators[heap]['sizes'],
243 process.allocators[heap]['types'],
244 process.allocators[heap]['nodes'])
245 for (raw_count, raw_size, raw_type, raw_stackframe) in raw_entries:
246 entry = Entry()
247 entry.count = raw_count
248 entry.size = raw_size
249 entry.type = raw_type
250 entry.stackframe = raw_stackframe
251 entries.append(entry)
252
253 # Resolve fields by looking into indexes
254 ResolveMemoryDumpFields(entries, process.stackframes, process.types)
255
256 return entries
257
258
259def FilterProcesses(processes, filter_by_name, filter_by_labels):
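  """Returns the processes matching the name and labels filters."""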
  remaining_processes = {}
  for pid, process in processes.iteritems():
    if filter_by_name and process.name != filter_by_name:
      continue
    if (filter_by_labels and
        (not process.labels or filter_by_labels not in process.labels)):
      continue
    remaining_processes[pid] = process

  return remaining_processes


def FindRelevantProcesses(start_trace, end_trace,
                          filter_by_name,
                          filter_by_labels,
                          match_by_labels):
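  """Pairs up processes from the start and end traces for comparison.

  Returns a list of (start_process, end_process) tuples; start_process is
  None when no start trace is provided.
  """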
  # Retrieve the processes and the associated memory dump.
  end_processes = FindMemoryDumps(end_trace)
  end_processes = FilterProcesses(end_processes, filter_by_name,
                                  filter_by_labels)

  start_processes = None
  if start_trace:
    start_processes = FindMemoryDumps(start_trace)
    start_processes = FilterProcesses(start_processes, filter_by_name,
                                      filter_by_labels)

  # Build a sequence of pairs of processes to be compared.
  processes = []
  if not start_processes:
    # Only keep end-processes.
    for end_process in end_processes.itervalues():
      processes.append((None, end_process))
  elif match_by_labels:
    # Processes are paired based on name/labels.
    for end_process in end_processes.itervalues():
      matching_start_process = None
      for start_process in start_processes.itervalues():
        if (start_process.name == end_process.name and
            (start_process.name in ['Browser', 'GPU'] or
             start_process.labels == end_process.labels)):
          matching_start_process = start_process

      if matching_start_process:
        processes.append((matching_start_process, end_process))
  else:
    # Processes are paired based on their PID.
    relevant_pids = set(end_processes.keys()) & set(start_processes.keys())
    for pid in relevant_pids:
      start_process = start_processes[pid]
      end_process = end_processes[pid]
      processes.append((start_process, end_process))

  return processes


def BuildGraphDumps(processes, threshold, size_threshold):
  """Builds a GraphDump for each pair of processes.

  If start_process is None, counts objects in the end trace. Otherwise,
  counts objects present in the end trace but not in the start trace.
  """

  graph_dumps = []

  for (start_process, end_process) in processes:
    pid = end_process.pid
    name = end_process.name if end_process.name else ''
    labels = end_process.labels if end_process.labels else ''
    print 'Process[%d] %s: %s' % (pid, name, labels)

    for heap in end_process.allocators:
      start_entries = GetEntries(heap, start_process)
      end_entries = GetEntries(heap, end_process)

      graph = GraphDump()
      graph.pid = pid
      graph.name = name
      graph.labels = labels
      graph.heap = heap
      graph_dumps.append(graph)

      # Do the math: diffing start and end memory dumps.
      root = {}
      root['count'] = 0
      root['size'] = 0
      root['children'] = {}
      root['count_by_type'] = {}

      for entry in start_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, -entry.count, -entry.size,
                             entry.type, root)
      for entry in end_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, entry.count, entry.size,
                             entry.type, root)

      CanonicalHeapEntries(root)

      graph.root = root

      # Find leaks.
      leaks = []
      FindLeaks(root, [], leaks, threshold, size_threshold)
      leaks.sort(reverse=True)

      if leaks:
        print '  %s: %d potential leaks found.' % (heap, len(leaks))
        graph.leaks = leaks
        graph.leak_stackframes = len(leaks)
        for leak in leaks:
          graph.leak_objects += leak[0]

  return graph_dumps


def WritePotentialLeaks(graph_dumps):
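  """Writes one JSON leak report per heap under _OUTPUT_DIR."""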
  for graph in graph_dumps:
    if graph.leaks:
      filename = 'process_%d_%s-leaks.json' % (graph.pid, graph.heap)
      output_filename = os.path.join(_OUTPUT_DIR, filename)
      with open(output_filename, 'w') as output:
        json.dump(graph.leaks, output)


def WriteGraphDumps(graph_dumps, threshold, size_threshold):
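  """Writes each allocated-objects tree as JSON under _OUTPUT_GRAPH_DIR."""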
  for graph in graph_dumps:
    # Dump the remaining allocated objects tree.
    filename = 'process_%d_%s-objects.json' % (graph.pid, graph.heap)
    output_filename = os.path.join(_OUTPUT_GRAPH_DIR, filename)
    if graph.root:
      with open(output_filename, 'w') as output:
        DumpTree(graph.root, '.', output, threshold, size_threshold)
      graph.root = filename


def WriteIndex(graph_dumps):
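  """Writes an index.json summarizing the graph dumps for the HTML page."""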
  output_filename = os.path.join(_OUTPUT_GRAPH_DIR, 'index.json')
  with open(output_filename, 'w') as output:
    json.dump([
        {'pid': graph.pid,
         'heap': graph.heap,
         'name': graph.name,
         'labels': graph.labels,
         'objects': graph.root,
         'potential leaks': graph.leak_stackframes,
         'objects leaked': graph.leak_objects,
        }
        for graph in graph_dumps], output)


def WriteHTML():
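  """Copies the HTML viewer page and the D3 library next to the graphs."""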
  # Copy the HTML page.
  source = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'diff_heap_profiler.html')
  destination = os.path.join(_OUTPUT_GRAPH_DIR, 'index.html')
  shutil.copyfile(source, destination)

  # Copy the D3 library file.
  source = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        os.path.pardir,
                        os.path.pardir,
                        os.path.pardir,
                        'tracing',
                        'third_party',
                        'd3',
                        'd3.min.js')
  destination = os.path.join(_OUTPUT_GRAPH_DIR, 'd3.min.js')
  shutil.copyfile(source, destination)


def Main():
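  """Command-line entry point: parses arguments and writes the reports."""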
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--flame-graph',
      action='store_true',
      help='Output a flame graph based on stackframe allocations')
  parser.add_argument(
      '--threshold',
      type=int,
      default=0,
      help='Objects threshold for being a potential memory leak')
  parser.add_argument(
      '--size-threshold',
      type=int,
      default=0,
      help='Size threshold for being a potential memory leak')
  parser.add_argument(
      '--filter-by-name',
      type=str,
      help='Only keep processes with this name (e.g. Browser, Renderer, ...)')
  parser.add_argument(
      '--filter-by-labels',
      type=str,
      help='Only keep processes with matching labels')
  parser.add_argument(
      '--match-by-labels',
      action='store_true',
      help='Match processes between runs by labels')
  parser.add_argument(
      'trace',
      nargs='+',
      help='Trace files to be processed')
  options = parser.parse_args()

  if options.threshold == 0 and options.size_threshold == 0:
    options.threshold = 1000

  if len(options.trace) == 1:
    end_trace = options.trace[0]
    start_trace = None
  else:
    start_trace = options.trace[0]
    end_trace = options.trace[1]

  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Find relevant processes to be processed.
  processes = FindRelevantProcesses(start_trace, end_trace,
                                    options.filter_by_name,
                                    options.filter_by_labels,
                                    options.match_by_labels)

  graph_dumps = BuildGraphDumps(processes, options.threshold,
                                options.size_threshold)

  WritePotentialLeaks(graph_dumps)

  if options.flame_graph:
    if not os.path.exists(_OUTPUT_GRAPH_DIR):
      os.makedirs(_OUTPUT_GRAPH_DIR)
    WriteGraphDumps(graph_dumps, options.threshold, options.size_threshold)
    WriteIndex(graph_dumps)
    WriteHTML()


if __name__ == '__main__':
  Main()