blob: f15bae47b4a6213bbc1b1699d2e8f6de20939888 [file] [log] [blame]
Deepanjan Royfaaae892022-10-13 20:18:331#!/usr/bin/env vpython3
etiennebaaabc6a2017-06-28 17:37:102# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Fabrice de Gans97da6b12021-12-10 23:51:526from __future__ import absolute_import
7from __future__ import print_function
etiennebaaabc6a2017-06-28 17:37:108import argparse
9import gzip
10import json
11import os
12import shutil
Fabrice de Gans97da6b12021-12-10 23:51:5213import six
14from six.moves import zip
etiennebaaabc6a2017-06-28 17:37:1015
# Output locations, relative to the current working directory.
_OUTPUT_DIR = 'output'
_OUTPUT_GRAPH_DIR = os.path.join(_OUTPUT_DIR, 'graph')
18
19
class Process(object):
  """Heap-profile data gathered for a single process in a trace."""

  def __init__(self):
    # Identity, filled from metadata ('M') trace events.
    self.pid = self.name = self.labels = None
    # Id -> value lookup tables used to resolve heap entries.
    self.types = {}
    self.strings = {}
    self.stackframes = {}
    # First detailed memory dump seen, plus its format version (1 or 2).
    self.allocators = self.version = None
31
32
class Entry(object):
  """A single heap allocation record.

  'type' and 'stackframe' start as numeric ids and are later resolved to a
  type name and a tuple of frame names by ResolveMemoryDumpFields.
  """

  def __init__(self):
    self.count = self.size = None
    self.type = self.stackframe = None
40
41
class GraphDump(object):
  """Result of diffing one heap of one process, ready to be reported."""

  def __init__(self):
    self.pid = self.name = self.labels = self.heap = None
    self.root = ''              # Aggregation tree, later a result filename.
    self.leaks = ''             # List of potential leaks, when any are found.
    self.leak_stackframes = 0   # Number of distinct leaking stackframes.
    self.leak_objects = 0       # Total count of leaked objects.
53
def OpenTraceFile(file_path, mode):
  """Open a trace file, transparently handling gzip'd traces.

  '.gz' traces are opened in binary mode via gzip; anything else is opened
  as a plain text file.
  """
  is_compressed = file_path.endswith('.gz')
  opener, suffix = (gzip.open, 'b') if is_compressed else (open, 't')
  return opener(file_path, mode + suffix)
etienneb12fd4ba2017-09-08 19:32:0658
def FindMemoryDumps(filename):
  """Parse a trace file and extract per-process detailed memory dumps.

  Returns a dict {pid: Process} keeping only processes for which a complete
  dump was found (allocators, stackframes and types all present).
  """
  processes = {}

  with OpenTraceFile(filename, 'r') as f:
    data = json.loads(f.read())

  for event in data['traceEvents']:
    pid = event['pid']
    if pid not in processes:
      processes[pid] = Process()
      processes[pid].pid = pid
    process = processes[pid]

    # Retrieve process identity from metadata ('M') events.
    if event['ph'] == 'M':
      if event['name'] == 'process_name' and 'name' in event['args']:
        process.name = event['args']['name']
      if event['name'] == 'process_labels' and 'labels' in event['args']:
        process.labels = event['args']['labels']

    # V1 lookup tables arrive as dedicated events, keyed by stringified ids.
    if event['name'] == 'typeNames':
      process.types = {}
      for type_id, t in six.iteritems(event['args']['typeNames']):
        process.types[int(type_id)] = t

    if event['name'] == 'stackFrames':
      process.stackframes = {}
      for stack_id, s in six.iteritems(event['args']['stackFrames']):
        new_stackframe = {}
        new_stackframe['name'] = s['name']
        if 'parent' in s:
          new_stackframe['parent'] = int(s['parent'])
        process.stackframes[int(stack_id)] = new_stackframe

    # Look for a detailed memory dump event; skip everything else.
    if not ((event['name'] == 'periodic_interval' or
             event['name'] == 'explicitly_triggered') and
            event['args']['dumps']['level_of_detail'] == 'detailed'):
      continue

    # Check for a memory dump V1.
    if u'heaps' in event['args']['dumps']:
      # Only keep the first memory dump seen for the process.
      if not process.allocators:
        process.version = 1
        process.allocators = event['args']['dumps']['heaps']

    # Check for a memory dump V2.
    # See format: [chromium] src/base/trace_event/heap_profiler_event_writer.h
    if u'heaps_v2' in event['args']['dumps']:
      # Memory dump format V2 is dumping information incrementally. Update
      # the cumulated indexes (strings, stackframe nodes, types).
      maps = event['args']['dumps']['heaps_v2']['maps']
      for string in maps['strings']:
        process.strings[string['id']] = string['string']

      # Convert V2 nodes into the V1-shaped stackframe dicts used everywhere
      # else in this script ({'name': ..., 'parent': ...}).
      for node in maps['nodes']:
        node_v1 = {}
        node_v1['name'] = process.strings[node['name_sid']]
        if 'parent' in node:
          node_v1['parent'] = node['parent']
        process.stackframes[node['id']] = node_v1

      for t in maps['types']:
        process.types[t['id']] = process.strings[t['name_sid']]

      # Only keep the first memory dump seen for the process.
      if not process.allocators:
        dump = event['args']['dumps']
        process.version = 2
        process.allocators = dump['heaps_v2']['allocators']

  # Remove processes with incomplete memory dump.
  # Note: Calling list() otherwise we can't modify list while iterating.
  for pid, process in list(processes.items()):
    if not (process.allocators and process.stackframes and process.types):
      del processes[pid]

  return processes
138
139
def ResolveMemoryDumpFields(entries, stackframes, types):
  """Resolve numeric ids on each entry into human-readable values, in place.

  entry.stackframe becomes a tuple of frame names (leaf first) and entry.type
  becomes the resolved type name.
  """

  def _StackTraceFor(frame_id):
    # Walk the parent chain iteratively; strip CR/LF from frame names.
    frames = []
    current = stackframes[frame_id]
    while True:
      frames.append(current['name'].replace('\r', '').replace('\n', ''))
      if 'parent' not in current:
        break
      current = stackframes[current['parent']]
    return tuple(frames)

  for entry in entries:
    # The stackframe id may be -1 (18446744073709551615 when read unsigned)
    # when no stack trace is available for the entry.
    if entry.stackframe in stackframes:
      entry.stackframe = _StackTraceFor(entry.stackframe)
    else:
      entry.stackframe = []
    entry.type = types[entry.type]
160
161
def IncrementHeapEntry(stack, count, size, typename, root):
  """Accumulate one allocation into the aggregation tree rooted at 'root'.

  The stack is consumed from its last element down to its first; each frame
  selects (creating on demand) a child node. The count/size/per-type count
  are added to the node reached once the stack is exhausted.
  """
  node = root
  for frame in reversed(stack):
    children = node['children']
    if frame not in children:
      children[frame] = {
          'count': 0,
          'size': 0,
          'children': {},
          'count_by_type': {},
      }
    node = children[frame]

  node['count'] += count
  node['size'] += size
  by_type = node['count_by_type']
  by_type[typename] = by_type.get(typename, 0) + count
182
183
def CanonicalHeapEntries(root):
  """Convert cumulative totals into per-node 'self' values, recursively.

  After this pass each node's count/size (and per-type counts) exclude
  whatever is already accounted for by its children.
  """
  children = root['children'].values()

  # Subtract the children's totals from this node's totals.
  root['count'] -= sum(child['count'] for child in children)
  root['size'] -= sum(child['size'] for child in children)

  # Apply the same adjustment to each per-type counter.
  for typename in root['count_by_type']:
    root['count_by_type'][typename] -= sum(
        child['count_by_type'].get(typename, 0) for child in children)

  for child in children:
    CanonicalHeapEntries(child)
202
203
def FindLeaks(root, stack, leaks, threshold, size_threshold):
  """Depth-first walk collecting nodes exceeding both thresholds.

  'stack' is the frame path (leaf first) leading to 'root'; matching nodes
  are appended to 'leaks' (children before their parent).
  """
  for frame, child in root['children'].items():
    FindLeaks(child, [frame] + stack, leaks, threshold, size_threshold)

  if root['count'] > threshold and root['size'] > size_threshold:
    leaks.append({
        'count': root['count'],
        'size': root['size'],
        'count_by_type': root['count_by_type'],
        'stackframes': stack,
    })
etiennebaaabc6a2017-06-28 17:37:10214
def DumpTree(root, frame, output, threshold, size_threshold):
  """Serialize the aggregation tree rooted at 'root' as JSON text.

  Args:
    root: node dict with 'count', 'size' and 'children' keys.
    frame: stackframe name attached to this node.
    output: writable text stream receiving the serialized tree.
    threshold: minimum object count for size/count fields to be emitted.
    size_threshold: minimum size for size/count fields to be emitted.
  """
  output.write('\n{ "name": "%s",' % frame)
  # Bug fix: the size threshold must be compared against the node's size,
  # not its count (matches the FindLeaks condition).
  if root['count'] > threshold and root['size'] > size_threshold:
    output.write(' "size": "%s",' % root['size'])
    output.write(' "count": "%s",' % root['count'])
  output.write(' "children": [')
  is_first = True
  for child_frame, child in root['children'].items():
    if is_first:
      is_first = False
    else:
      output.write(',')

    DumpTree(child, child_frame, output, threshold, size_threshold)
  output.write(']')
  output.write('}')
231
232
def GetEntries(heap, process):
  """Return the entries of 'heap' for 'process', with ids resolved to names.

  Unknown/cumulative entries are filtered out. Returns an empty list when no
  process is given.
  """
  if not process:
    return []

  entries = []
  if process.version == 1:
    for raw_entry in process.allocators[heap]['entries']:
      # Cumulative sizes and types are skipped. see:
      # https://chromium.googlesource.com/chromium/src/+/a990af190304be5bf38b120799c594df5a293518/base/trace_event/heap_profiler_heap_dump_writer.cc#294
      if 'type' not in raw_entry or not raw_entry['bt']:
        continue

      entry = Entry()
      entry.count = int(raw_entry['count'], 16)
      entry.size = int(raw_entry['size'], 16)
      entry.type = int(raw_entry['type'])
      entry.stackframe = int(raw_entry['bt'])
      entries.append(entry)

  elif process.version == 2:
    # V2 stores the fields as four parallel arrays.
    allocator = process.allocators[heap]
    for raw_count, raw_size, raw_type, raw_node in zip(allocator['counts'],
                                                       allocator['sizes'],
                                                       allocator['types'],
                                                       allocator['nodes']):
      entry = Entry()
      entry.count = raw_count
      entry.size = raw_size
      entry.type = raw_type
      entry.stackframe = raw_node
      entries.append(entry)

  # Translate type and stackframe ids into their resolved values.
  ResolveMemoryDumpFields(entries, process.stackframes, process.types)

  return entries
273
274
def FilterProcesses(processes, filter_by_name, filter_by_labels):
  """Return the subset of 'processes' matching the name and labels filters.

  A process is kept when its name equals 'filter_by_name' (if given) and its
  labels contain 'filter_by_labels' (if given).
  """
  remaining = {}
  for pid, process in processes.items():
    name_ok = not filter_by_name or process.name == filter_by_name
    labels_ok = (not filter_by_labels or
                 (process.labels and filter_by_labels in process.labels))
    if name_ok and labels_ok:
      remaining[pid] = process

  return remaining
286
287
def FindRelevantProcesses(start_trace, end_trace,
                          filter_by_name,
                          filter_by_labels,
                          match_by_labels):
  """Return a list of (start_process, end_process) pairs to be diffed.

  start_process is None when no start trace was provided.
  """
  # Retrieve the processes and the associated memory dump.
  end_processes = FindMemoryDumps(end_trace)
  end_processes = FilterProcesses(end_processes, filter_by_name,
                                  filter_by_labels)

  start_processes = None
  if start_trace:
    start_processes = FindMemoryDumps(start_trace)
    start_processes = FilterProcesses(start_processes, filter_by_name,
                                      filter_by_labels)

  # Build a sequence of pair of processes to be compared.
  processes = []
  if not start_processes:
    # Only keep end-processes.
    for _, end_process in six.iteritems(end_processes):
      processes.append((None, end_process))
  elif match_by_labels:
    # Processes are paired based on name/labels. Browser and GPU processes
    # are singletons, so a name match alone is enough for them.
    for _, end_process in six.iteritems(end_processes):
      matching_start_process = None
      # NOTE(review): no break here — when several start processes match,
      # the last one iterated wins; confirm this is intended.
      for _, start_process in six.iteritems(start_processes):
        if (start_process.name == end_process.name and
            (start_process.name in ['Browser', 'GPU'] or
             start_process.labels == end_process.labels)):
          matching_start_process = start_process

      if matching_start_process:
        processes.append((matching_start_process, end_process))
  else:
    # Processes are paired based on their PID.
    relevant_pids = set(end_processes.keys()) & set(start_processes.keys())
    for pid in relevant_pids:
      start_process = start_processes[pid]
      end_process = end_processes[pid]
      processes.append((start_process, end_process))

  return processes
330
331
def BuildGraphDumps(processes, threshold, size_threshold):
  """
  Build graph for a sequence of pair of processes.
  If start_process is None, counts objects in end_trace.
  Otherwise, counts objects present in end_trace, but not in start_process.
  """

  graph_dumps = []

  for (start_process, end_process) in processes:
    pid = end_process.pid
    name = end_process.name if end_process.name else ''
    labels = end_process.labels if end_process.labels else ''
    print('Process[%d] %s: %s' % (pid, name, labels))

    # One GraphDump per heap of the end process.
    for heap in end_process.allocators:
      start_entries = GetEntries(heap, start_process)
      end_entries = GetEntries(heap, end_process)

      graph = GraphDump()
      graph.pid = pid
      graph.name = name
      graph.labels = labels
      graph.heap = heap
      graph_dumps.append(graph)

      # Do the math: diffing start and end memory dumps.
      root = {}
      root['count'] = 0
      root['size'] = 0
      root['children'] = {}
      root['count_by_type'] = {}

      # Start entries are subtracted, end entries are added, so the tree
      # holds the net allocations appearing between the two traces.
      for entry in start_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, - entry.count, - entry.size,
                             entry.type, root)
      for entry in end_entries:
        if entry.type:
          IncrementHeapEntry(entry.stackframe, entry.count, entry.size,
                             entry.type, root)

      # Turn cumulative totals into per-node 'self' values.
      CanonicalHeapEntries(root)

      graph.root = root

      # Find leaks, largest first.
      leaks = []
      FindLeaks(root, [], leaks, threshold, size_threshold)
      leaks.sort(reverse=True, key=lambda k: k['size'])

      if leaks:
        print(' %s: %d potential leaks found.' % (heap, len(leaks)))
        graph.leaks = leaks
        graph.leak_stackframes = len(leaks)
        for leak in leaks:
          graph.leak_objects += leak['count']

  return graph_dumps
391
392
def WritePotentialLeaks(graph_dumps):
  """Write one 'process_<pid>_<heap>-leaks.json' file per graph with leaks."""
  for graph in graph_dumps:
    if not graph.leaks:
      continue
    leaks_name = 'process_%d_%s-leaks.json' % (graph.pid, graph.heap)
    leaks_path = os.path.join(_OUTPUT_DIR, leaks_name)
    with open(leaks_path, 'w') as output_file:
      json.dump(graph.leaks, output_file)
400
401
def WriteGrahDumps(graph_dumps, threshold, size_threshold):
  """Dump each graph's object tree to a JSON file, recording its filename."""
  for graph in graph_dumps:
    # Serialize the remaining allocated objects tree, then replace the
    # in-memory tree on the graph by the name of the file holding it.
    objects_name = 'process_%d_%s-objects.json' % (graph.pid, graph.heap)
    objects_path = os.path.join(_OUTPUT_GRAPH_DIR, objects_name)
    if graph.root:
      with open(objects_path, 'w') as output_file:
        DumpTree(graph.root, '.', output_file, threshold, size_threshold)
      graph.root = objects_name
411
412
def WriteIndex(graph_dumps):
  """Write graph/index.json summarizing every graph dump."""
  summaries = []
  for graph in graph_dumps:
    summaries.append({
        'pid': graph.pid,
        'heap': graph.heap,
        'name': graph.name,
        'labels': graph.labels,
        'objects': graph.root,
        'potential leaks': graph.leak_stackframes,
        'objects leaked': graph.leak_objects,
    })

  index_path = os.path.join(_OUTPUT_GRAPH_DIR, 'index.json')
  with open(index_path, 'w') as output_file:
    json.dump(summaries, output_file)
426
427
def WriteHTML():
  """Copy the viewer page and its D3 dependency into the graph directory."""
  script_dir = os.path.dirname(os.path.abspath(__file__))

  # The HTML page driving the visualization.
  shutil.copyfile(
      os.path.join(script_dir, 'diff_heap_profiler.html'),
      os.path.join(_OUTPUT_GRAPH_DIR, 'index.html'))

  # The D3 library file it depends on, from tracing's third_party.
  d3_source = os.path.join(script_dir,
                           os.path.pardir,
                           os.path.pardir,
                           os.path.pardir,
                           'tracing',
                           'third_party',
                           'd3',
                           'd3.min.js')
  shutil.copyfile(d3_source, os.path.join(_OUTPUT_GRAPH_DIR, 'd3.min.js'))
446
447
def Main():
  """Parse command-line options, diff the traces and write the reports."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--flame-graph',
      action='store_true',
      help='Output a flame graph based on stackframe allocations')
  parser.add_argument(
      '--threshold',
      type=int,
      default=0,
      help='Objects threshold for being a potential memory leak')
  parser.add_argument(
      '--size-threshold',
      type=int,
      default=0,
      help='Size threshold for being a potential memory leak')
  parser.add_argument(
      '--filter-by-name',
      type=str,
      help='Only keep processes with name (i.e. Browser, Renderer, ...)')
  parser.add_argument(
      '--filter-by-labels',
      type=str,
      help='Only keep processes with matching labels')
  parser.add_argument(
      '--match-by-labels',
      action='store_true',
      help='Match processes between runs by labels')
  parser.add_argument(
      'trace',
      nargs='+',
      help='Trace files to be processed')
  options = parser.parse_args()

  # Default object-count threshold when neither threshold was specified.
  if options.threshold == 0 and options.size_threshold == 0:
    options.threshold = 1000

  # With a single trace, count objects in it; with two traces, diff the
  # first (start) against the second (end).
  if len(options.trace) == 1:
    end_trace = options.trace[0]
    start_trace = None
  else:
    start_trace = options.trace[0]
    end_trace = options.trace[1]

  if not os.path.exists(_OUTPUT_DIR):
    os.makedirs(_OUTPUT_DIR)

  # Find relevant processes to be processed.
  processes = FindRelevantProcesses(start_trace, end_trace,
                                    options.filter_by_name,
                                    options.filter_by_labels,
                                    options.match_by_labels)

  graph_dumps = BuildGraphDumps(processes, options.threshold,
                                options.size_threshold)

  WritePotentialLeaks(graph_dumps)

  # The flame-graph output additionally needs the graph directory, the
  # per-process object trees, an index and the HTML viewer.
  if options.flame_graph:
    if not os.path.exists(_OUTPUT_GRAPH_DIR):
      os.makedirs(_OUTPUT_GRAPH_DIR)
    WriteGrahDumps(graph_dumps, options.threshold, options.size_threshold)
    WriteIndex(graph_dumps)
    WriteHTML()
511 WriteHTML()
512
# Script entry point.
if __name__ == '__main__':
  Main()