#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import cgi
import colorsys
import difflib
import random
import os
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """

  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """

  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.
    tokenize_by_char: If true, individual characters are treated as tokens.
      Otherwise, tokens are either symbols or strings of both alphanumeric
      characters and underscores.
    tokenize_whitespace: Treat non-newline whitespace characters as tokens.

  Returns:
    A list of lists of TokenContexts. Each list represents a line.
  """
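  # Illustrative example (added comment, not executed): with the default
  # flags, tokenize_data('foo = 1', False, False) returns a single line of
  # TokenContexts: 'foo' at column 0, '=' at column 4 and '1' at column 6;
  # the spaces are skipped because tokenize_whitespace is False.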
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

  for c in data + '\n':
    if not tokenize_by_char and (c.isalnum() or c == '_'):
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace() or (tokenize_whitespace and c != '\n'):
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_tokens = []
      line_contexts = []
    else:
      column += 1
  return contexts


def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')


def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end). The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))


def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
      added_tokens: A list of indices into |current_tokens|.
      changed_tokens: A map of indices into |current_tokens| to
        indices into |previous_tokens|.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith("@@"):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens


def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A dict mapping each index i into |flattened| to a
        pair (r, c) such that flattened[i] == l[r][c].
  """
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)


def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
223 flat_previous_tokens, previous_index_to_position = flatten_nested_list(
224 previous_tokens)
225 flat_current_tokens, current_index_to_position = flatten_nested_list(
226 current_tokens)
227 added_indices, changed_indices = compute_changed_token_indices(
228 flat_previous_tokens, flat_current_tokens)
229 added_token_positions = [current_index_to_position[i] for i in added_indices]
230 changed_token_positions = {
231 current_index_to_position[current_i]:
232 previous_index_to_position[changed_indices[current_i]]
233 for current_i in changed_indices
234 }
235 return (added_token_positions, changed_token_positions)
236
237
238def parse_chunks_from_diff(diff):
239 """Returns a generator of chunk data from a diff.
240
241 Args:
242 diff: A list of strings, with each string being a line from a diff
243 in unified diff format.
244
245 Returns:
Tom Anderson5002161142018-04-17 00:30:21246 A generator of tuples (added_lines_start, added_lines_end, removed_lines)
Tom Andersonc3ed8962017-10-09 19:01:46247 """
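  # Illustrative example (added comment): a diff containing
  #   @@ -1,2 +1,2 @@
  #    unchanged
  #   -old line
  #   +new line
  # yields a single chunk (1, 2, ['old line']): the half-open range [1, 2) of
  # 0-based current line indices was added, and one line was removed.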
  it = iter(diff)
  for line in it:
    while not line.startswith('@@'):
      line = it.next()
    parts = line.split(' ')
    previous_start, previous_end = parse_chunk_header_file_range(
        parts[1].lstrip('-'))
    current_start, current_end = parse_chunk_header_file_range(
        parts[2].lstrip('+'))

    in_delta = False
    added_lines_start = None
    added_lines_end = None
    removed_lines = []
    while previous_start < previous_end or current_start < current_end:
      line = it.next()
      firstchar = line[0]
      line = line[1:]
      if not in_delta and (firstchar == '-' or firstchar == '+'):
        in_delta = True
        added_lines_start = current_start
        added_lines_end = current_start
        removed_lines = []

      if firstchar == '-':
        removed_lines.append(line)
        previous_start += 1
      elif firstchar == '+':
        current_start += 1
        added_lines_end = current_start
      elif firstchar == ' ':
        if in_delta:
          in_delta = False
          yield (added_lines_start, added_lines_end, removed_lines)
        previous_start += 1
        current_start += 1
    if in_delta:
      yield (added_lines_start, added_lines_end, removed_lines)


def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS. The
  next commit, 1e7896, brought them back. This is a hack to skip
  those commits (except for the files they modified). If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split('\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = ''.join(data)
      if joined != '':
        yield joined
      for i in range(1, len(parts) - 1):
        if parts[i] != '':
          yield parts[i]
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = ''.join(data)
      if joined != '':
        yield joined
      return


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects.
  """
  substring_generator = generate_substrings(git_log_stdout)
  while True:
    hash = substring_generator.next()
    author_name = substring_generator.next()
    author_email = substring_generator.next()
    author_date = substring_generator.next()
    message = substring_generator.next()
    diff = substring_generator.next().split('\n')
    yield Commit(hash, author_name, author_email, author_date, message, diff)


def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    git_log_stdout: A file object that represents the git log output.
    data: A string containing the data of file |file_name|.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts, one list per line.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  blame = tokenization_method(data)

  blamed_tokens = 0
  total_tokens = len(blame)
  uber_blame = (data, blame[:])

  for commit in generate_commits(git_log_stdout):
    if should_skip_commit(commit):
      continue

    offset = 0
    for (added_lines_start, added_lines_end,
         removed_lines) in parse_chunks_from_diff(commit.diff):
      added_lines_start += offset
      added_lines_end += offset
      previous_contexts = [
          token_lines
          for line_previous in removed_lines
          for token_lines in tokenization_method(line_previous)
      ]
      previous_tokens = [[context.token for context in contexts]
                         for contexts in previous_contexts]
      current_contexts = blame[added_lines_start:added_lines_end]
      current_tokens = [[context.token for context in contexts]
                        for contexts in current_contexts]
      added_token_positions, changed_token_positions = (
          compute_changed_token_positions(previous_tokens, current_tokens))
      for r, c in added_token_positions:
        current_contexts[r][c].commit = commit
        blamed_tokens += 1
      for r, c in changed_token_positions:
        pr, pc = changed_token_positions[(r, c)]
        previous_contexts[pr][pc] = current_contexts[r][c]

      assert added_lines_start <= added_lines_end <= len(blame)
      current_blame_size = len(blame)
      blame[added_lines_start:added_lines_end] = previous_contexts
      offset += len(blame) - current_blame_size

  assert blame == [] or blame == [[]]
  return uber_blame


def uberblame(file_name, revision, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts, one list per line.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  DIFF_CONTEXT = 3
  cmd_git_log = [
      'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
      '--first-parent', '-p',
      '-U%d' % DIFF_CONTEXT, '-z', '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision, '--', file_name
  ]
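  # For example (illustrative, with a made-up file name), for revision 'HEAD'
  # and file 'foo.cc' the command above is roughly:
  #   git log --minimal --no-prefix --follow -m --first-parent -p -U3 -z \
  #       --format=<NUL-separated fields> HEAD -- foo.cc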
  git_log = subprocess.Popen(
      cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)])
  data, blame = uberblame_aux(file_name, git_log.stdout, data,
                              tokenization_method)

  _, stderr = git_log.communicate()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
  """Generates a random color from a nice looking pastel palette.

  Returns:
    The color, formatted as hex string. For example, white is "#FFFFFF".
  """
  (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9), random.uniform(
      0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))


def create_visualization(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The data file as returned by uberblame().
    blame: A list of lists of TokenContexts as returned by uberblame().

  Returns:
    The html for the generated page, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  html = """\
  <html>
    <head>
      <style>
        body {
          font-family: "Courier New";
        }
        pre {
          display: inline;
        }
        span {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash]
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display" />
      </div>
    </body>
  </html>
  """
  html = textwrap.dedent(html)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(('<span style="background-color: %s" ' +
                          'onclick="display_commit(&quot;%s&quot;)">') % (color,
                                                                          hash))
      lines.append(cgi.escape(c))
      column += 1
    row += 1
  commit_data = ['{']
  commit_display_format = """\
  commit: {hash}
  Author: {author_name} <{author_email}>
  Date: {author_date}

  {message}
  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message)
    commit_display = cgi.escape(commit_display, quote=True)
    commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return html % (commit_data, line_nums, lines)


def show_visualization(html):
617 """Display |html| in a web browser.
618
619 Args:
620 html: The contents of the file to display, as a string.
621 """
622 # Keep the temporary file around so the browser has time to open it.
623 # TODO(thomasanderson): spin up a temporary web server to serve this
624 # file so we don't have to leak it.
625 html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
626 html_file.write(html)
627 html_file.flush()
628 if sys.platform.startswith('linux'):
629 # Don't show any messages when starting the browser.
630 saved_stdout = os.dup(1)
631 saved_stderr = os.dup(2)
632 os.close(1)
633 os.close(2)
634 os.open(os.devnull, os.O_RDWR)
635 os.open(os.devnull, os.O_RDWR)
636 webbrowser.open('file://' + html_file.name)
637 if sys.platform.startswith('linux'):
638 os.dup2(saved_stdout, 1)
639 os.dup2(saved_stderr, 2)
640 os.close(saved_stdout)
641 os.close(saved_stderr)
642
643
Tom Andersonb3d7e642018-04-13 16:23:42644def main(argv):
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file.')
  parser.add_argument(
      'revision',
      default='HEAD',
      nargs='?',
      help='show only commits starting from a revision')
  parser.add_argument('file', help='the file to uberblame')
  parser.add_argument(
      '--skip-visualization',
      action='store_true',
      help='do not display the blame visualization in a web browser')
  parser.add_argument(
      '--tokenize-by-char',
      action='store_true',
      help='treat individual characters as tokens')
  parser.add_argument(
      '--tokenize-whitespace',
      action='store_true',
      help='also blame non-newline whitespace characters')
  args = parser.parse_args(argv)

  def tokenization_method(data):
    return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)

  data, blame = uberblame(args.file, args.revision, tokenization_method)
  html = create_visualization(data, blame)
  if not args.skip_visualization:
    show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))