#!/usr/bin/env python3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import colorsys
import difflib
import html
import random
import os
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """

  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """

  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.
    tokenize_by_char: If true, individual characters are treated as tokens.
      Otherwise, tokens are either symbols or strings of both alphanumeric
      characters and underscores.
    tokenize_whitespace: Treat non-newline whitespace characters as tokens.

  Returns:
    A list of lists of TokenContexts. Each list represents a line.
  """
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

  for c in data:
    if not tokenize_by_char and (c.isalnum() or c == '_'):
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace() or (tokenize_whitespace and c != '\n'):
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_tokens = []
      line_contexts = []
    else:
      column += 1
  contexts.append(line_contexts)
  return contexts
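
# A rough illustration of tokenize_data()'s output shape (illustrative
# comment only, not executed by the script): with both flags False,
#
#   tokenize_data('int x = 0;\n', False, False)
#
# yields two lists of TokenContexts -- the first holds the tokens
# 'int' (0, 0), 'x' (0, 4), '=' (0, 6), '0' (0, 8) and ';' (0, 9), and the
# second is empty because the input ends with a newline.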


def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')


def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end). The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))
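
# For example (illustrative only), given a chunk header such as
# '@@ -7,3 +6,0 @@', the two file ranges parse as:
#
#   parse_chunk_header_file_range('7,3')  ->  (6, 9)  # 0-based lines 6, 7, 8
#   parse_chunk_header_file_range('6,0')  ->  (6, 6)  # empty range
#   parse_chunk_header_file_range('7')    ->  (6, 7)  # a single changed line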


def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
      added_tokens: A list of indices into |current_tokens|.
      changed_tokens: A map of indices into |current_tokens| to
        indices into |previous_tokens|.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith("@@"):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens
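
# A small worked example (illustrative only):
#
#   compute_changed_token_indices(['a', 'b', 'c'], ['a', 'x', 'c'])
#
# returns roughly ([1], {0: 0, 2: 2}): index 1 of the new token list ('x')
# was added, while indices 0 and 2 map back to the same positions in the old
# list, so their existing blame metadata can be carried over unchanged.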


def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A dict mapping each index i in |flattened| to the
        pair (r, c) such that flattened[i] == l[r][c].
  """
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)
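
# For instance (illustrative only):
#
#   flatten_nested_list([['a', 'b'], [], ['c']])
#
# returns (['a', 'b', 'c'], {0: (0, 0), 1: (0, 1), 2: (2, 0)}).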


def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
  flat_previous_tokens, previous_index_to_position = flatten_nested_list(
      previous_tokens)
  flat_current_tokens, current_index_to_position = flatten_nested_list(
      current_tokens)
  added_indices, changed_indices = compute_changed_token_indices(
      flat_previous_tokens, flat_current_tokens)
  added_token_positions = [current_index_to_position[i] for i in added_indices]
  changed_token_positions = {
      current_index_to_position[current_i]:
      previous_index_to_position[changed_indices[current_i]]
      for current_i in changed_indices
  }
  return (added_token_positions, changed_token_positions)


def parse_chunks_from_diff(diff):
  """Returns a generator of chunk data from a diff.

  Args:
    diff: A list of strings, with each string being a line from a diff
      in unified diff format.

  Returns:
    A generator of tuples (added_lines_start, added_lines_end, removed_lines)
  """
  it = iter(diff)
  for line in it:
    while not line.startswith('@@'):
      line = next(it)
    parts = line.split(' ')
    previous_start, previous_end = parse_chunk_header_file_range(
        parts[1].lstrip('-'))
    current_start, current_end = parse_chunk_header_file_range(
        parts[2].lstrip('+'))

    in_delta = False
    added_lines_start = None
    added_lines_end = None
    removed_lines = []
    while previous_start < previous_end or current_start < current_end:
      line = next(it)
      firstchar = line[0]
      line = line[1:]
      if not in_delta and (firstchar == '-' or firstchar == '+'):
        in_delta = True
        added_lines_start = current_start
        added_lines_end = current_start
        removed_lines = []

      if firstchar == '-':
        removed_lines.append(line)
        previous_start += 1
      elif firstchar == '+':
        current_start += 1
        added_lines_end = current_start
      elif firstchar == ' ':
        if in_delta:
          in_delta = False
          yield (added_lines_start, added_lines_end, removed_lines)
        previous_start += 1
        current_start += 1
    if in_delta:
      yield (added_lines_start, added_lines_end, removed_lines)
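
# A rough sketch of the generator's output (illustrative only): for the hunk
#
#   @@ -1,3 +1,3 @@
#    keep
#   -old
#   +new
#    keep
#
# it yields a single tuple (1, 2, ['old']): 0-based lines [1, 2) of the new
# file were touched, and the text of the removed lines is ['old'].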


def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS. The
  next commit, 1e7896, brought them back. This is a hack to skip
  those commits (except for the files they modified). If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split(b'\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = b''.join(data)
      if joined != b'':
        yield joined.decode()
      for i in range(1, len(parts) - 1):
        if parts[i] != b'':
          yield parts[i].decode()
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = b''.join(data)
      if joined != b'':
        yield joined.decode()
      return


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects.
  """
  substring_generator = generate_substrings(git_log_stdout)
  try:
    while True:
      hash = next(substring_generator)
      author_name = next(substring_generator)
      author_email = next(substring_generator)
      author_date = next(substring_generator)
      message = next(substring_generator).rstrip('\n')
      diff = next(substring_generator).split('\n')[1:-1]
      yield Commit(hash, author_name, author_email, author_date, message, diff)
  except StopIteration:
    pass
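
# Note: generate_commits() assumes the NUL-separated substrings produced by
# the git log invocation in uberblame() below arrive in the order: hash,
# author name, author email, author date, message, diff (see the
# --format=%x00%H%x00%an%x00%ae%x00%ad%x00%B argument there).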


def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    git_log_stdout: A file object that represents the git log output.
    data: A string containing the data of file |file_name|.
    tokenization_method: A function that takes a string and returns a list of
      TokenContexts.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of TokenContexts.
  """
  blame = tokenization_method(data)

  blamed_tokens = 0
  uber_blame = (data, blame[:])

  for commit in generate_commits(git_log_stdout):
    if should_skip_commit(commit):
      continue

    offset = 0
    for (added_lines_start, added_lines_end,
         removed_lines) in parse_chunks_from_diff(commit.diff):
      added_lines_start += offset
      added_lines_end += offset
      previous_contexts = [
          token_lines
          for line_previous in removed_lines
          for token_lines in tokenization_method(line_previous)
      ]
      previous_tokens = [[context.token for context in contexts]
                         for contexts in previous_contexts]
      current_contexts = blame[added_lines_start:added_lines_end]
      current_tokens = [[context.token for context in contexts]
                        for contexts in current_contexts]
      added_token_positions, changed_token_positions = (
          compute_changed_token_positions(previous_tokens, current_tokens))
      for r, c in added_token_positions:
        current_contexts[r][c].commit = commit
        blamed_tokens += 1
      for r, c in changed_token_positions:
        pr, pc = changed_token_positions[(r, c)]
        previous_contexts[pr][pc] = current_contexts[r][c]

      assert added_lines_start <= added_lines_end <= len(blame)
      current_blame_size = len(blame)
      blame[added_lines_start:added_lines_end] = previous_contexts
      offset += len(blame) - current_blame_size

  assert blame == [] or blame == [[]]
  return uber_blame


def uberblame(file_name, revision, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.
    tokenization_method: A function that takes a string and returns a list of
      TokenContexts.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of TokenContexts.
  """
  DIFF_CONTEXT = 3
  cmd_git_log = [
      'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
      '--first-parent', '-p',
      '-U%d' % DIFF_CONTEXT, '-z', '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision, '--', file_name
  ]
  git_log = subprocess.Popen(
      cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)]).decode()
  data, blame = uberblame_aux(file_name, git_log.stdout, data,
                              tokenization_method)

  stderr = git_log.communicate()[1].decode()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
  """Generates a random color from a nice looking pastel palette.

  Returns:
    The color, formatted as hex string. For example, white is "#FFFFFF".
  """
  (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9),
               random.uniform(0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))


def colorize_diff(diff):
  """Colorizes a diff for use in an HTML page.

  Args:
    diff: The diff, in unified diff format, as a list of line strings.

  Returns:
    The HTML-formatted diff, as a string. The diff will already be escaped.
  """

  colorized = []
  for line in diff:
    escaped = html.escape(line.replace('\r', ''), quote=True)
    if line.startswith('+'):
      colorized.append('<span class=\\"addition\\">%s</span>' % escaped)
    elif line.startswith('-'):
      colorized.append('<span class=\\"deletion\\">%s</span>' % escaped)
    elif line.startswith('@@'):
      context_begin = escaped.find('@@', 2)
      assert context_begin != -1
      colorized.append(
          '<span class=\\"chunk_meta\\">%s</span>'
          '<span class=\\"chunk_context\\">%s</span>'
          % (escaped[0:context_begin + 2], escaped[context_begin + 2:]))
    elif line.startswith('diff') or line.startswith('index'):
      colorized.append('<span class=\\"file_header\\">%s</span>' % escaped)
    else:
      colorized.append('<span class=\\"context_line\\">%s</span>' % escaped)
  return '\n'.join(colorized)


def create_visualization(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The data file as returned by uberblame().
    blame: A list of TokenContexts as returned by uberblame().

  Returns:
    The HTML for the generated page, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  page = """\
  <html>
    <head>
      <style>
        body {
          font-family: monospace;
        }
        pre {
          display: inline;
        }
        .token {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        .addition {
          color: #080;
        }
        .deletion {
          color: #c00;
        }
        .chunk_meta {
          color: #099;
        }
        .context_line .chunk_context {
          // Just normal text.
        }
        .file_header {
          font-weight: bold;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash]
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display" />
      </div>
    </body>
  </html>
  """
  page = textwrap.dedent(page)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(
                ('<span class="token" style="background-color: %s" '
                 'onclick="display_commit(&quot;%s&quot;)">') % (color, hash))
      lines.append(html.escape(c))
      column += 1
    row += 1
  commit_data = ['{\n']
  commit_display_format = """\
  commit: {hash}
  Author: {author_name} <{author_email}>
  Date: {author_date}

  {message}

  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message)
    commit_display = html.escape(commit_display, quote=True)
    commit_display += colorize_diff(commit.diff)
    commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",\n' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return page % (commit_data, line_nums, lines)


def show_visualization(page):
  """Displays |page| in a web browser.

  Args:
    page: The contents of the file to display, as a string.
  """
  # Keep the temporary file around so the browser has time to open it.
  # TODO(thomasanderson): spin up a temporary web server to serve this
  # file so we don't have to leak it.
  html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
  html_file.write(page.encode())
  html_file.flush()
  if sys.platform.startswith('linux'):
    # Don't show any messages when starting the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    os.open(os.devnull, os.O_RDWR)
  webbrowser.open('file://' + html_file.name)
  if sys.platform.startswith('linux'):
    os.dup2(saved_stdout, 1)
    os.dup2(saved_stderr, 2)
    os.close(saved_stdout)
    os.close(saved_stderr)


def main(argv):
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file.')
  parser.add_argument(
      'revision',
      default='HEAD',
      nargs='?',
      help='show only commits starting from a revision')
  parser.add_argument('file', help='the file to uberblame')
  parser.add_argument(
      '--skip-visualization',
      action='store_true',
      help='do not display the blame visualization in a web browser')
  parser.add_argument(
      '--tokenize-by-char',
      action='store_true',
      help='treat individual characters as tokens')
  parser.add_argument(
      '--tokenize-whitespace',
      action='store_true',
      help='also blame non-newline whitespace characters')
  args = parser.parse_args(argv)

  def tokenization_method(data):
    return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)

  data, blame = uberblame(args.file, args.revision, tokenization_method)
  html = create_visualization(data, blame)
  if not args.skip_visualization:
    show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))