#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
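
"""Shows which commit last modified each token of a file.

Walks the file's git history, attributes every token to the commit that
introduced it, and opens an HTML visualization of the result in a browser.
"""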

import argparse
import cgi
import colorsys
import difflib
import os
import random
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """
  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """
  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.

  Returns:
    A list of lists of TokenContexts, one inner list per line of |data|.
  """
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

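  # Identifiers ([A-Za-z0-9_]+) form a single token; every other
  # non-whitespace character is a token of its own.  The appended '\n'
  # guarantees the final line gets flushed into |contexts|.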
  for c in data + '\n':
    if c.isalnum() or c == '_':
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(
            TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace():
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_contexts = []
    else:
      column += 1
  return contexts


def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')


def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end).  The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
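  # For example: '12,3' -> (11, 14), '7' -> (6, 7), '5,0' -> (5, 5)
  # (an empty range).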
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))


def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
      added_tokens: A list of indices into |current_tokens|.
      changed_tokens: A map of indices into |current_tokens| to
        indices into |previous_tokens|.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
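  # Tokens lying between two diff chunks are unchanged, so they map 1:1 from
  # |current_tokens| back to |previous_tokens|.  Track where the previous
  # chunk ended on each side to recover that mapping.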
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith("@@"):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens


def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A map from index i in |flattened| to a pair (r, c)
        such that flattened[i] == l[r][c].
  """
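  # e.g. flatten_nested_list([['a', 'b'], ['c']]) ==
  #     (['a', 'b', 'c'], {0: (0, 0), 1: (0, 1), 2: (1, 0)})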
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)


def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings.  Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings.  Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
  flat_previous_tokens, previous_index_to_position = flatten_nested_list(
      previous_tokens)
  flat_current_tokens, current_index_to_position = flatten_nested_list(
      current_tokens)
  added_indices, changed_indices = compute_changed_token_indices(
      flat_previous_tokens, flat_current_tokens)
  added_token_positions = [current_index_to_position[i] for i in added_indices]
  changed_token_positions = {
      current_index_to_position[current_i]:
          previous_index_to_position[changed_indices[current_i]]
      for current_i in changed_indices
  }
  return (added_token_positions, changed_token_positions)


def parse_chunks_from_diff(diff):
  """Returns a generator of chunk data from a diff.

  Args:
    diff: A list of strings, with each string being a line from a diff
      in unified diff format.

  Returns:
    A generator of tuples (added_lines_start, added_lines_end,
    removed_lines, removed_lines_start).
  """
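  # Only the removed ('-') lines of each chunk need to be collected here; the
  # added lines are recovered later from the caller's token data for the
  # current revision, using the line range in the chunk header.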
  in_chunk = False
  chunk_previous = []
  previous_start = None
  current_start = None
  current_end = None
  for line in diff:
    if line.startswith('@@'):
      if in_chunk:
        yield (current_start, current_end,
               chunk_previous, previous_start)
      parts = line.split(' ')
      previous = parts[1].lstrip('-')
      previous_start, _ = parse_chunk_header_file_range(previous)
      current = parts[2].lstrip('+')
      current_start, current_end = parse_chunk_header_file_range(current)
      in_chunk = True
      chunk_previous = []
    elif in_chunk and line.startswith('-'):
      chunk_previous.append(line[1:])
  if current_start is not None:
    yield (current_start, current_end,
           chunk_previous, previous_start)


def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS.  The
  next commit, 1e7896, brought them back.  This is a hack to skip
  those commits (except for the files they modified).  If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
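  # Look only at the '---'/'+++' headers that appear before the first '@@'
  # hunk; if none of them name an excepted file, skip the commit.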
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
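  # Read the stream in fixed-size chunks, buffering the tail of each read
  # until its terminating '\0' arrives; empty substrings are dropped.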
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split('\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = ''.join(data)
      if joined != '':
        yield joined
      for i in range(1, len(parts) - 1):
        if parts[i] != '':
          yield parts[i]
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = ''.join(data)
      if joined != '':
        yield joined
      return


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects."""
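  # git log emits each commit as six NUL-separated fields (hash, author name,
  # author email, author date, message, diff), matching the --format string
  # built in uberblame() below.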
  substring_generator = generate_substrings(git_log_stdout)
  while True:
    hash = substring_generator.next()
    author_name = substring_generator.next()
    author_email = substring_generator.next()
    author_date = substring_generator.next()
    message = substring_generator.next()
    diff = substring_generator.next().split('\n')
    yield Commit(hash, author_name, author_email, author_date, message, diff)


def uberblame_aux(file_name, git_log_stdout, data):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    git_log_stdout: A file object that represents the git log output.
    data: A string containing the data of file |file_name|.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one inner list per line.
  """
  blame = tokenize_data(data)

  blamed_tokens = 0
  total_tokens = len(blame)
  uber_blame = (data, blame[:])

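  # Walk the history from newest to oldest.  Tokens added by a commit are
  # attributed to it; tokens the commit merely carried over are moved into
  # the reconstructed previous revision of the affected lines, which then
  # replaces those lines in |blame| so older commits can claim them.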
  for commit in generate_commits(git_log_stdout):
    if should_skip_commit(commit):
      continue

    offset = 0
    for (added_lines_start, added_lines_end, removed_lines,
         removed_lines_start) in parse_chunks_from_diff(commit.diff):
      added_lines_start += offset
      added_lines_end += offset
      previous_contexts = [token_lines
                           for line_previous in removed_lines
                           for token_lines in tokenize_data(line_previous)]
      previous_tokens = [
          [context.token for context in contexts]
          for contexts in previous_contexts
      ]
      current_contexts = blame[added_lines_start:added_lines_end]
      current_tokens = [
          [context.token for context in contexts]
          for contexts in current_contexts
      ]
      added_token_positions, changed_token_positions = (
          compute_changed_token_positions(previous_tokens, current_tokens))
      for r, c in added_token_positions:
        current_contexts[r][c].commit = commit
        blamed_tokens += 1
      for r, c in changed_token_positions:
        pr, pc = changed_token_positions[(r, c)]
        previous_contexts[pr][pc] = current_contexts[r][c]

      assert added_lines_start <= added_lines_end <= len(blame)
      current_blame_size = len(blame)
      blame[added_lines_start:added_lines_end] = previous_contexts
      offset += len(blame) - current_blame_size

  assert blame == [] or blame == [[]]
  return uber_blame


def uberblame(file_name, revision):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one inner list per line.
  """
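  # -z and the %x00s in --format separate the per-commit fields with NUL
  # bytes, so generate_commits() can split them without being confused by
  # newlines in commit messages or diffs.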
  cmd_git_log = [
      'git',
      'log',
      '--minimal',
      '--no-prefix',
      '--follow',
      '-m',
      '--first-parent',
      '-p',
      '-U0',
      '-z',
      '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision,
      '--',
      file_name
  ]
  git_log = subprocess.Popen(cmd_git_log,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)])
  data, blame = uberblame_aux(file_name, git_log.stdout, data)

  _, stderr = git_log.communicate()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
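  # High lightness and moderate-to-high saturation yield light backgrounds
  # that keep the text on top of them readable.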
  (h, l, s) = (random.uniform(0, 1),
               random.uniform(0.8, 0.9),
               random.uniform(0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))


def visualize_uberblame(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The file contents as returned by uberblame().
    blame: A list of lists of TokenContexts as returned by uberblame().

  Returns:
    The HTML for the visualization, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  html = """\
  <html>
    <head>
      <style>
        body {
          font-family: "Courier New";
        }
        pre {
          display: inline;
        }
        span {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash];
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display"></pre>
      </div>
    </body>
  </html>
  """
  html = textwrap.dedent(html)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
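  # Walk the file character by character.  A <span> is opened where a token
  # from a new commit starts and closed after the last consecutive token from
  # that same commit, so neighboring tokens of one commit share one span.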
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or
              blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or
              blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(
                ('<span style="background-color: %s" ' +
                 'onclick="display_commit(\'%s\')">') % (color, hash))
      lines.append(cgi.escape(c))
      column += 1
    row += 1
  commit_data = ['{']
  commit_display_format = """\
    commit: {hash}
    Author: {author_name} <{author_email}>
    Date: {author_date}

    {message}
  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message,
    )
    commit_display = cgi.escape(commit_display, quote=True)
    commit_display = re.sub(
        links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return html % (commit_data, line_nums, lines)


def show_visualization(html):
  """Display |html| in a web browser.

  Args:
    html: The contents of the file to display, as a string.
  """
  # Keep the temporary file around so the browser has time to open it.
  # TODO(thomasanderson): spin up a temporary web server to serve this
  # file so we don't have to leak it.
  html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
  html_file.write(html)
  html_file.flush()
  if sys.platform.startswith('linux'):
    # Don't show any messages when starting the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    os.open(os.devnull, os.O_RDWR)
  webbrowser.open('file://' + html_file.name)
  if sys.platform.startswith('linux'):
    os.dup2(saved_stdout, 1)
    os.dup2(saved_stderr, 2)
    os.close(saved_stdout)
    os.close(saved_stderr)


def main():
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file')
  parser.add_argument('revision', default='HEAD', nargs='?',
                      help='Show only commits starting from a revision.')
  parser.add_argument('file', help='The file to uberblame.')
  args = parser.parse_args()

  data, blame = uberblame(args.file, args.revision)
  html = visualize_uberblame(data, blame)
  show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main())