#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import cgi
import colorsys
import difflib
import random
import os
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """

  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """

  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.
    tokenize_by_char: If true, individual characters are treated as tokens.
      Otherwise, tokens are either symbols or strings of both alphanumeric
      characters and underscores.
    tokenize_whitespace: Treat non-newline whitespace characters as tokens.

  Returns:
    A list of lists of TokenContexts. Each list represents a line.
  """
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

  for c in data + '\n':
    if not tokenize_by_char and (c.isalnum() or c == '_'):
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace() or (tokenize_whitespace and c != '\n'):
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_tokens = []
      line_contexts = []
    else:
      column += 1
  return contexts

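# Illustrative example (added; not part of the original script). With both
# options False, tokenize_data('int x_1;\n', False, False) returns
#   [[TokenContext(0, 0, 'int'), TokenContext(0, 4, 'x_1'),
#     TokenContext(0, 7, ';')],
#    []]
# where the trailing empty list comes from the extra newline the tokenizer
# appends to the data.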

def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')

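# Illustrative example (added; not part of the original script). Because n=0,
# the diff contains no context lines:
#   list(compute_unified_diff(['a', 'b', 'c'], ['a', 'x', 'c']))
# yields the two file-header lines ('--- ', '+++ ') followed by
#   '@@ -2 +2 @@', '-b', '+x'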

def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end). The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))

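# Illustrative example (added; not part of the original script):
#   parse_chunk_header_file_range('12,3') == (11, 14)  # lines 12-14
#   parse_chunk_header_file_range('7') == (6, 7)       # the single line 7
# Iterating range(11, 14) visits the changed zero-based indices 11, 12 and 13.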

def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
      added_tokens: A list of indices into |current_tokens|.
      changed_tokens: A map of indices into |current_tokens| to
        indices into |previous_tokens|.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith("@@"):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens

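# Illustrative example (added; not part of the original script):
#   compute_changed_token_indices(['a', 'b', 'c'], ['a', 'x', 'c'])
#   == ([1], {0: 0, 2: 2})
# i.e. current token 1 ('x') is new, while current tokens 0 and 2 map back to
# previous tokens 0 and 2.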

def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A map from each flat index i to a pair (r, c)
        such that flattened[i] == l[r][c].
  """
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)

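# Illustrative example (added; not part of the original script):
#   flatten_nested_list([['a', 'b'], [], ['c']])
#   == (['a', 'b', 'c'], {0: (0, 0), 1: (0, 1), 2: (2, 0)})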

def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
  flat_previous_tokens, previous_index_to_position = flatten_nested_list(
      previous_tokens)
  flat_current_tokens, current_index_to_position = flatten_nested_list(
      current_tokens)
  added_indices, changed_indices = compute_changed_token_indices(
      flat_previous_tokens, flat_current_tokens)
  added_token_positions = [current_index_to_position[i] for i in added_indices]
  changed_token_positions = {
      current_index_to_position[current_i]:
      previous_index_to_position[changed_indices[current_i]]
      for current_i in changed_indices
  }
  return (added_token_positions, changed_token_positions)

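# Illustrative example (added; not part of the original script): for
#   previous_tokens = [['a', 'b'], ['c']]
#   current_tokens = [['a', 'x'], ['c']]
# the result is ([(0, 1)], {(0, 0): (0, 0), (1, 0): (1, 0)}): the token 'x'
# (line 0, token index 1) is new, and the remaining tokens map back unchanged.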

def parse_chunks_from_diff(diff):
  """Returns a generator of chunk data from a diff.

  Args:
    diff: A list of strings, with each string being a line from a diff
      in unified diff format.

  Returns:
    A generator of tuples (added_lines_start, added_lines_end, removed_lines)
  """
  it = iter(diff)
  for line in it:
    while not line.startswith('@@'):
      line = it.next()
    parts = line.split(' ')
    previous_start, previous_end = parse_chunk_header_file_range(
        parts[1].lstrip('-'))
    current_start, current_end = parse_chunk_header_file_range(
        parts[2].lstrip('+'))

    in_delta = False
    added_lines_start = None
    added_lines_end = None
    removed_lines = []
    while previous_start < previous_end or current_start < current_end:
      line = it.next()
      firstchar = line[0]
      line = line[1:]
      if not in_delta and (firstchar == '-' or firstchar == '+'):
        in_delta = True
        added_lines_start = current_start
        added_lines_end = current_start
        removed_lines = []

      if firstchar == '-':
        removed_lines.append(line)
        previous_start += 1
      elif firstchar == '+':
        current_start += 1
        added_lines_end = current_start
      elif firstchar == ' ':
        if in_delta:
          in_delta = False
          yield (added_lines_start, added_lines_end, removed_lines)
        previous_start += 1
        current_start += 1
    if in_delta:
      yield (added_lines_start, added_lines_end, removed_lines)

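# Illustrative example (added; not part of the original script): for a
# simplified chunk
#   @@ -3,2 +3,3 @@
#    context
#   -old line
#   +new line 1
#   +new line 2
# the generator yields (3, 5, ['old line']): the added lines occupy the
# zero-based, half-open range [3, 5) of the new file and replace 'old line'.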

def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS. The
  next commit, 1e7896, brought them back. This is a hack to skip
  those commits (except for the files they modified). If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split('\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = ''.join(data)
      if joined != '':
        yield joined
      for i in range(1, len(parts) - 1):
        if parts[i] != '':
          yield parts[i]
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = ''.join(data)
      if joined != '':
        yield joined
      return


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects.
  """
  substring_generator = generate_substrings(git_log_stdout)
  while True:
    hash = substring_generator.next()
    author_name = substring_generator.next()
    author_email = substring_generator.next()
    author_date = substring_generator.next()
    message = substring_generator.next()
    diff = substring_generator.next().split('\n')
    yield Commit(hash, author_name, author_email, author_date, message, diff)


def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    git_log_stdout: A file object that represents the git log output.
    data: A string containing the data of file |file_name|.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts (one list per line).

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  blame = tokenization_method(data)

  blamed_tokens = 0
  total_tokens = len(blame)
  uber_blame = (data, blame[:])

  for commit in generate_commits(git_log_stdout):
    if should_skip_commit(commit):
      continue

    offset = 0
    for (added_lines_start, added_lines_end,
         removed_lines) in parse_chunks_from_diff(commit.diff):
      added_lines_start += offset
      added_lines_end += offset
      previous_contexts = [
          token_lines
          for line_previous in removed_lines
          for token_lines in tokenization_method(line_previous)
      ]
      previous_tokens = [[context.token for context in contexts]
                         for contexts in previous_contexts]
      current_contexts = blame[added_lines_start:added_lines_end]
      current_tokens = [[context.token for context in contexts]
                        for contexts in current_contexts]
      added_token_positions, changed_token_positions = (
          compute_changed_token_positions(previous_tokens, current_tokens))
      for r, c in added_token_positions:
        current_contexts[r][c].commit = commit
        blamed_tokens += 1
      for r, c in changed_token_positions:
        pr, pc = changed_token_positions[(r, c)]
        previous_contexts[pr][pc] = current_contexts[r][c]

      assert added_lines_start <= added_lines_end <= len(blame)
      current_blame_size = len(blame)
      blame[added_lines_start:added_lines_end] = previous_contexts
      offset += len(blame) - current_blame_size

  assert blame == [] or blame == [[]]
  return uber_blame


def uberblame(file_name, revision, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts (one list per line).

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  DIFF_CONTEXT = 3
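  # Note (added for clarity; not in the original script): in the --format
  # string below, %x00 emits a NUL byte so generate_substrings() can split the
  # stream; %H is the commit hash, %an/%ae the author name/email, %ad the
  # author date, and %B the raw commit message. -z NUL-terminates each
  # commit's diff as well.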
  cmd_git_log = [
      'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
      '--first-parent', '-p',
      '-U%d' % DIFF_CONTEXT, '-z', '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision, '--', file_name
  ]
  git_log = subprocess.Popen(
      cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)])
  data, blame = uberblame_aux(file_name, git_log.stdout, data,
                              tokenization_method)

  _, stderr = git_log.communicate()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
  """Generates a random color from a nice looking pastel palette.

  Returns:
    The color, formatted as hex string. For example, white is "#FFFFFF".
  """
  (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9), random.uniform(
      0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))


def create_visualization(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The data file as returned by uberblame().
    blame: A list of lists of TokenContexts as returned by uberblame().

  Returns:
    The html for the generated page, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  html = """\
  <html>
    <head>
      <style>
        body {
          font-family: "Courier New";
        }
        pre {
          display: inline;
        }
        span {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash]
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display" />
      </div>
    </body>
  </html>
  """
  html = textwrap.dedent(html)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(
                ('<span style="background-color: %s" ' +
                 'onclick="display_commit(&quot;%s&quot;)">') % (color, hash))
      lines.append(cgi.escape(c))
      column += 1
    row += 1
  commit_data = ['{']
  commit_display_format = """\
  commit: {hash}
  Author: {author_name} <{author_email}>
  Date: {author_date}

  {message}
  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message)
    commit_display = cgi.escape(commit_display, quote=True)
    commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return html % (commit_data, line_nums, lines)


def show_visualization(html):
  """Display |html| in a web browser.

  Args:
    html: The contents of the file to display, as a string.
  """
  # Keep the temporary file around so the browser has time to open it.
  # TODO(thomasanderson): spin up a temporary web server to serve this
  # file so we don't have to leak it.
  html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
  html_file.write(html)
  html_file.flush()
  if sys.platform.startswith('linux'):
    # Don't show any messages when starting the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    os.open(os.devnull, os.O_RDWR)
  webbrowser.open('file://' + html_file.name)
  if sys.platform.startswith('linux'):
    os.dup2(saved_stdout, 1)
    os.dup2(saved_stderr, 2)
    os.close(saved_stdout)
    os.close(saved_stderr)


def main(argv):
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file.')
  parser.add_argument(
      'revision',
      default='HEAD',
      nargs='?',
      help='show only commits starting from a revision')
  parser.add_argument('file', help='the file to uberblame')
  parser.add_argument(
      '--skip-visualization',
      action='store_true',
      help='do not display the blame visualization in a web browser')
  parser.add_argument(
      '--tokenize-by-char',
      action='store_true',
      help='treat individual characters as tokens')
  parser.add_argument(
      '--tokenize-whitespace',
      action='store_true',
      help='also blame non-newline whitespace characters')
  args = parser.parse_args(argv)

  def tokenization_method(data):
    return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)

  data, blame = uberblame(args.file, args.revision, tokenization_method)
  html = create_visualization(data, blame)
  if not args.skip_visualization:
    show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))