Add more options to uberblame
* Adds --tokenize-by-char
* This is useful for, e.g., the Blink refactor where fooBar was
  renamed to foo_bar.
* Adds --tokenize-whitespace
* Adds --skip-visualization
* Added this because it is useful for testing.
* Ran yapf
* Small refactor.
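* Example invocations (the file paths are illustrative):
    tools/uberblame.py --tokenize-by-char some/renamed_file.cc
    tools/uberblame.py --skip-visualization HEAD tools/uberblame.py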
BUG=773350
R=dpranke
Change-Id: I7596f670ce640ed6bd367dee995628edc13dd6f2
Reviewed-on: https://chromium-review.googlesource.com/1010798
Reviewed-by: Dirk Pranke <[email protected]>
Commit-Queue: Thomas Anderson <[email protected]>
Cr-Commit-Position: refs/heads/master@{#550634}
diff --git a/tools/uberblame.py b/tools/uberblame.py
index 52ff93a..9c99a0d2 100755
--- a/tools/uberblame.py
+++ b/tools/uberblame.py
@@ -27,6 +27,7 @@
commit: A Commit object that corresponds to the commit that added
this token.
"""
+
def __init__(self, row, column, token, commit=None):
self.row = row
self.column = column
@@ -45,6 +46,7 @@
message: The commit message.
diff: The commit diff.
"""
+
def __init__(self, hash, author_name, author_email, author_date, message,
diff):
self.hash = hash
@@ -55,14 +57,18 @@
self.diff = diff
-def tokenize_data(data):
+def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
"""Tokenizes |data|.
Args:
data: String to tokenize.
+    tokenize_by_char: If true, individual characters are treated as tokens.
+        Otherwise, tokens are either single symbols or runs of alphanumeric
+        characters and underscores.
+    tokenize_whitespace: If true, non-newline whitespace characters are also
+        treated as tokens.
Returns:
- A list of TokenContexts.
+ A list of lists of TokenContexts. Each list represents a line.
"""
contexts = []
in_identifier = False
@@ -73,7 +79,7 @@
line_contexts = []
for c in data + '\n':
- if c.isalnum() or c == '_':
+ if not tokenize_by_char and (c.isalnum() or c == '_'):
if in_identifier:
identifier += c
else:
@@ -82,10 +88,9 @@
identifier = c
else:
if in_identifier:
- line_contexts.append(
- TokenContext(row, identifier_start, identifier))
+ line_contexts.append(TokenContext(row, identifier_start, identifier))
in_identifier = False
- if not c.isspace():
+ if not c.isspace() or (tokenize_whitespace and c != '\n'):
line_contexts.append(TokenContext(row, column, c))
if c == '\n':
@@ -249,8 +254,7 @@
for line in diff:
if line.startswith('@@'):
if in_chunk:
- yield (current_start, current_end,
- chunk_previous, previous_start)
+ yield (current_start, current_end, chunk_previous, previous_start)
parts = line.split(' ')
previous = parts[1].lstrip('-')
previous_start, _ = parse_chunk_header_file_range(previous)
@@ -261,8 +265,7 @@
elif in_chunk and line.startswith('-'):
chunk_previous.append(line[1:])
if current_start != None:
- yield (current_start, current_end,
- chunk_previous, previous_start)
+ yield (current_start, current_end, chunk_previous, previous_start)
def should_skip_commit(commit):
@@ -345,20 +348,22 @@
yield Commit(hash, author_name, author_email, author_date, message, diff)
-def uberblame_aux(file_name, git_log_stdout, data):
+def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
"""Computes the uberblame of file |file_name|.
Args:
file_name: File to uberblame.
git_log_stdout: A file object that represents the git log output.
data: A string containing the data of file |file_name|.
+    tokenization_method: A function that takes a string and returns a list
+        of lists of TokenContexts, one list per line.
Returns:
A tuple (data, blame).
data: File contents.
blame: A list of TokenContexts.
"""
- blame = tokenize_data(data)
+ blame = tokenization_method(data)
blamed_tokens = 0
total_tokens = len(blame)
@@ -373,18 +378,16 @@
removed_lines_start) in parse_chunks_from_diff(commit.diff):
added_lines_start += offset
added_lines_end += offset
- previous_contexts = [token_lines
- for line_previous in removed_lines
- for token_lines in tokenize_data(line_previous)]
- previous_tokens = [
- [context.token for context in contexts]
- for contexts in previous_contexts
+ previous_contexts = [
+ token_lines
+ for line_previous in removed_lines
+ for token_lines in tokenization_method(line_previous)
]
+ previous_tokens = [[context.token for context in contexts]
+ for contexts in previous_contexts]
current_contexts = blame[added_lines_start:added_lines_end]
- current_tokens = [
- [context.token for context in contexts]
- for contexts in current_contexts
- ]
+ current_tokens = [[context.token for context in contexts]
+ for contexts in current_contexts]
added_token_positions, changed_token_positions = (
compute_changed_token_positions(previous_tokens, current_tokens))
for r, c in added_token_positions:
@@ -403,12 +406,14 @@
return uber_blame
-def uberblame(file_name, revision):
+def uberblame(file_name, revision, tokenization_method):
"""Computes the uberblame of file |file_name|.
Args:
file_name: File to uberblame.
revision: The revision to start the uberblame at.
+    tokenization_method: A function that takes a string and returns a list
+        of lists of TokenContexts, one list per line.
Returns:
A tuple (data, blame).
@@ -416,27 +421,16 @@
blame: A list of TokenContexts.
"""
cmd_git_log = [
- 'git',
- 'log',
- '--minimal',
- '--no-prefix',
- '--follow',
- '-m',
- '--first-parent',
- '-p',
- '-U0',
- '-z',
- '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
- revision,
- '--',
- file_name
+ 'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
+ '--first-parent', '-p', '-U0', '-z',
+ '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B', revision, '--', file_name
]
- git_log = subprocess.Popen(cmd_git_log,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ git_log = subprocess.Popen(
+ cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
data = subprocess.check_output(
['git', 'show', '%s:%s' % (revision, file_name)])
- data, blame = uberblame_aux(file_name, git_log.stdout, data)
+ data, blame = uberblame_aux(file_name, git_log.stdout, data,
+ tokenization_method)
_, stderr = git_log.communicate()
if git_log.returncode != 0:
@@ -445,19 +439,26 @@
def generate_pastel_color():
- (h, l, s) = (random.uniform(0, 1),
- random.uniform(0.8, 0.9),
- random.uniform(0.5, 1))
+ """Generates a random color from a nice looking pastel palette.
+
+ Returns:
+    The color, formatted as a hex string. For example, white is "#FFFFFF".
+ """
+ (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9), random.uniform(
+ 0.5, 1))
(r, g, b) = colorsys.hls_to_rgb(h, l, s)
- return "#%0.2X%0.2X%0.2X" % (int(r*255), int(g*255), int(b*255))
+ return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))
-def visualize_uberblame(data, blame):
- """Creates and displays a web page to visualize |blame|.
+def create_visualization(data, blame):
+ """Creates a web page to visualize |blame|.
Args:
data: The data file as returned by uberblame().
blame: A list of TokenContexts as returned by uberblame().
+
+  Returns:
+ The html for the generated page, as a string.
"""
# Use the same seed for the color generator on each run so that
# loading the same blame of the same file twice will result in the
@@ -543,25 +544,23 @@
token_context = blame[blame_index]
if (row == token_context.row and
column == token_context.column + len(token_context.token)):
- if (blame_index + 1 == len(blame) or
- blame[blame_index].commit.hash !=
+ if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
blame[blame_index + 1].commit.hash):
lines.append('</span>')
blame_index += 1
if blame_index < len(blame):
token_context = blame[blame_index]
if row == token_context.row and column == token_context.column:
- if (blame_index == 0 or
- blame[blame_index - 1].commit.hash !=
+ if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
blame[blame_index].commit.hash):
hash = token_context.commit.hash
commits[hash] = token_context.commit
if hash not in commit_colors:
commit_colors[hash] = generate_pastel_color()
color = commit_colors[hash]
- lines.append(
- ('<span style="background-color: %s" ' +
- 'onclick="display_commit("%s")">') % (color, hash))
+ lines.append(('<span style="background-color: %s" ' +
+ 'onclick="display_commit("%s")">') % (color,
+ hash))
lines.append(cgi.escape(c))
column += 1
row += 1
@@ -582,11 +581,9 @@
author_name=commit.author_name,
author_email=commit.author_email,
author_date=commit.author_date,
- message=commit.message,
- )
+ message=commit.message)
commit_display = cgi.escape(commit_display, quote=True)
- commit_display = re.sub(
- links, '<a href=\\"\\1\\">\\1</a>', commit_display)
+ commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
commit_display = commit_display.replace('\n', '\\n')
commit_data.append('"%s": "%s",' % (hash, commit_display))
commit_data.append('}')
@@ -625,19 +622,38 @@
os.close(saved_stderr)
-def main():
+def main(argv):
parser = argparse.ArgumentParser(
- description='Show what revision last modified each token of a file')
- parser.add_argument('revision', default='HEAD', nargs='?',
- help='Show only commits starting from a revision.')
- parser.add_argument('file', help='The file to uberblame.')
- args = parser.parse_args()
+ description='Show what revision last modified each token of a file.')
+ parser.add_argument(
+ 'revision',
+ default='HEAD',
+ nargs='?',
+ help='show only commits starting from a revision')
+ parser.add_argument('file', help='the file to uberblame')
+ parser.add_argument(
+ '--skip-visualization',
+ action='store_true',
+ help='do not display the blame visualization in a web browser')
+ parser.add_argument(
+ '--tokenize-by-char',
+ action='store_true',
+ help='treat individual characters as tokens')
+ parser.add_argument(
+ '--tokenize-whitespace',
+ action='store_true',
+ help='also blame non-newline whitespace characters')
+ args = parser.parse_args(argv)
- data, blame = uberblame(args.file, args.revision)
- html = visualize_uberblame(data, blame)
- show_visualization(html)
+ def tokenization_method(data):
+ return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)
+
+ data, blame = uberblame(args.file, args.revision, tokenization_method)
+ html = create_visualization(data, blame)
+ if not args.skip_visualization:
+ show_visualization(html)
return 0
if __name__ == '__main__':
- sys.exit(main())
+ sys.exit(main(sys.argv[1:]))
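A minimal sketch of what the two new tokenization flags change, assuming
tools/uberblame.py is importable as uberblame (the expected outputs follow
from the tokenize_data() docstring above and are illustrative, not captured
from a real run):

  import uberblame  # assumes tools/ is on sys.path; main() is guarded

  line = 'foo_bar = 1'  # no trailing newline; tokenize_data() appends one

  # Default mode: tokens are identifiers and symbols; whitespace is skipped.
  default_tokens = [[ctx.token for ctx in line_ctxs]
                    for line_ctxs in uberblame.tokenize_data(line, False, False)]
  # Expected: [['foo_bar', '=', '1']]

  # --tokenize-by-char: each non-whitespace character is its own token, so a
  # fooBar -> foo_bar rename only re-blames the characters that changed.
  by_char_tokens = [[ctx.token for ctx in line_ctxs]
                    for line_ctxs in uberblame.tokenize_data(line, True, False)]
  # Expected: [['f', 'o', 'o', '_', 'b', 'a', 'r', '=', '1']]

  # --tokenize-whitespace: non-newline whitespace becomes tokens as well.
  ws_tokens = [[ctx.token for ctx in line_ctxs]
               for line_ctxs in uberblame.tokenize_data(line, False, True)]
  # Expected: [['foo_bar', ' ', '=', ' ', '1']]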