#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import cgi
import colorsys
import difflib
import os
import random
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """

  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """

  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.
    tokenize_by_char: If true, individual characters are treated as tokens.
      Otherwise, tokens are either symbols or strings of both alphanumeric
      characters and underscores.
    tokenize_whitespace: Treat non-newline whitespace characters as tokens.

  Returns:
    A list of lists of TokenContexts. Each list represents a line.
  """
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

  for c in data:
    if not tokenize_by_char and (c.isalnum() or c == '_'):
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace() or (tokenize_whitespace and c != '\n'):
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_contexts = []
    else:
      column += 1
  contexts.append(line_contexts)
  return contexts
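

# Illustrative sketch, not part of the original tool: a minimal check of what
# tokenize_data() produces for a small buffer. The _demo_* helper name is made
# up for this example and is never called by the script.
def _demo_tokenize_data():
  contexts = tokenize_data('ab c\n', False, False)
  tokens = [[context.token for context in line] for line in contexts]
  # 'ab' is one identifier token, the space is skipped, and a final (empty)
  # line list is appended after the trailing newline.
  assert tokens == [['ab', 'c'], []]
  # Rows and columns are 0-based: 'c' starts at column 3 of row 0.
  assert (contexts[0][1].row, contexts[0][1].column) == (0, 3)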


def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')


def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end). The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))
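

# Illustrative sketch, not part of the original tool: how 1-based file ranges
# from a chunk header map onto 0-based half-open index ranges. Never called by
# the script.
def _demo_parse_chunk_header_file_range():
  # '12,3' means lines 12-14 changed, i.e. 0-based indices 11, 12, 13.
  assert parse_chunk_header_file_range('12,3') == (11, 14)
  # A count of 0 marks a pure insertion point; the range is empty.
  assert parse_chunk_header_file_range('12,0') == (12, 12)
  # A missing count defaults to a single line.
  assert parse_chunk_header_file_range('12') == (11, 12)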


def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
    added_tokens: A list of indices into |current_tokens|.
    changed_tokens: A map from indices into |current_tokens| to
      indices into |previous_tokens|, for tokens present in both.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith('@@'):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens
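

# Illustrative sketch, not part of the original tool: tokens outside the diff
# chunks are mapped back to their old indices, while new tokens are reported
# separately. Never called by the script.
def _demo_compute_changed_token_indices():
  added, changed = compute_changed_token_indices(['a', 'b', 'c'],
                                                 ['a', 'x', 'c'])
  # 'x' (index 1 in the new list) was added; 'a' and 'c' survived in place.
  assert added == [1]
  assert changed == {0: 0, 2: 2}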


def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A map from flat index i to a pair (r, c) such that
        flattened[i] == l[r][c].
  """
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)
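

# Illustrative sketch, not part of the original tool: empty sublists consume a
# row index but contribute no elements. Never called by the script.
def _demo_flatten_nested_list():
  flattened, index_to_position = flatten_nested_list([['a', 'b'], [], ['c']])
  assert flattened == ['a', 'b', 'c']
  # 'c' lives in row 2 because the empty middle list still counts as a row.
  assert index_to_position == {0: (0, 0), 1: (0, 1), 2: (2, 0)}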


def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
  flat_previous_tokens, previous_index_to_position = flatten_nested_list(
      previous_tokens)
  flat_current_tokens, current_index_to_position = flatten_nested_list(
      current_tokens)
  added_indices, changed_indices = compute_changed_token_indices(
      flat_previous_tokens, flat_current_tokens)
  added_token_positions = [current_index_to_position[i] for i in added_indices]
  changed_token_positions = {
      current_index_to_position[current_i]:
      previous_index_to_position[changed_indices[current_i]]
      for current_i in changed_indices
  }
  return (added_token_positions, changed_token_positions)
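

# Illustrative sketch, not part of the original tool: the flat indices from
# compute_changed_token_indices() are translated back into (row, column)
# pairs. Never called by the script.
def _demo_compute_changed_token_positions():
  added, changed = compute_changed_token_positions([['a', 'b'], ['c']],
                                                   [['a', 'x'], ['c']])
  # 'x' was added at row 0, column 1; 'a' and 'c' kept their positions.
  assert added == [(0, 1)]
  assert changed == {(0, 0): (0, 0), (1, 0): (1, 0)}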


def parse_chunks_from_diff(diff):
  """Returns a generator of chunk data from a diff.

  Args:
    diff: A list of strings, with each string being a line from a diff
      in unified diff format.

  Returns:
    A generator of tuples (added_lines_start, added_lines_end, removed_lines).
  """
  it = iter(diff)
  for line in it:
    while not line.startswith('@@'):
      line = it.next()
    parts = line.split(' ')
    previous_start, previous_end = parse_chunk_header_file_range(
        parts[1].lstrip('-'))
    current_start, current_end = parse_chunk_header_file_range(
        parts[2].lstrip('+'))

    in_delta = False
    added_lines_start = None
    added_lines_end = None
    removed_lines = []
    while previous_start < previous_end or current_start < current_end:
      line = it.next()
      firstchar = line[0]
      line = line[1:]
      if not in_delta and (firstchar == '-' or firstchar == '+'):
        in_delta = True
        added_lines_start = current_start
        added_lines_end = current_start
        removed_lines = []

      if firstchar == '-':
        removed_lines.append(line)
        previous_start += 1
      elif firstchar == '+':
        current_start += 1
        added_lines_end = current_start
      elif firstchar == ' ':
        if in_delta:
          in_delta = False
          yield (added_lines_start, added_lines_end, removed_lines)
        previous_start += 1
        current_start += 1
    if in_delta:
      yield (added_lines_start, added_lines_end, removed_lines)
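

# Illustrative sketch, not part of the original tool: a single chunk that
# replaces one line. Line numbers in the yielded tuples are 0-based and the
# added range is half-open. Never called by the script.
def _demo_parse_chunks_from_diff():
  diff = [
      '@@ -1,2 +1,2 @@',
      ' unchanged',
      '-old line',
      '+new line',
  ]
  chunks = list(parse_chunks_from_diff(diff))
  # The new file's line at index 1 (range [1, 2)) replaced 'old line'.
  assert chunks == [(1, 2, ['old line'])]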


def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS. The
  next commit, 1e7896, brought them back. This is a hack to skip
  those commits (except for the files they modified). If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split('\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = ''.join(data)
      if joined != '':
        yield joined
      for i in range(1, len(parts) - 1):
        if parts[i] != '':
          yield parts[i]
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = ''.join(data)
      if joined != '':
        yield joined
      return
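

# Illustrative sketch, not part of the original tool: feeding the docstring's
# example through an in-memory stream. Python 2's StringIO stands in for the
# git subprocess pipe here. Never called by the script.
def _demo_generate_substrings():
  from StringIO import StringIO
  assert list(generate_substrings(StringIO('a\0bc\0\0\0d\0'))) == \
      ['a', 'bc', 'd']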


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects."""
  substring_generator = generate_substrings(git_log_stdout)
  while True:
    hash = substring_generator.next()
    author_name = substring_generator.next()
    author_email = substring_generator.next()
    author_date = substring_generator.next()
    message = substring_generator.next().rstrip('\n')
    diff = substring_generator.next().split('\n')[1:-1]
    yield Commit(hash, author_name, author_email, author_date, message, diff)
Tom Andersonc3ed8962017-10-09 19:01:46367
368
Tom Andersonb3d7e642018-04-13 16:23:42369def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
Tom Andersonc3ed8962017-10-09 19:01:46370 """Computes the uberblame of file |file_name|.
371
372 Args:
373 file_name: File to uberblame.
374 git_log_stdout: A file object that represents the git log output.
375 data: A string containing the data of file |file_name|.
Tom Andersonb3d7e642018-04-13 16:23:42376 tokenization_method: A function that takes a string and returns a list of
377 TokenContexts.
Tom Andersonc3ed8962017-10-09 19:01:46378
379 Returns:
380 A tuple (data, blame).
381 data: File contents.
382 blame: A list of TokenContexts.
383 """
Tom Andersonb3d7e642018-04-13 16:23:42384 blame = tokenization_method(data)
Tom Andersonc3ed8962017-10-09 19:01:46385
386 blamed_tokens = 0
387 total_tokens = len(blame)
388 uber_blame = (data, blame[:])
389
390 for commit in generate_commits(git_log_stdout):
391 if should_skip_commit(commit):
392 continue
393
394 offset = 0
Tom Anderson5002161142018-04-17 00:30:21395 for (added_lines_start, added_lines_end,
396 removed_lines) in parse_chunks_from_diff(commit.diff):
Tom Andersonc3ed8962017-10-09 19:01:46397 added_lines_start += offset
398 added_lines_end += offset
Tom Andersonb3d7e642018-04-13 16:23:42399 previous_contexts = [
400 token_lines
401 for line_previous in removed_lines
402 for token_lines in tokenization_method(line_previous)
Tom Andersonc3ed8962017-10-09 19:01:46403 ]
Tom Andersonb3d7e642018-04-13 16:23:42404 previous_tokens = [[context.token for context in contexts]
405 for contexts in previous_contexts]
Tom Andersonc3ed8962017-10-09 19:01:46406 current_contexts = blame[added_lines_start:added_lines_end]
Tom Andersonb3d7e642018-04-13 16:23:42407 current_tokens = [[context.token for context in contexts]
408 for contexts in current_contexts]
Tom Andersonc3ed8962017-10-09 19:01:46409 added_token_positions, changed_token_positions = (
410 compute_changed_token_positions(previous_tokens, current_tokens))
411 for r, c in added_token_positions:
Tom Anderson65410152017-10-17 01:53:19412 current_contexts[r][c].commit = commit
Tom Andersonc3ed8962017-10-09 19:01:46413 blamed_tokens += 1
414 for r, c in changed_token_positions:
415 pr, pc = changed_token_positions[(r, c)]
416 previous_contexts[pr][pc] = current_contexts[r][c]
417
418 assert added_lines_start <= added_lines_end <= len(blame)
419 current_blame_size = len(blame)
420 blame[added_lines_start:added_lines_end] = previous_contexts
421 offset += len(blame) - current_blame_size
422
423 assert blame == [] or blame == [[]]
424 return uber_blame


def uberblame(file_name, revision, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts, one list per line.

  Returns:
    A tuple (data, blame).
    data: File contents.
    blame: A list of lists of TokenContexts, one list per line.
  """
  DIFF_CONTEXT = 3
  cmd_git_log = [
      'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
      '--first-parent', '-p',
      '-U%d' % DIFF_CONTEXT, '-z',
      '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision, '--', file_name
  ]
  git_log = subprocess.Popen(
      cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)])
  data, blame = uberblame_aux(file_name, git_log.stdout, data,
                              tokenization_method)

  _, stderr = git_log.communicate()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
  """Generates a random color from a nice looking pastel palette.

  Returns:
    The color, formatted as a hex string. For example, white is "#FFFFFF".
  """
  (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9),
               random.uniform(0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))
Tom Andersonc3ed8962017-10-09 19:01:46471
472
Tom Andersonac918bb62018-04-17 00:37:34473def colorize_diff(diff):
474 """Colorizes a diff for use in an HTML page.
475
476 Args:
477 diff: The diff, in unified diff format, as a list of line strings.
478
479 Returns:
480 The HTML-formatted diff, as a string. The diff will already be escaped.
481 """
482
483 colorized = []
484 for line in diff:
485 escaped = cgi.escape(line.replace('\r', ''), quote=True)
486 if line.startswith('+'):
487 colorized.append('<span class=\\"addition\\">%s</span>' % escaped)
488 elif line.startswith('-'):
489 colorized.append('<span class=\\"deletion\\">%s</span>' % escaped)
490 elif line.startswith('@@'):
491 context_begin = escaped.find('@@', 2)
492 assert context_begin != -1
493 colorized.append(
494 '<span class=\\"chunk_meta\\">%s</span>'
495 '<span class=\\"chunk_context\\">%s</span'
496 % (escaped[0:context_begin + 2], escaped[context_begin + 2:]))
497 elif line.startswith('diff') or line.startswith('index'):
498 colorized.append('<span class=\\"file_header\\">%s</span>' % escaped)
499 else:
500 colorized.append('<span class=\\"context_line\\">%s</span>' % escaped)
501 return '\n'.join(colorized)


def create_visualization(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The data file as returned by uberblame().
    blame: A list of lists of TokenContexts as returned by uberblame().

  Returns:
    The HTML for the generated page, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  html = """\
  <html>
    <head>
      <style>
        body {
          font-family: monospace;
        }
        pre {
          display: inline;
        }
        .token {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        .addition {
          color: #080;
        }
        .deletion {
          color: #c00;
        }
        .chunk_meta {
          color: #099;
        }
        .context_line .chunk_context {
          /* Just normal text. */
        }
        .file_header {
          font-weight: bold;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash];
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display" />
      </div>
    </body>
  </html>
  """
  html = textwrap.dedent(html)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(
                ('<span class="token" style="background-color: %s" '
                 'onclick="display_commit(&quot;%s&quot;)">') % (color, hash))
      lines.append(cgi.escape(c))
      column += 1
    row += 1
  commit_data = ['{\n']
  commit_display_format = """\
  commit: {hash}
  Author: {author_name} <{author_email}>
  Date: {author_date}

  {message}

  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message)
    commit_display = cgi.escape(commit_display, quote=True)
    commit_display += colorize_diff(commit.diff)
    commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",\n' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return html % (commit_data, line_nums, lines)


def show_visualization(html):
  """Displays |html| in a web browser.

  Args:
    html: The contents of the file to display, as a string.
  """
  # Keep the temporary file around so the browser has time to open it.
  # TODO(thomasanderson): spin up a temporary web server to serve this
  # file so we don't have to leak it.
  html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
  html_file.write(html)
  html_file.flush()
  if sys.platform.startswith('linux'):
    # Don't show any messages when starting the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    os.open(os.devnull, os.O_RDWR)
  webbrowser.open('file://' + html_file.name)
  if sys.platform.startswith('linux'):
    os.dup2(saved_stdout, 1)
    os.dup2(saved_stderr, 2)
    os.close(saved_stdout)
    os.close(saved_stderr)


def main(argv):
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file.')
  parser.add_argument(
      'revision',
      default='HEAD',
      nargs='?',
      help='show only commits starting from a revision')
  parser.add_argument('file', help='the file to uberblame')
  parser.add_argument(
      '--skip-visualization',
      action='store_true',
      help='do not display the blame visualization in a web browser')
  parser.add_argument(
      '--tokenize-by-char',
      action='store_true',
      help='treat individual characters as tokens')
  parser.add_argument(
      '--tokenize-whitespace',
      action='store_true',
      help='also blame non-newline whitespace characters')
  args = parser.parse_args(argv)

  def tokenization_method(data):
    return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)

  data, blame = uberblame(args.file, args.revision, tokenization_method)
  html = create_visualization(data, blame)
  if not args.skip_visualization:
    show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))