#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import cgi
import colorsys
import difflib
import random
import os
import re
import subprocess
import sys
import tempfile
import textwrap
import webbrowser


class TokenContext(object):
  """Metadata about a token.

  Attributes:
    row: Row index of the token in the data file.
    column: Column index of the token in the data file.
    token: The token string.
    commit: A Commit object that corresponds to the commit that added
      this token.
  """

  def __init__(self, row, column, token, commit=None):
    self.row = row
    self.column = column
    self.token = token
    self.commit = commit


class Commit(object):
  """Commit data.

  Attributes:
    hash: The commit hash.
    author_name: The author's name.
    author_email: The author's email.
    author_date: The date and time the author created this commit.
    message: The commit message.
    diff: The commit diff.
  """

  def __init__(self, hash, author_name, author_email, author_date, message,
               diff):
    self.hash = hash
    self.author_name = author_name
    self.author_email = author_email
    self.author_date = author_date
    self.message = message
    self.diff = diff


def tokenize_data(data, tokenize_by_char, tokenize_whitespace):
  """Tokenizes |data|.

  Args:
    data: String to tokenize.
    tokenize_by_char: If true, individual characters are treated as tokens.
      Otherwise, tokens are either symbols or strings of both alphanumeric
      characters and underscores.
    tokenize_whitespace: Treat non-newline whitespace characters as tokens.

  Returns:
    A list of lists of TokenContexts. Each list represents a line.
  """
  contexts = []
  in_identifier = False
  identifier_start = 0
  identifier = ''
  row = 0
  column = 0
  line_contexts = []

  for c in data + '\n':
    if not tokenize_by_char and (c.isalnum() or c == '_'):
      if in_identifier:
        identifier += c
      else:
        in_identifier = True
        identifier_start = column
        identifier = c
    else:
      if in_identifier:
        line_contexts.append(TokenContext(row, identifier_start, identifier))
      in_identifier = False
      if not c.isspace() or (tokenize_whitespace and c != '\n'):
        line_contexts.append(TokenContext(row, column, c))

    if c == '\n':
      row += 1
      column = 0
      contexts.append(line_contexts)
      line_tokens = []
      line_contexts = []
    else:
      column += 1
  return contexts

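# Illustrative example (added; not part of the original script). With both
# options False, tokenize_data('int x_1;\n', False, False) returns
#   [[TokenContext(0, 0, 'int'), TokenContext(0, 4, 'x_1'),
#     TokenContext(0, 7, ';')],
#    []]
# where the trailing empty list comes from the extra newline the tokenizer
# appends to the data.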

def compute_unified_diff(old_tokens, new_tokens):
  """Computes the diff between |old_tokens| and |new_tokens|.

  Args:
    old_tokens: Token strings corresponding to the old data.
    new_tokens: Token strings corresponding to the new data.

  Returns:
    The diff, in unified diff format.
  """
  return difflib.unified_diff(old_tokens, new_tokens, n=0, lineterm='')

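# Illustrative example (added; not part of the original script). Because n=0,
# the diff contains no context lines:
#   list(compute_unified_diff(['a', 'b', 'c'], ['a', 'x', 'c']))
# yields the two file-header lines ('--- ', '+++ ') followed by
#   '@@ -2 +2 @@', '-b', '+x'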

def parse_chunk_header_file_range(file_range):
  """Parses a chunk header file range.

  Diff chunk headers have the form:
    @@ -<file-range> +<file-range> @@
  File ranges have the form:
    <start line number>,<number of lines changed>

  Args:
    file_range: A chunk header file range.

  Returns:
    A tuple (range_start, range_end). The endpoints are adjusted such that
    iterating over [range_start, range_end) will give the changed indices.
  """
  if ',' in file_range:
    file_range_parts = file_range.split(',')
    start = int(file_range_parts[0])
    amount = int(file_range_parts[1])
    if amount == 0:
      return (start, start)
    return (start - 1, start + amount - 1)
  else:
    return (int(file_range) - 1, int(file_range))

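# Illustrative example (added; not part of the original script):
#   parse_chunk_header_file_range('12,3') == (11, 14)  # lines 12-14
#   parse_chunk_header_file_range('7') == (6, 7)       # the single line 7
# Iterating range(11, 14) visits the changed zero-based indices 11, 12 and 13.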

def compute_changed_token_indices(previous_tokens, current_tokens):
  """Computes changed and added tokens.

  Args:
    previous_tokens: Tokens corresponding to the old file.
    current_tokens: Tokens corresponding to the new file.

  Returns:
    A tuple (added_tokens, changed_tokens).
      added_tokens: A list of indices into |current_tokens|.
      changed_tokens: A map of indices into |current_tokens| to
        indices into |previous_tokens|.
  """
  prev_file_chunk_end = 0
  prev_patched_chunk_end = 0
  added_tokens = []
  changed_tokens = {}
  for line in compute_unified_diff(previous_tokens, current_tokens):
    if line.startswith("@@"):
      parts = line.split(' ')
      removed = parts[1].lstrip('-')
      removed_start, removed_end = parse_chunk_header_file_range(removed)
      added = parts[2].lstrip('+')
      added_start, added_end = parse_chunk_header_file_range(added)
      for i in range(added_start, added_end):
        added_tokens.append(i)
      for i in range(0, removed_start - prev_patched_chunk_end):
        changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
      prev_patched_chunk_end = removed_end
      prev_file_chunk_end = added_end
  for i in range(0, len(previous_tokens) - prev_patched_chunk_end):
    changed_tokens[prev_file_chunk_end + i] = prev_patched_chunk_end + i
  return added_tokens, changed_tokens

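# Illustrative example (added; not part of the original script):
#   compute_changed_token_indices(['a', 'b', 'c'], ['a', 'x', 'c'])
#   == ([1], {0: 0, 2: 2})
# i.e. current token 1 ('x') is new, while current tokens 0 and 2 map back to
# previous tokens 0 and 2.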

def flatten_nested_list(l):
  """Flattens a list and provides a mapping from elements in the list back
  into the nested list.

  Args:
    l: A list of lists.

  Returns:
    A tuple (flattened, index_to_position):
      flattened: The flattened list.
      index_to_position: A map from each flat index i to a pair (r, c)
        such that flattened[i] == l[r][c].
  """
  flattened = []
  index_to_position = {}
  r = 0
  c = 0
  for nested_list in l:
    for element in nested_list:
      index_to_position[len(flattened)] = (r, c)
      flattened.append(element)
      c += 1
    r += 1
    c = 0
  return (flattened, index_to_position)

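# Illustrative example (added; not part of the original script):
#   flatten_nested_list([['a', 'b'], [], ['c']])
#   == (['a', 'b', 'c'], {0: (0, 0), 1: (0, 1), 2: (2, 0)})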

def compute_changed_token_positions(previous_tokens, current_tokens):
  """Computes changed and added token positions.

  Args:
    previous_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.
    current_tokens: A list of lists of token strings. Lines in the file
      correspond to the nested lists.

  Returns:
    A tuple (added_token_positions, changed_token_positions):
      added_token_positions: A list of pairs that index into |current_tokens|.
      changed_token_positions: A map from pairs that index into
        |current_tokens| to pairs that index into |previous_tokens|.
  """
  flat_previous_tokens, previous_index_to_position = flatten_nested_list(
      previous_tokens)
  flat_current_tokens, current_index_to_position = flatten_nested_list(
      current_tokens)
  added_indices, changed_indices = compute_changed_token_indices(
      flat_previous_tokens, flat_current_tokens)
  added_token_positions = [current_index_to_position[i] for i in added_indices]
  changed_token_positions = {
      current_index_to_position[current_i]:
      previous_index_to_position[changed_indices[current_i]]
      for current_i in changed_indices
  }
  return (added_token_positions, changed_token_positions)

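# Illustrative example (added; not part of the original script): for
#   previous_tokens = [['a', 'b'], ['c']]
#   current_tokens = [['a', 'x'], ['c']]
# the result is ([(0, 1)], {(0, 0): (0, 0), (1, 0): (1, 0)}): the token 'x'
# (line 0, token index 1) is new, and the remaining tokens map back unchanged.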

def parse_chunks_from_diff(diff):
  """Returns a generator of chunk data from a diff.

  Args:
    diff: A list of strings, with each string being a line from a diff
      in unified diff format.

  Returns:
    A generator of tuples (added_lines_start, added_lines_end, removed_lines)
  """
  it = iter(diff)
  for line in it:
    while not line.startswith('@@'):
      line = it.next()
    parts = line.split(' ')
    previous_start, previous_end = parse_chunk_header_file_range(
        parts[1].lstrip('-'))
    current_start, current_end = parse_chunk_header_file_range(
        parts[2].lstrip('+'))

    in_delta = False
    added_lines_start = None
    added_lines_end = None
    removed_lines = []
    while previous_start < previous_end or current_start < current_end:
      line = it.next()
      firstchar = line[0]
      line = line[1:]
      if not in_delta and (firstchar == '-' or firstchar == '+'):
        in_delta = True
        added_lines_start = current_start
        added_lines_end = current_start
        removed_lines = []

      if firstchar == '-':
        removed_lines.append(line)
        previous_start += 1
      elif firstchar == '+':
        current_start += 1
        added_lines_end = current_start
      elif firstchar == ' ':
        if in_delta:
          in_delta = False
          yield (added_lines_start, added_lines_end, removed_lines)
        previous_start += 1
        current_start += 1
    if in_delta:
      yield (added_lines_start, added_lines_end, removed_lines)

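# Illustrative example (added; not part of the original script): for a
# simplified chunk
#   @@ -3,2 +3,3 @@
#    context
#   -old line
#   +new line 1
#   +new line 2
# the generator yields (3, 5, ['old line']): the added lines occupy the
# zero-based, half-open range [3, 5) of the new file and replace 'old line'.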

def should_skip_commit(commit):
  """Decides if |commit| should be skipped when computing the blame.

  Commit 5d4451e deleted all files in the repo except for DEPS. The
  next commit, 1e7896, brought them back. This is a hack to skip
  those commits (except for the files they modified). If we did not
  do this, changes would be incorrectly attributed to 1e7896.

  Args:
    commit: A Commit object.

  Returns:
    A boolean indicating if this commit should be skipped.
  """
  banned_commits = [
      '1e78967ed2f1937b3809c19d91e7dd62d756d307',
      '5d4451ebf298d9d71f716cc0135f465cec41fcd0',
  ]
  if commit.hash not in banned_commits:
    return False
  banned_commits_file_exceptions = [
      'DEPS',
      'chrome/browser/ui/views/file_manager_dialog_browsertest.cc',
  ]
  for line in commit.diff:
    if line.startswith('---') or line.startswith('+++'):
      if line.split(' ')[1] in banned_commits_file_exceptions:
        return False
    elif line.startswith('@@'):
      return True
  assert False


def generate_substrings(file):
  """Generates substrings from a file stream, where substrings are
  separated by '\0'.

  For example, the input:
    'a\0bc\0\0\0d\0'
  would produce the output:
    ['a', 'bc', 'd']

  Args:
    file: A readable file.
  """
  BUF_SIZE = 448  # Experimentally found to be pretty fast.
  data = []
  while True:
    buf = file.read(BUF_SIZE)
    parts = buf.split('\0')
    data.append(parts[0])
    if len(parts) > 1:
      joined = ''.join(data)
      if joined != '':
        yield joined
      for i in range(1, len(parts) - 1):
        if parts[i] != '':
          yield parts[i]
      data = [parts[-1]]
    if len(buf) < BUF_SIZE:
      joined = ''.join(data)
      if joined != '':
        yield joined
      return


def generate_commits(git_log_stdout):
  """Parses git log output into a stream of Commit objects.
  """
  substring_generator = generate_substrings(git_log_stdout)
  while True:
    hash = substring_generator.next()
    author_name = substring_generator.next()
    author_email = substring_generator.next()
    author_date = substring_generator.next()
    message = substring_generator.next()
    diff = substring_generator.next().split('\n')
    yield Commit(hash, author_name, author_email, author_date, message, diff)


def uberblame_aux(file_name, git_log_stdout, data, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    git_log_stdout: A file object that represents the git log output.
    data: A string containing the data of file |file_name|.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts (one list per line).

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  blame = tokenization_method(data)

  blamed_tokens = 0
  total_tokens = len(blame)
  uber_blame = (data, blame[:])

  for commit in generate_commits(git_log_stdout):
    if should_skip_commit(commit):
      continue

    offset = 0
    for (added_lines_start, added_lines_end,
         removed_lines) in parse_chunks_from_diff(commit.diff):
      added_lines_start += offset
      added_lines_end += offset
      previous_contexts = [
          token_lines
          for line_previous in removed_lines
          for token_lines in tokenization_method(line_previous)
      ]
      previous_tokens = [[context.token for context in contexts]
                         for contexts in previous_contexts]
      current_contexts = blame[added_lines_start:added_lines_end]
      current_tokens = [[context.token for context in contexts]
                        for contexts in current_contexts]
      added_token_positions, changed_token_positions = (
          compute_changed_token_positions(previous_tokens, current_tokens))
      for r, c in added_token_positions:
        current_contexts[r][c].commit = commit
        blamed_tokens += 1
      for r, c in changed_token_positions:
        pr, pc = changed_token_positions[(r, c)]
        previous_contexts[pr][pc] = current_contexts[r][c]

      assert added_lines_start <= added_lines_end <= len(blame)
      current_blame_size = len(blame)
      blame[added_lines_start:added_lines_end] = previous_contexts
      offset += len(blame) - current_blame_size

  assert blame == [] or blame == [[]]
  return uber_blame


def uberblame(file_name, revision, tokenization_method):
  """Computes the uberblame of file |file_name|.

  Args:
    file_name: File to uberblame.
    revision: The revision to start the uberblame at.
    tokenization_method: A function that takes a string and returns a list of
      lists of TokenContexts (one list per line).

  Returns:
    A tuple (data, blame).
      data: File contents.
      blame: A list of lists of TokenContexts, one list per line.
  """
  DIFF_CONTEXT = 3
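  # Note (added for clarity; not in the original script): in the --format
  # string below, %x00 emits a NUL byte so generate_substrings() can split the
  # stream; %H is the commit hash, %an/%ae the author name/email, %ad the
  # author date, and %B the raw commit message. -z NUL-terminates each
  # commit's diff as well.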
  cmd_git_log = [
      'git', 'log', '--minimal', '--no-prefix', '--follow', '-m',
      '--first-parent', '-p',
      '-U%d' % DIFF_CONTEXT, '-z', '--format=%x00%H%x00%an%x00%ae%x00%ad%x00%B',
      revision, '--', file_name
  ]
  git_log = subprocess.Popen(
      cmd_git_log, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  data = subprocess.check_output(
      ['git', 'show', '%s:%s' % (revision, file_name)])
  data, blame = uberblame_aux(file_name, git_log.stdout, data,
                              tokenization_method)

  _, stderr = git_log.communicate()
  if git_log.returncode != 0:
    raise subprocess.CalledProcessError(git_log.returncode, cmd_git_log, stderr)
  return data, blame


def generate_pastel_color():
  """Generates a random color from a nice looking pastel palette.

  Returns:
    The color, formatted as hex string. For example, white is "#FFFFFF".
  """
  (h, l, s) = (random.uniform(0, 1), random.uniform(0.8, 0.9), random.uniform(
      0.5, 1))
  (r, g, b) = colorsys.hls_to_rgb(h, l, s)
  return "#%0.2X%0.2X%0.2X" % (int(r * 255), int(g * 255), int(b * 255))


def create_visualization(data, blame):
  """Creates a web page to visualize |blame|.

  Args:
    data: The data file as returned by uberblame().
    blame: A list of lists of TokenContexts as returned by uberblame().

  Returns:
    The html for the generated page, as a string.
  """
  # Use the same seed for the color generator on each run so that
  # loading the same blame of the same file twice will result in the
  # same generated HTML page.
  random.seed(0x52937865ec62d1ea)
  html = """\
  <html>
    <head>
      <style>
        body {
          font-family: "Courier New";
        }
        pre {
          display: inline;
        }
        span {
          outline: 1pt solid #00000030;
          outline-offset: -1pt;
          cursor: pointer;
        }
        #linenums {
          text-align: right;
        }
        #file_display {
          position: absolute;
          left: 0;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
        #commit_display_container {
          position: absolute;
          left: 50%%;
          top: 0;
          width: 50%%;
          height: 100%%;
          overflow: scroll;
        }
      </style>
      <script>
        commit_data = %s;
        function display_commit(hash) {
          var e = document.getElementById("commit_display");
          e.innerHTML = commit_data[hash]
        }
      </script>
    </head>
    <body>
      <div id="file_display">
        <table>
          <tbody>
            <tr>
              <td valign="top" id="linenums">
                <pre>%s</pre>
              </td>
              <td valign="top">
                <pre>%s</pre>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
      <div id="commit_display_container" valign="top">
        <pre id="commit_display" />
      </div>
    </body>
  </html>
  """
  html = textwrap.dedent(html)
  commits = {}
  lines = []
  commit_colors = {}
  blame_index = 0
  blame = [context for contexts in blame for context in contexts]
  row = 0
  lastline = ''
  for line in data.split('\n'):
    lastline = line
    column = 0
    for c in line + '\n':
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if (row == token_context.row and
            column == token_context.column + len(token_context.token)):
          if (blame_index + 1 == len(blame) or blame[blame_index].commit.hash !=
              blame[blame_index + 1].commit.hash):
            lines.append('</span>')
          blame_index += 1
      if blame_index < len(blame):
        token_context = blame[blame_index]
        if row == token_context.row and column == token_context.column:
          if (blame_index == 0 or blame[blame_index - 1].commit.hash !=
              blame[blame_index].commit.hash):
            hash = token_context.commit.hash
            commits[hash] = token_context.commit
            if hash not in commit_colors:
              commit_colors[hash] = generate_pastel_color()
            color = commit_colors[hash]
            lines.append(
                ('<span style="background-color: %s" ' +
                 'onclick="display_commit(&quot;%s&quot;)">') % (color, hash))
      lines.append(cgi.escape(c))
      column += 1
    row += 1
  commit_data = ['{']
  commit_display_format = """\
  commit: {hash}
  Author: {author_name} <{author_email}>
  Date: {author_date}

  {message}
  """
  commit_display_format = textwrap.dedent(commit_display_format)
  links = re.compile(r'(https?:\/\/\S+)')
  for hash in commits:
    commit = commits[hash]
    commit_display = commit_display_format.format(
        hash=hash,
        author_name=commit.author_name,
        author_email=commit.author_email,
        author_date=commit.author_date,
        message=commit.message)
    commit_display = cgi.escape(commit_display, quote=True)
    commit_display = re.sub(links, '<a href=\\"\\1\\">\\1</a>', commit_display)
    commit_display = commit_display.replace('\n', '\\n')
    commit_data.append('"%s": "%s",' % (hash, commit_display))
  commit_data.append('}')
  commit_data = ''.join(commit_data)
  line_nums = range(1, row if lastline.strip() == '' else row + 1)
  line_nums = '\n'.join([str(num) for num in line_nums])
  lines = ''.join(lines)
  return html % (commit_data, line_nums, lines)


def show_visualization(html):
  """Display |html| in a web browser.

  Args:
    html: The contents of the file to display, as a string.
  """
  # Keep the temporary file around so the browser has time to open it.
  # TODO(thomasanderson): spin up a temporary web server to serve this
  # file so we don't have to leak it.
  html_file = tempfile.NamedTemporaryFile(delete=False, suffix='.html')
  html_file.write(html)
  html_file.flush()
  if sys.platform.startswith('linux'):
    # Don't show any messages when starting the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    os.close(1)
    os.close(2)
    os.open(os.devnull, os.O_RDWR)
    os.open(os.devnull, os.O_RDWR)
  webbrowser.open('file://' + html_file.name)
  if sys.platform.startswith('linux'):
    os.dup2(saved_stdout, 1)
    os.dup2(saved_stderr, 2)
    os.close(saved_stdout)
    os.close(saved_stderr)


def main(argv):
  parser = argparse.ArgumentParser(
      description='Show what revision last modified each token of a file.')
  parser.add_argument(
      'revision',
      default='HEAD',
      nargs='?',
      help='show only commits starting from a revision')
  parser.add_argument('file', help='the file to uberblame')
  parser.add_argument(
      '--skip-visualization',
      action='store_true',
      help='do not display the blame visualization in a web browser')
  parser.add_argument(
      '--tokenize-by-char',
      action='store_true',
      help='treat individual characters as tokens')
  parser.add_argument(
      '--tokenize-whitespace',
      action='store_true',
      help='also blame non-newline whitespace characters')
  args = parser.parse_args(argv)

  def tokenization_method(data):
    return tokenize_data(data, args.tokenize_by_char, args.tokenize_whitespace)

  data, blame = uberblame(args.file, args.revision, tokenization_method)
  html = create_visualization(data, blame)
  if not args.skip_visualization:
    show_visualization(html)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))