Blame - tools/remove_duplicate_includes.py - chromium/src

blob: f09d8cc9fdd31eedf4e35f7dc94c0cdd6be837bd [file] [log] [blame]

skym	1ce6ac50	2016-10-05 00:26:45	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright 2016 The Chromium Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6	"""This script will search through the target folder specified and try to find
				7	duplicate includes from h and cc files, and remove them from the cc files. The
				8	current/working directory needs to be chromium_checkout/src/ when this tool is
				9	run.
				10
				11	Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
				12	"""
				13
Raul Tambre	66e754d	2019-09-25 12:03:44	[diff] [blame]	14	from __future__ import print_function
				15
skym	1ce6ac50	2016-10-05 00:26:45	[diff] [blame]	16	import argparse
				17	import collections
				18	import logging
				19	import os
				20	import re
				21	import sys
				22
				23	# This could be generalized if desired, and moved to command line arguments.
				24	H_FILE_SUFFIX = '.h'
				25	CC_FILE_SUFFIX = '.cc'
				26
				27	# The \s should allow us to ignore any whitespace and only focus on the group
				28	# captured when comparing between files.
				29	INCLUDE_REGEX = re.compile('^\s(#include\s+[\"<](.?)[\">])\s*$')
				30
				31	def HasSuffix(file_name, suffix):
				32	return os.path.splitext(file_name)[1] == suffix
				33
				34	def IsEmpty(line):
				35	return not line.strip()
				36
				37	def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
				38	"""Finds and returns the corresponding include set for the given .cc file.
				39
				40	This is done by finding the first include in the file and then trying to look
				41	up an .h file in the passed in map. If not present, then None is returned
				42	immediately.
				43	"""
				44	for line in input_lines:
				45	match = INCLUDE_REGEX.search(line)
				46	# The first include match should be the corresponding .h file, else skip.
				47	if match:
				48	h_file_path = os.path.join(os.getcwd(), match.group(2))
				49	if h_file_path not in h_path_to_include_set:
Raul Tambre	66e754d	2019-09-25 12:03:44	[diff] [blame]	50	print('First include did not match to a known .h file, skipping ' + \
				51	cc_file_name + ', line: ' + match.group(1))
skym	1ce6ac50	2016-10-05 00:26:45	[diff] [blame]	52	return None
				53	return h_path_to_include_set[h_file_path]
				54
				55	def WithoutDuplicates(input_lines, include_set, cc_file_name):
				56	"""Checks every input line and sees if we can remove it based on the contents
				57	of the given include set.
				58
				59	Returns what the new contents of the file should be.
				60	"""
				61	output_lines = []
				62	# When a section of includes are completely removed, we want to remove the
				63	# trailing empty as well.
				64	lastCopiedLineWasEmpty = False
				65	lastLineWasOmitted = False
				66	for line in input_lines:
				67	match = INCLUDE_REGEX.search(line)
				68	if match and match.group(2) in include_set:
Raul Tambre	66e754d	2019-09-25 12:03:44	[diff] [blame]	69	print('Removed ' + match.group(1) + ' from ' + cc_file_name)
skym	1ce6ac50	2016-10-05 00:26:45	[diff] [blame]	70	lastLineWasOmitted = True
				71	elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line):
Raul Tambre	66e754d	2019-09-25 12:03:44	[diff] [blame]	72	print('Removed empty line from ' + cc_file_name)
skym	1ce6ac50	2016-10-05 00:26:45	[diff] [blame]	73	lastLineWasOmitted = True
				74	else:
				75	lastCopiedLineWasEmpty = IsEmpty(line)
				76	lastLineWasOmitted = False
				77	output_lines.append(line)
				78	return output_lines
				79
				80	def main():
				81	parser = argparse.ArgumentParser()
				82	parser.add_argument('--dry-run', action='store_true',
				83	help='Does not actually remove lines when specified.')
				84	parser.add_argument('targets', nargs='+',
				85	help='Relative path to folders to search for duplicate includes in.')
				86	args = parser.parse_args()
				87
				88	# A map of header file paths to the includes they contain.
				89	h_path_to_include_set = {}
				90
				91	# Simply collects the path of all cc files present.
				92	cc_file_path_set = set()
				93
				94	for relative_root in args.targets:
				95	absolute_root = os.path.join(os.getcwd(), relative_root)
				96	for dir_path, dir_name_list, file_name_list in os.walk(absolute_root):
				97	for file_name in file_name_list:
				98	file_path = os.path.join(dir_path, file_name)
				99	if HasSuffix(file_name, H_FILE_SUFFIX):
				100	# By manually adding the set instead of using defaultdict we can avoid
				101	# warning about missing .h files when the .h file has no includes.
				102	h_path_to_include_set[file_path] = set()
				103	with open(file_path) as fh:
				104	for line in fh:
				105	match = INCLUDE_REGEX.search(line)
				106	if match:
				107	h_path_to_include_set[file_path].add(match.group(2))
				108	elif HasSuffix(file_name, CC_FILE_SUFFIX):
				109	cc_file_path_set.add(file_path)
				110
				111	for cc_file_path in cc_file_path_set:
				112	cc_file_name = os.path.basename(cc_file_path)
				113	with open(cc_file_path, 'r' if args.dry_run else 'r+') as fh:
				114	# Read out all lines and reset file position to allow overwriting.
				115	input_lines = fh.readlines()
				116	fh.seek(0)
				117	include_set = FindIncludeSet(input_lines, h_path_to_include_set,
				118	cc_file_name)
				119	if include_set:
				120	output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
				121	if not args.dry_run:
				122	fh.writelines(output_lines)
				123	fh.truncate()
				124
				125	if __name__ == '__main__':
				126	sys.exit(main())