blob: f09d8cc9fdd31eedf4e35f7dc94c0cdd6be837bd [file] [log] [blame]
skym1ce6ac502016-10-05 00:26:451#!/usr/bin/env python
2# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""This script will search through the target folder specified and try to find
7duplicate includes from h and cc files, and remove them from the cc files. The
8current/working directory needs to be chromium_checkout/src/ when this tool is
9run.
10
11Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
12"""
13
Raul Tambre66e754d2019-09-25 12:03:4414from __future__ import print_function
15
skym1ce6ac502016-10-05 00:26:4516import argparse
17import collections
18import logging
19import os
20import re
21import sys
22
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# The \s should allow us to ignore any whitespace and only focus on the group
# captured when comparing between files. Group 1 is the whole include
# directive and group 2 is the path between the quotes or angle brackets.
# A raw string is used so the regex escapes are not first interpreted (and
# warned about) as string escapes.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
30
def HasSuffix(file_name, suffix):
  """Returns True when the extension of |file_name| is exactly |suffix|.

  The extension is whatever os.path.splitext() reports, so |suffix| must
  include the leading dot, e.g. '.h'.
  """
  _, extension = os.path.splitext(file_name)
  return extension == suffix
33
def IsEmpty(line):
  """Returns True when |line| holds nothing but whitespace (or nothing)."""
  stripped = line.strip()
  return len(stripped) == 0
36
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
  """Looks up the include set of the header that belongs to a .cc file.

  Only the first include found in |input_lines| is considered. Its path is
  joined onto the current working directory and used as a key into
  |h_path_to_include_set|. When the key is known, the mapped include set is
  returned. Otherwise a message naming |cc_file_name| is printed and None is
  returned. None is also returned when the file has no include at all.
  """
  for candidate in input_lines:
    found = INCLUDE_REGEX.search(candidate)
    if found is None:
      continue
    # Only the first include is ever inspected, so every path below returns.
    header_path = os.path.join(os.getcwd(), found.group(2))
    if header_path in h_path_to_include_set:
      return h_path_to_include_set[header_path]
    print('First include did not match to a known .h file, skipping ' +
          cc_file_name + ', line: ' + found.group(1))
    return None
  return None
54
def WithoutDuplicates(input_lines, include_set, cc_file_name):
  """Builds the new file contents with already-covered includes dropped.

  An include line is dropped when its path is in |include_set|. A blank line
  is dropped as well when the line before it was dropped and the last line
  that was kept is itself blank, so removing a whole section of includes does
  not leave two blank lines behind. Every removal is reported with a print
  that mentions |cc_file_name|.

  Returns the list of lines that should remain in the file.
  """
  kept_lines = []
  last_kept_was_blank = False
  previous_was_dropped = False
  for line in input_lines:
    found = INCLUDE_REGEX.search(line)
    if found is not None and found.group(2) in include_set:
      print('Removed ' + found.group(1) + ' from ' + cc_file_name)
      previous_was_dropped = True
      continue

    is_blank = IsEmpty(line)
    if last_kept_was_blank and previous_was_dropped and is_blank:
      # Trailing blank line of a section that was removed entirely.
      print('Removed empty line from ' + cc_file_name)
      previous_was_dropped = True
      continue

    last_kept_was_blank = is_blank
    previous_was_dropped = False
    kept_lines.append(line)
  return kept_lines
79
def main():
  """Removes includes from .cc files that their first-included .h already has.

  Every target folder given on the command line is walked recursively. For
  each .h file the set of paths it includes is recorded, and the path of each
  .cc file is collected. Each .cc file is then matched to a header through its
  first include, and any include that header already contains is removed from
  the .cc file. With --dry-run the removals are only printed and no file is
  modified.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dry-run', action='store_true',
      help='Does not actually remove lines when specified.')
  parser.add_argument('targets', nargs='+',
      help='Relative path to folders to search for duplicate includes in.')
  args = parser.parse_args()

  # A map of header file paths to the includes they contain.
  h_path_to_include_set = {}

  # Simply collects the path of all cc files present.
  cc_file_path_set = set()

  for relative_root in args.targets:
    # abspath() joins onto the working directory and normalizes the result, so
    # targets such as './components/foo' or 'components/foo/../bar' produce
    # the same keys FindIncludeSet builds from os.getcwd() plus include path.
    absolute_root = os.path.abspath(relative_root)
    for dir_path, _, file_name_list in os.walk(absolute_root):
      for file_name in file_name_list:
        file_path = os.path.join(dir_path, file_name)
        if HasSuffix(file_name, H_FILE_SUFFIX):
          # By manually adding the set instead of using defaultdict we can
          # avoid warning about missing .h files when the .h file has no
          # includes.
          header_includes = set()
          h_path_to_include_set[file_path] = header_includes
          with open(file_path) as fh:
            for line in fh:
              match = INCLUDE_REGEX.search(line)
              if match:
                header_includes.add(match.group(2))
        elif HasSuffix(file_name, CC_FILE_SUFFIX):
          cc_file_path_set.add(file_path)

  # Sorted so files are processed, and messages printed, in a stable order.
  for cc_file_path in sorted(cc_file_path_set):
    cc_file_name = os.path.basename(cc_file_path)
    with open(cc_file_path) as fh:
      input_lines = fh.readlines()

    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                 cc_file_name)
    # None means no matching header; an empty set means nothing to remove.
    if not include_set:
      continue

    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
    # Only touch the file when something was actually removed, so unchanged
    # files keep their timestamps and do not need to be writable.
    if args.dry_run or output_lines == input_lines:
      continue
    with open(cc_file_path, 'w') as fh:
      fh.writelines(output_lines)
124
if __name__ == '__main__':
  # Hand whatever main() returns to the interpreter as the exit status.
  exit_status = main()
  sys.exit(exit_status)