#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script searches the specified target folders for includes that are
duplicated between an h file and its cc file, and removes them from the cc
file. The current/working directory needs to be chromium_checkout/src/ when
this tool is run.

Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
"""
| 13 | |
Raul Tambre | 66e754d | 2019-09-25 12:03:44 | [diff] [blame] | 14 | from __future__ import print_function |
| 15 | |
skym | 1ce6ac50 | 2016-10-05 00:26:45 | [diff] [blame] | 16 | import argparse |
| 17 | import collections |
| 18 | import logging |
| 19 | import os |
| 20 | import re |
| 21 | import sys |
| 22 | |
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a whole line holding a single #include directive. Group 1 is the
# directive itself without surrounding whitespace (used in log messages) and
# group 2 is the included path (used when comparing between files). The \s
# parts let us ignore any leading, trailing or inner whitespace.
# A raw string is used so that \s reaches the regex engine verbatim instead of
# being treated as an (invalid) string escape sequence.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
| 30 | |
def HasSuffix(file_name, suffix):
  """Returns True when the extension of |file_name| is exactly |suffix|.

  The extension is whatever os.path.splitext() reports, so |suffix| is expected
  to include the leading dot (e.g. '.h').
  """
  _, extension = os.path.splitext(file_name)
  return extension == suffix
| 33 | |
def IsEmpty(line):
  """Returns True when |line| holds nothing but whitespace (or nothing)."""
  stripped = line.strip()
  return len(stripped) == 0
| 36 | |
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
  """Looks up the include set of the header that belongs to a .cc file.

  Only the first #include line in |input_lines| is considered: it is assumed to
  name the corresponding .h file. Its path is resolved against the current
  working directory and used as a key into |h_path_to_include_set|.

  Returns the mapped include set, or None when the file has no include at all
  or when its first include is not a known header (a message mentioning
  |cc_file_name| is printed in the latter case).
  """
  matches = (INCLUDE_REGEX.search(line) for line in input_lines)
  first_include = next((m for m in matches if m), None)
  if first_include is None:
    # No include directive anywhere in the file.
    return None

  h_file_path = os.path.join(os.getcwd(), first_include.group(2))
  if h_file_path in h_path_to_include_set:
    return h_path_to_include_set[h_file_path]

  # The first include match should be the corresponding .h file, else skip.
  print('First include did not match to a known .h file, skipping ' +
        cc_file_name + ', line: ' + first_include.group(1))
  return None
| 54 | |
def WithoutDuplicates(input_lines, include_set, cc_file_name):
  """Filters out include lines whose path is already in |include_set|.

  Each dropped line is reported on stdout together with |cc_file_name|. When
  removing includes leaves two blank lines next to each other (i.e. a whole
  section of includes disappeared), the trailing blank line is dropped too.

  Returns the list of lines that should become the new file contents.
  """
  kept_lines = []
  # State needed to detect the blank line trailing a fully removed section.
  previous_kept_was_blank = False
  previous_was_dropped = False
  for current in input_lines:
    found = INCLUDE_REGEX.search(current)
    if found and found.group(2) in include_set:
      print('Removed ' + found.group(1) + ' from ' + cc_file_name)
      previous_was_dropped = True
      continue

    if previous_kept_was_blank and previous_was_dropped and IsEmpty(current):
      print('Removed empty line from ' + cc_file_name)
      previous_was_dropped = True
      continue

    kept_lines.append(current)
    previous_kept_was_blank = IsEmpty(current)
    previous_was_dropped = False
  return kept_lines
| 79 | |
def main():
  """Command line entry point.

  Walks every target folder given on the command line, records the includes of
  each .h file, and then removes from each .cc file the includes that its
  corresponding header (the first include of the .cc file) already has.
  With --dry-run the removals are only printed and no file is modified.

  Header lookups are done against paths built from the current working
  directory, so the script must be run from the directory that include paths
  are relative to.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dry-run', action='store_true',
                      help='Does not actually remove lines when specified.')
  parser.add_argument('targets', nargs='+',
                      help='Relative path to folders to search for duplicate '
                           'includes in.')
  args = parser.parse_args()

  # A map of header file paths to the includes they contain.
  h_path_to_include_set = {}

  # Simply collects the path of all cc files present.
  cc_file_path_set = set()

  for relative_root in args.targets:
    # abspath() resolves against the current directory and also collapses
    # components such as './' or '..'. Without the normalization a target like
    # './components/foo' would produce header keys that never equal the
    # cwd-joined include paths used for the lookup.
    absolute_root = os.path.abspath(relative_root)
    for dir_path, _, file_name_list in os.walk(absolute_root):
      for file_name in file_name_list:
        file_path = os.path.join(dir_path, file_name)
        if HasSuffix(file_name, H_FILE_SUFFIX):
          # By manually adding the set instead of using defaultdict we can avoid
          # warning about missing .h files when the .h file has no includes.
          header_includes = h_path_to_include_set[file_path] = set()
          with open(file_path) as fh:
            for line in fh:
              match = INCLUDE_REGEX.search(line)
              if match:
                header_includes.add(match.group(2))
        elif HasSuffix(file_name, CC_FILE_SUFFIX):
          cc_file_path_set.add(file_path)

  # Sorted so that the printed report is stable between runs.
  for cc_file_path in sorted(cc_file_path_set):
    cc_file_name = os.path.basename(cc_file_path)
    # Reading never needs write access; the file is only reopened for writing
    # below when there is actually something to change.
    with open(cc_file_path) as fh:
      input_lines = fh.readlines()

    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                 cc_file_name)
    if not include_set:
      # Unknown header, or a header without includes: nothing to remove.
      continue

    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
    if args.dry_run or output_lines == input_lines:
      continue

    with open(cc_file_path, 'w') as fh:
      fh.writelines(output_lines)
| 124 | |
# Only run the tool when executed as a script, not when imported.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)