sbc | 0cec9d7 | 2014-11-24 17:25:29 | [diff] [blame] | 1 | """File wrangling.""" |
| 2 | |
| 3 | from coverage.backward import to_string |
| 4 | from coverage.misc import CoverageException |
| 5 | import fnmatch, os, os.path, re, sys |
| 6 | import ntpath, posixpath |
| 7 | |
class FileLocator(object):
    """Understand how filenames work."""

    def __init__(self):
        # The absolute path to our current directory, case-normalized and
        # ending with a separator so it can be used as a prefix test.
        self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.  Keys are the filenames exactly as they
        # were passed in by the caller.
        self.canonical_filename_cache = {}

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  A filename that is not under that
        directory is returned unchanged.

        """
        # Compare case-normalized forms, but slice the original so the
        # caller's spelling of the remainder is preserved.
        fnorm = os.path.normcase(filename)
        if fnorm.startswith(self.relative_dir):
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.

        A relative `filename` is looked for in the current directory and
        then in each `sys.path` entry; the first existing candidate is the
        one that gets canonicalized.  Results are cached per `filename`.

        """
        if filename not in self.canonical_filename_cache:
            # Resolve into a separate name: the cache must stay keyed by
            # the argument the caller gave us, or relative names would
            # never produce a cache hit.
            found = filename
            if not os.path.isabs(filename):
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    candidate = os.path.join(path, filename)
                    if os.path.exists(candidate):
                        found = candidate
                        break
            self.canonical_filename_cache[filename] = abs_file(found)
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the data read from the zip file (passed through
        `to_string`), or None if no zip file could be found or `filename`
        isn't in it.  The data returned will be an empty string if the file
        is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split only at the first marker: everything after it is
                # the path inside the archive, even if the marker text
                # shows up again further along.
                archive, inner = filename.split(marker, 1)
                try:
                    # marker[:-1] drops the trailing separator, giving
                    # back the archive's own extension.
                    zi = zipimport.zipimporter(archive+marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(inner)
                except IOError:
                    continue
                return to_string(data)
        return None
| 73 | |
| 74 | |
if sys.platform == 'win32':

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        Each component is looked up in its parent's directory listing and
        replaced by the listed spelling whose case-normalized form matches.
        Both the results and the directory listings are memoized on the
        function object itself.

        """
        known = actual_path.cache
        if path in known:
            return known[path]

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: the head is all there is.
            resolved = parent
        elif not parent:
            # A bare name with no directory part.
            resolved = leaf
        else:
            # Fix the case of the parent first, then find this component
            # in the parent's listing.
            parent = actual_path(parent)
            listings = actual_path.list_cache
            if parent not in listings:
                try:
                    listings[parent] = os.listdir(parent)
                except OSError:
                    # An unreadable directory is treated as empty.
                    listings[parent] = []
            wanted = os.path.normcase(leaf)
            for entry in listings[parent]:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)

        known[path] = resolved
        return resolved

    # Memo tables: path -> corrected path, and directory -> its listing.
    actual_path.cache = {}
    actual_path.list_cache = {}

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms: `filename` as given."""
        return filename
| 113 | |
| 114 | |
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    The name has `~` and environment variables expanded, is resolved with
    `os.path.realpath`, made absolute, and finally passed through
    `actual_path`.

    """
    expanded = os.path.expanduser(filename)
    expanded = os.path.expandvars(expanded)
    resolved = os.path.realpath(expanded)
    return actual_path(os.path.abspath(resolved))
| 121 | |
| 122 | |
def isabs_anywhere(filename):
    """Is `filename` an absolute path on any OS?

    True if either the Windows (`ntpath`) or the POSIX (`posixpath`) rules
    consider it absolute.

    """
    if ntpath.isabs(filename):
        return True
    return posixpath.isabs(filename)
| 126 | |
| 127 | |
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (`*` or `?`) is kept
    exactly as given.  Any other pattern is turned into an absolute path
    with `abs_file`.

    If `patterns` is None (or otherwise empty), an empty list is returned.

    """
    if not patterns:
        return []
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns
    ]
| 145 | |
| 146 | |
class TreeMatcher(object):
    """A matcher for files in a tree."""

    def __init__(self, directories):
        # Work on a copy so later add() calls don't touch the caller's list.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        True when `fpath` is exactly one of the directories, or starts with
        one of them immediately followed by `os.sep`.

        """
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Either the very same path, or something directly beneath it.
            # A mere shared prefix ("/a/b" vs "/a/bc") does not count.
            if fpath == root or fpath[len(root)] == os.sep:
                return True
        return False
| 174 | |
| 175 | |
class FnmatchMatcher(object):
    """A matcher for files by filename pattern."""

    def __init__(self, pats):
        # Keep our own copy of the pattern list.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
| 194 | |
| 195 | |
def sep(s):
    """Find the path separator used in this string, or os.sep if none.

    The first slash or backslash found in `s` decides the answer.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
| 204 | |
| 205 | |
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # List of (compiled regex, result, pattern_sep, result_sep) tuples,
        # in the order they were added.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex
        # at the end of the string, which we don't want: we match a prefix.
        # Depending on the Python version the anchor is a trailing "$", a
        # "\Z" followed by a flags group, or a bare "\Z"/"\z" at the very
        # end.  Only strip the anchor in those positions.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  The slash may or may not have been
        # backslash-escaped by fnmatch.  Escape pairs are consumed as
        # units so that an escaped backslash sitting in front of a slash is
        # not mistaken for the slash's own escape.
        regex_pat = re.sub(
            r"(?s)\\.|/",
            lambda m: r"[\\/]" if m.group(0) in ("/", r"\/") else m.group(0),
            regex_pat,
        )

        # We want case-insensitive matching, so add that flag.
        regex = re.compile(regex_pat, re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap out only the matched root.  A text replace of the
                # matched string would also rewrite any later part of the
                # path that happens to look the same.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
| 286 | |
| 287 | |
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is always searched, on the assumption that the user
    named it directly and knows best.  A subdirectory is only searched if it
    contains a __init__.py; otherwise it, and everything beneath it, is
    skipped.

    """
    # Files must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # Not a package: it isn't importable and neither are its files.
            # Emptying the list in place stops os.walk from descending.
            dirnames[:] = []
            continue
        at_top = False
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)