From 0c760253f34db9998371ec434ef6059c54f7ddbf Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Wed, 26 Aug 2020 20:28:26 -0400 Subject: [PATCH 1/4] Add a way to do case-insensitive sorting via file-contents-sorter. --- pre_commit_hooks/file_contents_sorter.py | 21 +++++++-- tests/file_contents_sorter_test.py | 56 +++++++++++++++++------- 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/pre_commit_hooks/file_contents_sorter.py b/pre_commit_hooks/file_contents_sorter.py index 76dc4fa1..4c1c7479 100644 --- a/pre_commit_hooks/file_contents_sorter.py +++ b/pre_commit_hooks/file_contents_sorter.py @@ -10,6 +10,8 @@ conflicts and keep the file nicely ordered. """ import argparse +from typing import Any +from typing import Callable from typing import IO from typing import Optional from typing import Sequence @@ -18,9 +20,15 @@ FAIL = 1 -def sort_file_contents(f: IO[bytes]) -> int: +def sort_file_contents( + f: IO[bytes], + key: Optional[Callable[[bytes], Any]], +) -> int: before = list(f) - after = sorted(line.strip(b'\n\r') for line in before if line.strip()) + after = sorted( + (line.strip(b'\n\r') for line in before if line.strip()), + key=key, + ) before_string = b''.join(before) after_string = b'\n'.join(after) + b'\n' @@ -37,13 +45,20 @@ def sort_file_contents(f: IO[bytes]) -> int: def main(argv: Optional[Sequence[str]] = None) -> int: parser = argparse.ArgumentParser() parser.add_argument('filenames', nargs='+', help='Files to sort') + parser.add_argument( + '--ignore-case', + action='store_const', + const=bytes.lower, + default=None, + help='fold lower case to upper case characters', + ) args = parser.parse_args(argv) retv = PASS for arg in args.filenames: with open(arg, 'rb+') as file_obj: - ret_for_file = sort_file_contents(file_obj) + ret_for_file = sort_file_contents(file_obj, key=args.ignore_case) if ret_for_file: print(f'Sorting {arg}') diff --git a/tests/file_contents_sorter_test.py b/tests/file_contents_sorter_test.py index 
c8afc2d8..9ebb021a 100644 --- a/tests/file_contents_sorter_test.py +++ b/tests/file_contents_sorter_test.py @@ -6,28 +6,52 @@ @pytest.mark.parametrize( - ('input_s', 'expected_retval', 'output'), + ('input_s', 'argv', 'expected_retval', 'output'), ( - (b'', FAIL, b'\n'), - (b'lonesome\n', PASS, b'lonesome\n'), - (b'missing_newline', FAIL, b'missing_newline\n'), - (b'newline\nmissing', FAIL, b'missing\nnewline\n'), - (b'missing\nnewline', FAIL, b'missing\nnewline\n'), - (b'alpha\nbeta\n', PASS, b'alpha\nbeta\n'), - (b'beta\nalpha\n', FAIL, b'alpha\nbeta\n'), - (b'C\nc\n', PASS, b'C\nc\n'), - (b'c\nC\n', FAIL, b'C\nc\n'), - (b'mag ical \n tre vor\n', FAIL, b' tre vor\nmag ical \n'), - (b'@\n-\n_\n#\n', FAIL, b'#\n-\n@\n_\n'), - (b'extra\n\n\nwhitespace\n', FAIL, b'extra\nwhitespace\n'), - (b'whitespace\n\n\nextra\n', FAIL, b'extra\nwhitespace\n'), + (b'', [], FAIL, b'\n'), + (b'lonesome\n', [], PASS, b'lonesome\n'), + (b'missing_newline', [], FAIL, b'missing_newline\n'), + (b'newline\nmissing', [], FAIL, b'missing\nnewline\n'), + (b'missing\nnewline', [], FAIL, b'missing\nnewline\n'), + (b'alpha\nbeta\n', [], PASS, b'alpha\nbeta\n'), + (b'beta\nalpha\n', [], FAIL, b'alpha\nbeta\n'), + (b'C\nc\n', [], PASS, b'C\nc\n'), + (b'c\nC\n', [], FAIL, b'C\nc\n'), + (b'mag ical \n tre vor\n', [], FAIL, b' tre vor\nmag ical \n'), + (b'@\n-\n_\n#\n', [], FAIL, b'#\n-\n@\n_\n'), + (b'extra\n\n\nwhitespace\n', [], FAIL, b'extra\nwhitespace\n'), + (b'whitespace\n\n\nextra\n', [], FAIL, b'extra\nwhitespace\n'), + ( + b'fee\nFie\nFoe\nfum\n', + [], + FAIL, + b'Fie\nFoe\nfee\nfum\n', + ), + ( + b'Fie\nFoe\nfee\nfum\n', + [], + PASS, + b'Fie\nFoe\nfee\nfum\n', + ), + ( + b'fee\nFie\nFoe\nfum\n', + ['--ignore-case'], + PASS, + b'fee\nFie\nFoe\nfum\n', + ), + ( + b'Fie\nFoe\nfee\nfum\n', + ['--ignore-case'], + FAIL, + b'fee\nFie\nFoe\nfum\n', + ), ), ) -def test_integration(input_s, expected_retval, output, tmpdir): +def test_integration(input_s, argv, expected_retval, output, tmpdir): 
path = tmpdir.join('file.txt') path.write_binary(input_s) - output_retval = main([str(path)]) + output_retval = main([str(path)] + argv) assert path.read_binary() == output assert output_retval == expected_retval From 012bb0691f4e1615c11be5860d9e427523d42985 Mon Sep 17 00:00:00 2001 From: Marcus Shawcroft Date: Wed, 16 Sep 2020 06:26:11 +0100 Subject: [PATCH 2/4] Fix #518, provide --enforce-all option to check_added_large_files The --enforce-all option when provided ensures that all files passed on the command line are checked against the size limit. Default behaviour remains unchanged. --- README.md | 3 +++ pre_commit_hooks/check_added_large_files.py | 25 +++++++++++++++++---- tests/check_added_large_files_test.py | 23 +++++++++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3552721f..a6b62abd 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,11 @@ Add this to your `.pre-commit-config.yaml` #### `check-added-large-files` Prevent giant files from being committed. - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB). + - Limits checked files to those indicated as staged for addition by git. - If `git-lfs` is installed, lfs files will be skipped (requires `git-lfs>=2.2.1`) + - `--enforce-all` - Check all listed files not just those staged for + addition. #### `check-ast` Simply check whether files parse as valid python. 
diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py index 91f57544..cb646d7b 100644 --- a/pre_commit_hooks/check_added_large_files.py +++ b/pre_commit_hooks/check_added_large_files.py @@ -21,11 +21,20 @@ def lfs_files() -> Set[str]: return set(json.loads(lfs_ret)['files']) -def find_large_added_files(filenames: Sequence[str], maxkb: int) -> int: +def find_large_added_files( + filenames: Sequence[str], + maxkb: int, + *, + enforce_all: bool = False, +) -> int: # Find all added files that are also in the list of files pre-commit tells # us about retv = 0 - for filename in (added_files() & set(filenames)) - lfs_files(): + filenames_filtered = set(filenames) - lfs_files() + if not enforce_all: + filenames_filtered &= added_files() + + for filename in filenames_filtered: kb = int(math.ceil(os.stat(filename).st_size / 1024)) if kb > maxkb: print(f'{filename} ({kb} KB) exceeds {maxkb} KB.') @@ -40,13 +49,21 @@ def main(argv: Optional[Sequence[str]] = None) -> int: 'filenames', nargs='*', help='Filenames pre-commit believes are changed.', ) + parser.add_argument( + '--enforce-all', action='store_true', + help='Enforce all files are checked, not just staged files.', + ) parser.add_argument( '--maxkb', type=int, default=500, help='Maxmimum allowable KB for added files', ) - args = parser.parse_args(argv) - return find_large_added_files(args.filenames, args.maxkb) + + return find_large_added_files( + args.filenames, + args.maxkb, + enforce_all=args.enforce_all, + ) if __name__ == '__main__': diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py index 40ffd24d..ff53b05b 100644 --- a/tests/check_added_large_files_test.py +++ b/tests/check_added_large_files_test.py @@ -40,6 +40,17 @@ def test_add_something_giant(temp_git_dir): assert find_large_added_files(['f.py'], 10) == 0 +def test_enforce_all(temp_git_dir): + with temp_git_dir.as_cwd(): + temp_git_dir.join('f.py').write('a' * 10000) + + # 
Should fail, when not staged with enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=True) == 1 + + # Should pass, when not staged without enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=False) == 0 + + def test_added_file_not_in_pre_commits_list(temp_git_dir): with temp_git_dir.as_cwd(): temp_git_dir.join('f.py').write("print('hello world')") @@ -97,3 +108,15 @@ def test_moves_with_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover # Now move it and make sure the hook still succeeds cmd_output('git', 'mv', 'a.bin', 'b.bin') assert main(('--maxkb', '9', 'b.bin')) == 0 + + +@xfailif_no_gitlfs +def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover + with temp_git_dir.as_cwd(): + monkeypatch.setenv('HOME', str(temp_git_dir)) + cmd_output('git', 'lfs', 'install') + temp_git_dir.join('f.py').write('a' * 10000) + cmd_output('git', 'lfs', 'track', 'f.py') + cmd_output('git', 'add', '--', '.') + # With --enforce-all large files on git lfs should succeed + assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0 From d18bd5b75f9b7d49a3c8c105cd95362a24ec6e31 Mon Sep 17 00:00:00 2001 From: Jeremiah Gowdy Date: Sun, 4 Oct 2020 18:45:54 -0700 Subject: [PATCH 3/4] Add new byte-order-marker checker/fixer --- .pre-commit-hooks.yaml | 10 ++++++-- README.md | 7 +++--- pre_commit_hooks/fix_byte_order_marker.py | 30 +++++++++++++++++++++++ setup.cfg | 1 + tests/fix_byte_order_marker_test.py | 13 ++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 pre_commit_hooks/fix_byte_order_marker.py create mode 100644 tests/fix_byte_order_marker_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3e4dc9ea..a47f7339 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -17,8 +17,8 @@ language: python types: [python] - id: check-byte-order-marker - name: Check for byte-order marker - description: Forbid files which have a UTF-8 byte-order marker + name: 'check BOM - 
deprecated: use fix-byte-order-marker' + description: forbid files which have a UTF-8 byte-order marker entry: check-byte-order-marker language: python types: [text] @@ -131,6 +131,12 @@ entry: file-contents-sorter language: python files: '^$' +- id: fix-byte-order-marker + name: fix UTF-8 byte order marker + description: removes UTF-8 byte order marker + entry: fix-byte-order-marker + language: python + types: [text] - id: fix-encoding-pragma name: Fix python encoding pragma language: python diff --git a/README.md b/README.md index a6b62abd..18340bf7 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,6 @@ Require literal syntax when initializing empty or zero Python builtin types. - Ignore this requirement for specific builtin types with `--ignore=type1,type2,…`. - Forbid `dict` keyword syntax with `--no-allow-dict-kwargs`. -#### `check-byte-order-marker` -Forbid files which have a UTF-8 byte-order marker - #### `check-case-conflict` Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT. @@ -102,6 +99,9 @@ This hook replaces double quoted strings with single quoted strings. #### `end-of-file-fixer` Makes sure files end in a newline and only a newline. +#### `fix-byte-order-marker` +removes UTF-8 byte order marker + #### `fix-encoding-pragma` Add `# -*- coding: utf-8 -*-` to the top of python files. - To remove the coding pragma pass `--remove` (useful in a python3-only codebase) @@ -183,6 +183,7 @@ Trims trailing whitespace. 
[mirrors-autopep8](https://github.com/pre-commit/mirrors-autopep8) - `pyflakes`: instead use `flake8` - `flake8`: instead use [upstream flake8](https://gitlab.com/pycqa/flake8) +- `check-byte-order-marker`: instead use fix-byte-order-marker ### As a standalone package diff --git a/pre_commit_hooks/fix_byte_order_marker.py b/pre_commit_hooks/fix_byte_order_marker.py new file mode 100644 index 00000000..1ffe047d --- /dev/null +++ b/pre_commit_hooks/fix_byte_order_marker.py @@ -0,0 +1,30 @@ +import argparse +from typing import Optional +from typing import Sequence + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help='Filenames to check') + args = parser.parse_args(argv) + + retv = 0 + + for filename in args.filenames: + with open(filename, 'rb') as f_b: + bts = f_b.read(3) + + if bts == b'\xef\xbb\xbf': + with open(filename, newline='', encoding='utf-8-sig') as f: + contents = f.read() + with open(filename, 'w', newline='', encoding='utf-8') as f: + f.write(contents) + + print(f'{filename}: removed byte-order marker') + retv = 1 + + return retv + + +if __name__ == '__main__': + exit(main()) diff --git a/setup.cfg b/setup.cfg index 47b8bb6d..c8677f56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,6 +48,7 @@ console_scripts = double-quote-string-fixer = pre_commit_hooks.string_fixer:main end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:main file-contents-sorter = pre_commit_hooks.file_contents_sorter:main + fix-byte-order-marker = pre_commit_hooks.fix_byte_order_marker:main fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main mixed-line-ending = pre_commit_hooks.mixed_line_ending:main diff --git a/tests/fix_byte_order_marker_test.py b/tests/fix_byte_order_marker_test.py new file mode 100644 index 00000000..da150e37 --- /dev/null +++
b/tests/fix_byte_order_marker_test.py @@ -0,0 +1,13 @@ +from pre_commit_hooks import fix_byte_order_marker + + +def test_failure(tmpdir): + f = tmpdir.join('f.txt') + f.write_text('ohai', encoding='utf-8-sig') + assert fix_byte_order_marker.main((str(f),)) == 1 + + +def test_success(tmpdir): + f = tmpdir.join('f.txt') + f.write_text('ohai', encoding='utf-8') + assert fix_byte_order_marker.main((str(f),)) == 0 From 9136088a246768144165fcc3ecc3d31bb686920a Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 20 Oct 2020 10:17:01 -0700 Subject: [PATCH 4/4] v3.3.0 --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 17 +++++++++++++++++ README.md | 2 +- setup.cfg | 2 +- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4614a74..d7885c61 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v3.3.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aa7441b..08966ece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,20 @@ +3.3.0 - 2020-10-20 +================== + +### Features +- `file-contents-sorter`: add `--ignore-case` option for case-insensitive + sorting + - #514 PR by @Julian. +- `check-added-large-files`: add `--enforce-all` option to check non-added + files as well + - #519 PR by @mshawcroft. + - #518 issue by @mshawcroft. +- `fix-byte-order-marker`: new hook which fixes UTF-8 byte-order marker. + - #522 PR by @jgowdy.
+ +### Deprecations +- `check-byte-order-marker` is now deprecated for `fix-byte-order-marker` + 3.2.0 - 2020-07-30 ================== diff --git a/README.md b/README.md index 18340bf7..3fe52f02 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Add this to your `.pre-commit-config.yaml` ```yaml - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 # Use the ref you want to point at + rev: v3.3.0 # Use the ref you want to point at hooks: - id: trailing-whitespace # - id: ... diff --git a/setup.cfg b/setup.cfg index c8677f56..ab80bd60 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = pre_commit_hooks -version = 3.2.0 +version = 3.3.0 description = Some out-of-the-box hooks for pre-commit. long_description = file: README.md long_description_content_type = text/markdown