# hooks/copyright.py (excerpt, starting at the import block)
#
# Check that the given files carry a copyright notice mentioning the year in
# which they were last changed.

import os
import re
import sys
import time
from pathlib import Path
from subprocess import PIPE
from subprocess import run
from typing import Dict
from typing import Iterable
from typing import Iterator
from typing import List
from typing import Union

# One record of the `git log` output requested by get_file_years(): the year,
# a line break, then the NUL-separated paths touched by that commit. A record
# is closed by a double NUL or, for the last one, by the end of the output.
LOG_RECORD = re.compile(
    br'(?P<year>[0-9]{4,})(?:\r\n|\n|\r)(?P<files>.*?)(?:\x00\x00|\x00?\Z)'
)


def check_file(path: Union[str, Path], year: str) -> bool:
    """
    Check for an up-to-date copyright notice in the first few lines of the given file

    Only the first 512 characters are inspected. The word "copyright" (in any
    case) must be followed by ``year`` on the same line.

    Raises ``OSError`` if the file cannot be opened.
    """
    # Decode leniently: the hook may be handed binary or non-UTF-8 files, and
    # an undecodable byte must not abort the whole run.
    with open(path, encoding='utf-8', errors='replace') as file:
        head = file.read(512)

    pattern = rf'\b(?:copyright)\b.*\b{re.escape(year)}\b'
    return bool(re.search(pattern, head, re.I))


def filter_excluded(paths: Iterable[Path]) -> List[Path]:
    """
    Return a list of files from the input that are not excluded by git

    Both tracked and untracked files are listed; files ignored through the
    standard git exclude mechanisms are dropped.

    Raises ``subprocess.CalledProcessError`` if git exits with an error.
    """
    pathspecs = [path.as_posix() for path in paths]
    if not pathspecs:
        # Without a pathspec git would list every file in the repository.
        return []

    cmd = ['git', 'ls-files', '--other', '--cached', '--exclude-standard', '-z', '--']
    proc = run(cmd + pathspecs, stdout=PIPE, check=True)
    return list(split_paths(proc.stdout))


def parse_log_years(output: bytes) -> Dict[Path, str]:
    """
    Return a mapping of paths to years parsed from the raw output of the
    ``git log`` call made by get_file_years()

    The first year seen for a path wins. The log is requested in topological
    order, so that is the year of the most recent commit touching the path.
    """
    years = {}  # type: Dict[Path, str]
    for record in LOG_RECORD.finditer(output):
        year = record.group('year').decode()
        for path in split_paths(record.group('files')):
            years.setdefault(path, year)
    return years


def get_file_years(paths: List[Path]) -> Dict[Path, str]:
    """
    Return a mapping of paths to the year they were last changed (if they are tracked)

    Paths without any commit are absent from the result.

    Raises ``subprocess.CalledProcessError`` if git exits with an error.
    """
    if not paths:
        # Without a pathspec git would walk the history of the whole repository.
        return {}

    cmd = [
        'git',
        'log',
        '--topo-order',
        '--format=format:%ad',
        '--date=format:%Y',
        '--name-only',
        '-z',
        '--',
    ]
    cmd.extend(path.as_posix() for path in paths)
    proc = run(cmd, stdout=PIPE, check=True)
    return parse_log_years(proc.stdout)


def get_changed(paths: List[Path]) -> Iterator[Path]:
    """
    Return an iterator of changed paths

    A path counts as changed when ``git diff`` reports it as differing from
    ``HEAD``.

    Raises ``subprocess.CalledProcessError`` if git exits with an error.
    """
    if not paths:
        # Without a pathspec git would report every changed file in the repository.
        return iter(())

    cmd = ['git', 'diff', '--no-commit-id', '--name-only', '-z', 'HEAD', '--']
    cmd.extend(path.as_posix() for path in paths)
    proc = run(cmd, stdout=PIPE, check=True)
    return split_paths(proc.stdout)


def split_paths(paths: bytes) -> Iterator[Path]:
    """
    Return an iterator of Paths from a null-separated byte string list

    Empty entries, such as the one after a trailing NUL, are skipped.
    """
    # os.fsdecode() round-trips file names that are not valid UTF-8, where a
    # plain bytes.decode() would raise.
    return (Path(os.fsdecode(raw)) for raw in paths.split(b'\x00') if raw)


def main():
    """
    CLI entrypoint
    """
    paths = filter_excluded(Path(arg) for arg in sys.argv[1:])

    # Expected year per file. Later updates override earlier ones: every
    # existing file starts with the current year, tracked files then get the
    # year of their last commit, and files that differ from HEAD are reset to
    # the current year.
    year = time.strftime('%Y')
    years = {path: year for path in paths if path.is_file()}
    years.update(get_file_years(paths))
    years.update((path, year) for path in get_changed(paths))

    missing = [
        path for path in paths
        if path.is_file() and not check_file(path, years[path])
    ]

    if missing:
        # NOTE(review): the body of this branch is collapsed in the diff view
        # this excerpt was taken from; it is left as a placeholder rather than
        # guessed. Restore the original statements here.
        ...