Commit 0a5c7cd3 authored by Dom Sekotill
Browse files

Allow copyright checker to check ANY non-excluded file

parent 05f69a45
Loading
Loading
Loading
Loading
+68 −11
Original line number Diff line number Diff line
@@ -20,32 +20,89 @@ Check if the given files contain a copyright notice mentioning the current year
import re
import sys
import time
from subprocess import check_output
from pathlib import Path
from subprocess import PIPE
from subprocess import run
from typing import Dict
from typing import Iterable
from typing import Iterator
from typing import List


def check_file(path: str, year: str) -> bool:
	"""
	Check for an up-to-date copyright notice in the first few lines of the given file

	Only the first 512 characters of the file are examined.  A notice is accepted when
	the word "copyright" is followed, on the same line, by `year` as a whole word;
	matching is case-insensitive.

	`path` is the file to inspect and `year` is the year the notice must mention.
	Returns True if such a notice is found, otherwise False.
	"""
	with open(path) as file:
		lines = file.read(512)
	# Use the year supplied by the caller; it must not be replaced with the current
	# year here, or files last changed in earlier years would be checked incorrectly.
	return bool(re.search(f'\\b(?:copyright)\\b.*\\b{year}\\b', lines, re.I))


def filter_excluded(paths: Iterable[Path]) -> List[Path]:
	"""
	Return the files under the given paths that git does not ignore

	Asks `git ls-files` for both tracked (cached) and untracked (other) files, applying
	the standard exclude rules, and returns the NUL-separated output as a list of Paths.
	"""
	pathspecs = [entry.as_posix() for entry in paths]
	listing = run(
		['git', 'ls-files', '--other', '--cached', '--exclude-standard', '-z', '--', *pathspecs],
		stdout=PIPE,
		check=True,
	)
	return [*split_paths(listing.stdout)]


def get_file_years(paths: List[Path]) -> Dict[Path, str]:
	"""
	Return a mapping of paths to the year they were last changed (if they are tracked)

	Runs `git log` restricted to `paths` and, for every path named in its output,
	records the author-date year of the first commit listed for that path.  Paths that
	never appear in the log are absent from the result.

	Raises `subprocess.CalledProcessError` if git exits with a non-zero status.
	"""
	output: Dict[Path, str] = {}

	cmd = [
		'git', 'log', '--topo-order',
		'--format=format:%ad', '--date=format:%Y',
		'--name-only', '-z', '--',
	]
	cmd.extend(p.as_posix() for p in paths)
	proc = run(cmd, stdout=PIPE, check=True)

	# Each record is "<year><newline><path>\0<path>\0".  With "-z" and the "format:"
	# (separator) semantics git writes one extra NUL *between* commits only, so every
	# record but the last ends in "\0\0" while the last ends in a single "\0".  Accept
	# end-of-output as a terminator too, otherwise the final commit in the log is dropped.
	regex = re.compile(
		br'(?P<year>[0-9]{4,})(?:\r\n|\n|\r)(?P<files>.*?)(?:\x00\x00|\x00?\Z)',
	)
	for match in regex.finditer(proc.stdout):
		year = match.group('year').decode()
		for path in split_paths(match.group('files')):
			# Keep the year from the first commit listed for a path; later records
			# for the same path come from its ancestors (--topo-order).
			output.setdefault(path, year)

	return output


def get_changed(paths: List[Path]) -> Iterator[Path]:
	"""
	Return an iterator over the given paths that differ from HEAD

	The names come from `git diff --name-only` against HEAD, limited to `paths`.
	"""
	pathspecs = [entry.as_posix() for entry in paths]
	diff = run(
		['git', 'diff', '--no-commit-id', '--name-only', '-z', 'HEAD', '--', *pathspecs],
		stdout=PIPE,
		check=True,
	)
	return split_paths(diff.stdout)


def split_paths(paths: bytes) -> Iterator[Path]:
	"""
	Turn a NUL-separated byte string of file names into an iterator of Paths

	Empty fields (for example the one after a trailing NUL) are skipped.
	"""
	fields = paths.split(b'\x00')
	return (Path(field.decode()) for field in fields if field)


def main():
	"""
	CLI entrypoint
	"""
	cmd = ['git', 'diff', '--no-commit-id', '--name-only', '-z', 'HEAD']
	changed = set(check_output(cmd, text=True).split('\0'))
	changed.remove('')
	missing = []
	paths = filter_excluded(Path(arg) for arg in sys.argv[1:])

	for path in sys.argv[1:]:
		if path not in changed:
			continue
		if not check_file(path):
	year = time.strftime('%Y')
	years = {path: year for path in paths if path.is_file()}
	years.update(get_file_years(paths))
	years.update((path, year) for path in get_changed(paths))

	missing = []
	for path in paths:
		if path.is_file() and not check_file(path, years[path]):
			missing.append(path)

	if missing: