Commit 4541e287 authored by Dom Sekotill's avatar Dom Sekotill
Browse files

Add a hook checking for correct executable bits

parent dbf2deed
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -65,6 +65,9 @@ ignore =
  ;[ No blank lines allowed between a section header and its content ]
  D412

  ;[ missing whitespace around bitwise or shift operator ]
  E227

  ;[ Line too long ]
  ; Prefer B950 implementation
  E501
+9 −0
Original line number Diff line number Diff line
@@ -63,6 +63,15 @@ repos:
    entry: hooks/first_parent.py
    pass_filenames: false
    stages: [merge-commit, commit, push]
  - id: check-executable-modes
    name: Check executable bits
    language: python
    entry: hooks/executables.py
    args: [--fix-mode]
    minimum_pre_commit_version: 2.9.0
    require_serial: true
    stages: [commit]
    types: [text]

- repo: https://github.com/pre-commit/pygrep-hooks
  rev: v1.8.0
+13 −0
Original line number Diff line number Diff line
@@ -74,3 +74,16 @@
  always_run: true
  pass_filenames: false
  stages: [merge-commit, commit, push]

- id: check-executable-modes
  name: Check executable bits
  description: |
    Check for missing or unnecessary executable bits on text files, based on the
    presence of shebang lines.
  language: python
  entry: check-executable-bits
  args: [--fix-mode]
  minimum_pre_commit_version: 2.9.0
  require_serial: true
  stages: [commit]
  types: [text]

hooks/executables.py

0 → 100755
+143 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
#  Copyright 2021  Dominik Sekotill <dom.sekotill@kodo.org.uk>
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Check that shebang lines and executable bits match
"""

import argparse
import sys
from os import X_OK
from pathlib import Path
from subprocess import PIPE
from subprocess import Popen
from typing import Dict
from typing import Generator
from typing import Iterable
from typing import Tuple

# Byte-order-marks from chardet package
#
# The order is significant: has_shebang() stops at the first entry that matches the
# start of a file, so the four-byte marks must be listed before the two-byte marks
# that are prefixes of them (e.g. FF FE 00 00 before FF FE).
BOM_MARKS = [
	b'\xEF\xBB\xBF',  # UTF-8 BOM?! Why would this ever be necessary?
	b'\xFF\xFE\x00\x00',  # UTF-32-LE
	b'\x00\x00\xFE\xFF',  # UTF-32-BE
	b'\xFE\xFF\x00\x00',  # UCS-4
	b'\x00\x00\xFF\xFE',  # UCS-4
	b'\xFF\xFE',  # UTF-16-LE
	b'\xFE\xFF',  # UTF-16-BE
]

# The two bytes that open a "sharp-bang" interpreter line
SHEBANG = b'#!'


def has_shebang(path: Path) -> bool:
	"""
	Report whether the content of a file begins with a shebang marker (#!)

	Only the first 512 bytes of the file are read.  If that data starts with one of the
	byte-order-marks in BOM_MARKS the mark is skipped before the marker is looked for.
	"""
	with path.open('rb') as stream:
		prefix = stream.read(512)

	# Length of the first matching BOM, or zero when the file does not start with one
	offset = next((len(mark) for mark in BOM_MARKS if prefix.startswith(mark)), 0)

	return prefix[offset:offset + 2] == SHEBANG


def get_git_modes() -> Dict[Path, int]:
	"""
	Return a mapping of each staged file with its mode

	The index is listed with "git ls-files --stage -z".  The NUL-terminated format is used
	because without it git wraps paths containing unusual characters (tabs, quotes,
	non-ASCII bytes, ...) in quotes with backslash escapes, and such entries would never
	match the paths passed on the command line.

	Each record has the form "<mode> <object> <stage>\\t<path>"; the mode is parsed as an
	octal number.  The exit status of git is not examined: if it produces no output the
	returned mapping is simply empty.
	"""
	out: Dict[Path, int] = {}
	with Popen(['git', 'ls-files', '--stage', '-z'], stdout=PIPE, encoding='utf-8') as proc:
		assert proc.stdout is not None
		listing = proc.stdout.read()
	for record in listing.split('\0'):
		# Everything after the first tab is the path, verbatim (it may contain whitespace)
		meta, tab, s_path = record.partition('\t')
		if not tab:
			# The empty string after the final NUL, or anything else that is not a record
			continue
		s_mode = meta.split(maxsplit=1)[0]
		out[Path(s_path)] = int(s_mode, 8)
	return out


def is_executable(mode: int) -> bool:
	"""
	Return whether at least one executable bit is set in a mode value

	The user (0o100), group (0o010) and other (0o001) execute bits are all examined; any
	higher bits in the value, such as the file type bits reported by git or stat, are
	ignored.
	"""
	# A single mask covers all three execute bits.  Shifting the mode left and testing the
	# lowest bit can never see the user or group bits.
	return bool(mode & 0o111)


def check_paths(paths: Iterable[Path]) -> Generator[Tuple[Path, int], None, None]:
	"""
	Yield (path, new_mode) for each item of paths which requires a mode change

	The current mode of a path is taken from the mapping returned by get_git_modes(), or
	from the file on disk when the path is not in that mapping.  A path needs a change
	when having an executable bit set does not agree with its content starting with a
	shebang: files with a shebang get all three execute bits added, files without one get
	all three removed.  Every other bit of the mode is passed through untouched.

	Raises FileNotFoundError for a path that does not exist and IsADirectoryError for a
	path that is a directory.
	"""
	git_modes = get_git_modes()
	for path in paths:
		if not path.exists():
			raise FileNotFoundError(path)
		if path.is_dir():
			raise IsADirectoryError(path)
		try:
			mode = git_modes[path]
		except KeyError:
			mode = path.stat().st_mode
		executable = is_executable(mode)
		shebang = has_shebang(path)
		if executable == shebang:
			continue
		if shebang:
			mode |= 0o111
		else:
			# Clear, do not toggle: XOR would switch *on* any execute bit that was unset
			# (e.g. 0o744 would become 0o655)
			mode &= ~0o111
		yield (path, mode)


def cli_parser() -> argparse.ArgumentParser:
	"""
	Construct the command line parser for the hook

	The parser accepts an optional "--fix-mode" flag and one or more file paths, which are
	converted to Path instances.
	"""
	fix_mode_options = dict(
		action='store_true',
		help="Set or unset executable bits depending on found shebang lines",
	)
	files_options = dict(nargs='+', type=Path)

	cli = argparse.ArgumentParser()
	cli.add_argument('--fix-mode', **fix_mode_options)
	cli.add_argument('files', **files_options)
	return cli


def main() -> int:
	"""
	Run the check from the command line and return the process exit status

	Every file reported by check_paths() is written to standard error.  With --fix-mode
	the new mode is applied to the file and the status stays zero; without it nothing is
	modified and the status is one if any file needed a change.
	"""
	args = cli_parser().parse_args()
	needs_change = False

	for path, new_mode in check_paths(args.files):
		if not args.fix_mode:
			sys.stderr.write(f"needs mode {new_mode:06o}: {path}\n")
			needs_change = True
			continue
		path.chmod(new_mode)
		sys.stderr.write(f"new mode {new_mode:06o}: {path}\n")

	return 1 if needs_change else 0


# Script entry point: the return value of main() becomes the process exit status
if __name__ == '__main__':
	raise SystemExit(main())
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ requires-python = "~=3.6"
check-copyright-notice = "hooks.copyright:main"
check-for-squash = "hooks.squash:main"
check-first-parent = "hooks.first_parent:main"
check-executable-bits = "hooks.executables:main"

[tool.isort]
force_single_line = true