Commit 0c53f4fd authored by Dom Sekotill's avatar Dom Sekotill
Browse files

Optimise walking by matching directories only once

Directories were matched twice before: once when checking the parent
directory's entries and once when loading rules. Now both are done in
one step.
parent 684ca363
Loading
Loading
Loading
Loading
+13 −3
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@ from os import path
from pkg_resources import resource_listdir
from importlib import import_module

from .. import rules, walk


class Loader(object):

@@ -27,18 +29,26 @@ class Loader(object):
		raise NotImplementedError()

	def load_from_dir(self, dirpath):
		"""
		Check `dirpath` against the current rules, then load its rules.

		Raises `walk.SkipDirectory` when running `self.rules` against
		`dirpath` yields exactly `False`; in every other case the work is
		delegated to `self.load_directory`.
		"""
		verdict = rules.run_rules(self.rules, dirpath)
		if verdict is not False:
			self.load_directory(dirpath)
			return
		# An 'ignored' directory must not contribute ignore rules of its own.
		raise walk.SkipDirectory()

	def load_directory(self, dirpath):
		"""
		Find any ignore rules in `dirpath` and add them to a rule graph.

		`dirpath` is relative to `self.project_root` and should be passed as-is
		to Rule constructors.[^1]  `dirpath` is always allowed by any current
		rules and should not require checking in most implementations.

		Subdirectories do not need to be recursively searched as this will be
		called for each subdirectory checked. It is ultimately up to the
		implementor how much of the rule graph should be generated on each
		call.

		This base implementation always raises `NotImplementedError`.

		[^1]: Rules generated for subdirectories of `dirpath` should have the
		full path of the subdirectory relative to `self.project_root`.
		"""
		raise NotImplementedError()
+3 −7
Original line number Diff line number Diff line
@@ -70,15 +70,11 @@ class GitLoader(loaders.Loader):
		return False


	def load_from_dir(self, dirpath):
		if rules.run_rules(self.rules, dirpath) is False:
			# If a directory is 'ignored' by an ignore rule, don't read rules 
			# from it.
			return

	def load_directory(self, dirpath):
		abs_dirpath = path.join(self.project_root, dirpath)

		if dirpath not in ['.', ''] and path.exists(path.join(abs_dirpath, '.git')):
		# Skip submodules / embedded repositories
		if dirpath not in rules.TOP_NAMES and path.exists(path.join(abs_dirpath, '.git')):
			raise walk.SkipDirectory()

		with util.ignore_if_missing():
+6 −0
Original line number Diff line number Diff line
@@ -2,6 +2,9 @@ from os import path
from warnings import warn


# Path spellings treated as the top-level directory: the empty string and '.'.
TOP_NAMES = {'', '.'}


def add_attr(attrs, obj, name):
	try:
		attrs.append("{0}={1}".format(name, repr(getattr(obj, name))))
@@ -39,6 +42,9 @@ class Rule(object):
		return ('<{0}>'.format(' '.join(attrs)))

	def check_path(self, testpath, testname=None):
		if testpath in TOP_NAMES:
			return IncludeRule()

		if not testpath.startswith(self.pattern_root):
			return self.next_if_fail

+4 −8
Original line number Diff line number Diff line
@@ -22,16 +22,12 @@ def walk_files(start):

	for walkpath, dirnames, filenames in walk(start):
		relpath = path.relpath(walkpath, top)
		relpath = '' if relpath == '.' else relpath
		if loader and relpath:
			try:
				loader.load_from_dir(relpath)
			except SkipDirectory:
				del dirnames[:]
				continue
		for dirname in list(dirnames):
			dirpath = path.join(relpath, dirname)
			if loader and not rules.run_rules(loader.rules, dirpath, dirname):
			try:
				if loader:
					loader.load_from_dir(dirpath)
			except SkipDirectory:
				dirnames.remove(dirname)
		for filename in filenames:
			filepath = path.join(relpath, filename)