Commit 0386adef authored by Dom Sekotill's avatar Dom Sekotill
Browse files

Improve mangling of names with slashes

Several such names, for instance, can be automatically split at the slash and treated as alternative identifiers (aliases).
parent a0a04929
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@ IDENTIFIERS = {
	"Handhelds/PDA's": ["Handhelds", "PDAs"],
	"Science/Research": ["Science", "Research"],
	"Other/Proprietary License": ["OtherLicense", "ProprietaryLicense"],
	"University of Illinois/NCSA Open Source License": ["UniversityOfIllinois_NCSA_OpenSourceLicense"],
	"zlib/libpng License": ['zlibLicense', 'libpngLicense'],
	"SunOS/Solaris": ['SunOS', 'Solaris'],
	"Database Engines/Servers": ['DatabaseEngines', 'DatabaseServers'],
@@ -64,6 +65,9 @@ IDENTIFIERS = {
	"CGI Tools/Libraries": ['CGITools', 'CGILibraries'],
	"3D Rendering": ["Rendering3D"],
	"3D Modeling": ["Modeling3D"],
	"PL/SQL": ["PL_SQL"],
	"BSD/OS": ["BSD_OS"],
	"Windows NT/2000": ["WindowsNT"],
}

# Get the contents of any parentheses and any text before or after
@@ -149,6 +153,16 @@ def replace_misidentifiers(match: re.Match) -> str:
	return post


def split_slashes(name: str) -> List[str]:
	"""
	Try to break a name into alias candidates at each slash

	Every slash-separated piece is passed through `replace_misidentifiers`.  When all of
	the resulting pieces are valid Python identifiers they are returned in their original
	order; otherwise the name is returned untouched as the only item of the list.
	"""
	# Convert every piece up front so the helper is called for each one, whatever the
	# outcome of the identifier check below
	candidates = list(map(replace_misidentifiers, name.split('/')))

	for candidate in candidates:
		if not candidate.isidentifier():
			# A single unusable piece means the name cannot be split into aliases
			return [name]
	return candidates


def get_identifiers(label: str) -> List[str]:
	"""
	Return a label's normalised identifier and any normalised alias identifiers as a list
@@ -171,6 +185,9 @@ def get_identifiers(label: str) -> List[str]:
	inner = match.group('inner')
	aliases = get_identifiers(inner) if inner else []

	if not aliases:
		name, *aliases = split_slashes(name)

	# Strip out non-identifier characters
	name = replace_misidentifiers(name.strip())

+38 −17
Original line number Diff line number Diff line
@@ -363,7 +363,7 @@ class License(Classifier):
		Unlicense = TheUnlicense
		UniversalPermissiveLicense = "License :: OSI Approved :: Universal Permissive License (UPL)"
		UPL = UniversalPermissiveLicense
		UniversityOfIllinoisNCSA_OpenSourceLicense = "License :: OSI Approved :: University of Illinois/NCSA Open Source License"
		UniversityOfIllinois_NCSA_OpenSourceLicense = "License :: OSI Approved :: University of Illinois/NCSA Open Source License"
		VovidaSoftwareLicense_1_0 = "License :: OSI Approved :: Vovida Software License 1.0"
		W3C_License = "License :: OSI Approved :: W3C License"
		X_NetLicense = "License :: OSI Approved :: X.Net License"
@@ -481,7 +481,7 @@ class OperatingSystem(Classifier):
			Windows_8_1 = "Operating System :: Microsoft :: Windows :: Windows 8.1"
			Windows_95_98_2000 = "Operating System :: Microsoft :: Windows :: Windows 95/98/2000"
			WindowsCE = "Operating System :: Microsoft :: Windows :: Windows CE"
			WindowsNT_2000 = "Operating System :: Microsoft :: Windows :: Windows NT/2000"
			WindowsNT = "Operating System :: Microsoft :: Windows :: Windows NT/2000"
			WindowsServer_2003 = "Operating System :: Microsoft :: Windows :: Windows Server 2003"
			WindowsServer_2008 = "Operating System :: Microsoft :: Windows :: Windows Server 2008"
			WindowsVista = "Operating System :: Microsoft :: Windows :: Windows Vista"
@@ -569,7 +569,8 @@ class ProgrammingLanguage(Classifier):
	C_plus_plus = "Programming Language :: C++"
	ColdFusion = "Programming Language :: Cold Fusion"
	Cython = "Programming Language :: Cython"
	DelphiKylix = "Programming Language :: Delphi/Kylix"
	Delphi = "Programming Language :: Delphi/Kylix"
	Kylix = Delphi
	Dylan = "Programming Language :: Dylan"
	Eiffel = "Programming Language :: Eiffel"
	EmacsLisp = "Programming Language :: Emacs-Lisp"
@@ -750,7 +751,7 @@ class Topic(Classifier):
		CAI = ComputerAidedInstruction
		Testing = "Topic :: Education :: Testing"

	class GamesEntertainment(Classifier, classifier="Topic :: Games/Entertainment"):
	class Games(Classifier, classifier="Topic :: Games/Entertainment"):

		Arcade = "Topic :: Games/Entertainment :: Arcade"
		BoardGames = "Topic :: Games/Entertainment :: Board Games"
@@ -766,9 +767,11 @@ class Topic(Classifier):
		Simulation = "Topic :: Games/Entertainment :: Simulation"
		TurnBasedStrategy = "Topic :: Games/Entertainment :: Turn Based Strategy"

	Entertainment = Games

	class Internet(Classifier, classifier="Topic :: Internet"):

		class WWW_HTTP(Classifier, classifier="Topic :: Internet :: WWW/HTTP"):
		class WWW(Classifier, classifier="Topic :: Internet :: WWW/HTTP"):

			class DynamicContent(Classifier, classifier="Topic :: Internet :: WWW/HTTP :: Dynamic Content"):

@@ -776,7 +779,8 @@ class Topic(Classifier):
				CGILibraries = CGITools
				ContentManagementSystem = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Content Management System"
				MessageBoards = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Message Boards"
				NewsDiary = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary"
				News = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary"
				Diary = News
				PageCounters = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Page Counters"
				Wiki = "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Wiki"

@@ -792,9 +796,12 @@ class Topic(Classifier):

			Browsers = "Topic :: Internet :: WWW/HTTP :: Browsers"
			HTTP_Servers = "Topic :: Internet :: WWW/HTTP :: HTTP Servers"
			IndexingSearch = "Topic :: Internet :: WWW/HTTP :: Indexing/Search"
			Indexing = "Topic :: Internet :: WWW/HTTP :: Indexing/Search"
			Search = Indexing
			Session = "Topic :: Internet :: WWW/HTTP :: Session"

		HTTP = WWW

		FileTransferProtocol = "Topic :: Internet :: File Transfer Protocol (FTP)"
		FTP = FileTransferProtocol
		Finger = "Topic :: Internet :: Finger"
@@ -827,7 +834,7 @@ class Topic(Classifier):
			Presentation = "Topic :: Multimedia :: Graphics :: Presentation"
			Viewers = "Topic :: Multimedia :: Graphics :: Viewers"

		class SoundAudio(Classifier, classifier="Topic :: Multimedia :: Sound/Audio"):
		class Sound(Classifier, classifier="Topic :: Multimedia :: Sound/Audio"):

			class CD_Audio(Classifier, classifier="Topic :: Multimedia :: Sound/Audio :: CD Audio"):

@@ -840,7 +847,8 @@ class Topic(Classifier):
				MP3 = "Topic :: Multimedia :: Sound/Audio :: Players :: MP3"

			Analysis = "Topic :: Multimedia :: Sound/Audio :: Analysis"
			CaptureRecording = "Topic :: Multimedia :: Sound/Audio :: Capture/Recording"
			Capture = "Topic :: Multimedia :: Sound/Audio :: Capture/Recording"
			Recording = Capture
			Conversion = "Topic :: Multimedia :: Sound/Audio :: Conversion"
			Editors = "Topic :: Multimedia :: Sound/Audio :: Editors"
			MIDI = "Topic :: Multimedia :: Sound/Audio :: MIDI"
@@ -848,6 +856,8 @@ class Topic(Classifier):
			SoundSynthesis = "Topic :: Multimedia :: Sound/Audio :: Sound Synthesis"
			Speech = "Topic :: Multimedia :: Sound/Audio :: Speech"

		Audio = Sound

		class Video(Classifier, classifier="Topic :: Multimedia :: Video"):

			Capture = "Topic :: Multimedia :: Video :: Capture"
@@ -855,7 +865,7 @@ class Topic(Classifier):
			Display = "Topic :: Multimedia :: Video :: Display"
			NonLinearEditor = "Topic :: Multimedia :: Video :: Non-Linear Editor"

	class OfficeBusiness(Classifier, classifier="Topic :: Office/Business"):
	class Office(Classifier, classifier="Topic :: Office/Business"):

		class Financial(Classifier, classifier="Topic :: Office/Business :: Financial"):

@@ -865,11 +875,14 @@ class Topic(Classifier):
			Spreadsheet = "Topic :: Office/Business :: Financial :: Spreadsheet"

		Groupware = "Topic :: Office/Business :: Groupware"
		NewsDiary = "Topic :: Office/Business :: News/Diary"
		News = "Topic :: Office/Business :: News/Diary"
		Diary = News
		OfficeSuites = "Topic :: Office/Business :: Office Suites"
		Scheduling = "Topic :: Office/Business :: Scheduling"

	class ScientificEngineering(Classifier, classifier="Topic :: Scientific/Engineering"):
	Business = Office

	class Scientific(Classifier, classifier="Topic :: Scientific/Engineering"):

		ArtificialIntelligence = "Topic :: Scientific/Engineering :: Artificial Intelligence"
		ArtificialLife = "Topic :: Scientific/Engineering :: Artificial Life"
@@ -885,12 +898,15 @@ class Topic(Classifier):
		ImageProcessing = "Topic :: Scientific/Engineering :: Image Processing"
		ImageRecognition = "Topic :: Scientific/Engineering :: Image Recognition"
		InformationAnalysis = "Topic :: Scientific/Engineering :: Information Analysis"
		InterfaceEngineProtocolTranslator = "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator"
		InterfaceEngine = "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator"
		ProtocolTranslator = InterfaceEngine
		Mathematics = "Topic :: Scientific/Engineering :: Mathematics"
		MedicalScienceApps = "Topic :: Scientific/Engineering :: Medical Science Apps."
		Physics = "Topic :: Scientific/Engineering :: Physics"
		Visualization = "Topic :: Scientific/Engineering :: Visualization"

	Engineering = Scientific

	class Security(Classifier, classifier="Topic :: Security"):

		Cryptography = "Topic :: Security :: Cryptography"
@@ -988,18 +1004,21 @@ class Topic(Classifier):

		class SystemsAdministration(Classifier, classifier="Topic :: System :: Systems Administration"):

			class AuthenticationDirectory(Classifier, classifier="Topic :: System :: Systems Administration :: Authentication/Directory"):
			class Authentication(Classifier, classifier="Topic :: System :: Systems Administration :: Authentication/Directory"):

				LDAP = "Topic :: System :: Systems Administration :: Authentication/Directory :: LDAP"
				NIS = "Topic :: System :: Systems Administration :: Authentication/Directory :: NIS"

			Directory = Authentication

		Benchmark = "Topic :: System :: Benchmark"
		Clustering = "Topic :: System :: Clustering"
		ConsoleFonts = "Topic :: System :: Console Fonts"
		DistributedComputing = "Topic :: System :: Distributed Computing"
		Emulators = "Topic :: System :: Emulators"
		Filesystems = "Topic :: System :: Filesystems"
		InstallationSetup = "Topic :: System :: Installation/Setup"
		Installation = "Topic :: System :: Installation/Setup"
		Setup = Installation
		Logging = "Topic :: System :: Logging"
		Monitoring = "Topic :: System :: Monitoring"
		OperatingSystem = "Topic :: System :: Operating System"
@@ -1014,7 +1033,8 @@ class Topic(Classifier):

		Serial = "Topic :: Terminals :: Serial"
		Telnet = "Topic :: Terminals :: Telnet"
		TerminalEmulatorsX_Terminals = "Topic :: Terminals :: Terminal Emulators/X Terminals"
		TerminalEmulators = "Topic :: Terminals :: Terminal Emulators/X Terminals"
		X_Terminals = TerminalEmulators

	class TextEditors(Classifier, classifier="Topic :: Text Editors"):

@@ -1046,7 +1066,8 @@ class Topic(Classifier):
	AdaptiveTechnologies = "Topic :: Adaptive Technologies"
	ArtisticSoftware = "Topic :: Artistic Software"
	HomeAutomation = "Topic :: Home Automation"
	OtherNonlistedTopic = "Topic :: Other/Nonlisted Topic"
	Other = "Topic :: Other/Nonlisted Topic"
	NonlistedTopic = Other
	Printing = "Topic :: Printing"
	Religion = "Topic :: Religion"
	Utilities = "Topic :: Utilities"