"""Mail filter that harvests address headers into an alias file.

The message on stdin is copied unchanged to stdout.  The values of its
From/Reply-To/Return-Path/To/Cc/Bcc headers are parsed into name/address
pairs and merged, as ``alias <key> <entries>`` lines, into the alias file
named on the command line (.mutt/scripts/extract-aliases.py ALIAS_FILE).
"""

import re
import os
import sys
from os import path


class AliasFile:
    """An alias file plus a set of pending, not yet written, changes.

    Reading ``aliases[key]`` returns a mutable set seeded from the entries
    currently on disk; mutating it (or assigning ``aliases[key] = entries``)
    stages an update that ``write()`` later persists.
    """

    def __init__(self, filename):
        self.file = filename
        self._aliases = {}  # alias -> set of entries as stored on disk
        self._updates = {}  # alias -> entries staged for the next write()
        self._index = {}    # alias -> byte offset of its active line
        self.read()

    def read(self):
        """Load the alias file into the in-memory alias map and offset index.

        A missing file is treated as empty so that the first run can create
        it through write().  Lines that do not have the shape
        ``alias <key> <entries>`` are ignored.
        """
        aliases = self._aliases
        index = self._index
        if not path.exists(self.file):
            return
        # Offsets are tracked by hand instead of via fp.tell() so they do not
        # depend on how the file object buffers while being iterated.
        offset = 0
        with open(self.file, 'rb') as fp:
            for line in fp:
                start = offset
                offset += len(line)
                if not line.startswith(b'alias'):
                    continue
                fields = line.decode('utf-8').split(None, 2)
                if len(fields) != 3 or fields[0] != 'alias':
                    # e.g. "alias foo" or "aliases ..." -- not ours to parse
                    continue
                _, alias, entries = fields
                aliases[alias] = {
                    e.strip() for e in entries.split(',') if e.strip()}
                index[alias] = start

    def write(self):
        """Persist the staged updates to the alias file.

        For every alias whose staged entries differ from what is on disk,
        the old line (if any) is disabled by overwriting its first byte with
        ``#`` and a fresh line is appended at the end of the file.  An alias
        staged with no entries only has its old line disabled.  The parent
        directory and the file itself are created when missing.

        Raises:
            OSError: if the directory or file cannot be created or written.
        """
        filepath = self.file
        updates = self._updates
        aliases = self._aliases
        index = self._index

        directory = path.dirname(filepath)
        if directory:
            try:
                os.makedirs(directory, 0o770)
            except OSError:
                # "Already exists" is expected; anything else is a real error.
                if not path.isdir(directory):
                    raise

        mode = 'r+b' if path.exists(filepath) else 'w+b'
        with open(filepath, mode) as fp:
            for alias, entries in updates.items():
                entries = set(entries)
                if entries == aliases.get(alias):
                    continue
                if alias in index:
                    # Turn "alias ..." into "#lias ..." in place; the line
                    # length is unchanged so the other offsets stay valid.
                    fp.seek(index.pop(alias))
                    fp.write(b'#')
                if not entries:
                    aliases.pop(alias, None)
                    continue
                # Remember where the new line lives so that a later write()
                # disables this line rather than the already dead one.
                index[alias] = self._append_line(fp, alias, entries)
                aliases[alias] = entries
        updates.clear()

    @staticmethod
    def _append_line(fp, alias, entries):
        """Append an alias line at the end of *fp* and return its offset."""
        fp.seek(0, os.SEEK_END)
        end = fp.tell()
        if end:
            # Do not glue the new line onto an unterminated last line.
            fp.seek(end - 1)
            terminated = fp.read(1) == b'\n'
            fp.seek(0, os.SEEK_END)
            if not terminated:
                fp.write(b'\n')
                end += 1
        # Sorted so the output does not depend on set iteration order.
        aline = 'alias {} {}\n'.format(alias, ', '.join(sorted(entries)))
        fp.write(aline.encode('utf-8'))
        return end

    def __getitem__(self, alias):
        """Return the staged (mutable) entry set for *alias*, creating it."""
        current = self._aliases.get(alias)
        current = set() if current is None else current.copy()
        return self._updates.setdefault(alias, current)

    def __setitem__(self, alias, entries):
        """Stage *entries* (any iterable of strings) as the value of *alias*."""
        self._updates[alias] = entries


def sift():
    """Copy stdin to stdout, yielding the value of each address header.

    Folded headers (continuation lines starting with a space or tab) are
    unfolded, so addresses on wrapped To/Cc lines are included.  Header
    scanning stops at the first blank line; the rest of the message is
    copied through verbatim.  stdout is closed once the copy is complete.
    """
    write = sys.stdout.write
    address_header_re = re.compile(
        r'^(from|reply-to|return-path|to|cc|bcc):', re.I)

    pending = None  # value of the address header currently being collected
    for line in sys.stdin:
        write(line)
        if pending is not None:
            if line[:1] in (' ', '\t') and line.strip():
                pending += ' ' + line.strip()
                continue
            yield pending
            pending = None
        if line.rstrip() == '':
            # end of headers, break out into next simplified loop for
            # efficiency.
            break
        if address_header_re.match(line):
            pending = line.split(':', 1)[1].strip()
    if pending is not None:
        # input ended while still inside the header block
        yield pending

    # quickly write out the rest of the email
    for line in sys.stdin:
        write(line)
    sys.stdout.close()


def generate_aliases(contacts):
    """Parse several name+address formats used in From headers.

    Args:
        contacts: iterable of header values, each possibly holding several
            comma separated addresses.

    Yields:
        ``(alias, entry)`` pairs where *entry* is ``"Name <address>"`` and
        *alias* is a dash-joined, lower-cased ordering of the name parts.
        Addresses or names containing "noreply"/"no-reply" are skipped.
    """
    re_parts = dict(
        name=r'(?P<_quo>")?(?P<name>[a-z][^"@]*[a-z])(?(_quo)")',
        addr=r'(?P<addr>[^<]\S+[@]\S+[^>])',
        # \[ instead of the equivalent [[] which triggers a FutureWarning
        # ("possible nested set") on stderr every time the script runs.
        pre=r'(?:(?P<_pre_raquo><)|(?P<_pre_mailto>\[mailto:))',
        post=r'(?(_pre_raquo)>|(?(_pre_mailto)]))',
    )
    regexes = [
        # Name <addr>  /  Name [mailto:addr]
        re.compile(
            r'\s*{name}?\s*'
            r'(?(name){pre})'
            r'{addr}'
            r'{post}\s*(?:,|$)'
            .format(**re_parts), re.I),
        # addr  /  addr (Name)
        re.compile(
            r'\s*{addr}\s*'
            r'(?:\(\s*{name}\s*\))?\s*(?:,|$)'
            .format(**re_parts), re.I),
    ]
    re_no_reply = re.compile('no.?reply', re.I)

    # Materialise once: every pattern needs its own pass over the contacts.
    contacts = list(contacts)
    for regex in regexes:
        for contact in contacts:
            for match in regex.finditer(contact):
                details = match.groupdict()
                addr = details['addr']
                # lets not accept any of those "no-reply" addresses; search()
                # so that e.g. "donotreply@" is caught as well.
                if re_no_reply.search(addr):
                    continue
                if details['name'] and re_no_reply.search(details['name']):
                    continue
                localpart = addr.split('@', 1)[0]
                name = pivot_name(details['name'] or localpart)
                entry = '{} <{}>'.format(name, addr)
                for names in name_combi(name.lower().split()):
                    yield '-'.join(names), entry


def name_combi(names):
    """Generate different orders of names for aliases.

    Yields the names as given and, when there are at least two, the last
    name rotated to the front followed by the (first, last) pair.
    """
    yield names
    if len(names) >= 2:
        yield names[-1:] + names[:-1]
        yield names[0], names[-1]


def pivot_name(name):
    """Pivot a surname first name, "Doe, Jon A." => "Jon A. Doe".

    Names without a comma are returned unchanged.
    """
    if ',' not in name:
        return name
    sur, fore = name.split(',', 1)
    # Strip the blanks that surrounded the comma so the result does not
    # start with a space.
    fore = fore.replace(',', '').strip()
    return '{} {}'.format(fore, sur.strip()).strip()


def process(alias_file):
    """Filter stdin to stdout and merge the addresses found into *alias_file*."""
    # The message is passed through completely before the alias file is
    # touched.
    contacts = list(sift())
    aliases = AliasFile(alias_file)
    for alias, entry in generate_aliases(contacts):
        aliases[alias].add(entry)
    aliases.write()


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: extract-aliases.py ALIAS_FILE')
    process(sys.argv[1])
"""Mail filter that harvests address headers into an alias file.

The message on stdin is copied unchanged to stdout.  The values of its
From/Reply-To/Return-Path/To/Cc/Bcc headers are parsed into name/address
pairs and merged, as ``alias <key> <entries>`` lines, into the alias file
named on the command line (.mutt/scripts/extract-aliases.py ALIAS_FILE).
"""

import re
import os
import sys
from os import path


class AliasFile:
    """An alias file plus a set of pending, not yet written, changes.

    Reading ``aliases[key]`` returns a mutable set seeded from the entries
    currently on disk; mutating it (or assigning ``aliases[key] = entries``)
    stages an update that ``write()`` later persists.
    """

    def __init__(self, filename):
        self.file = filename
        self._aliases = {}  # alias -> set of entries as stored on disk
        self._updates = {}  # alias -> entries staged for the next write()
        self._index = {}    # alias -> byte offset of its active line
        self.read()

    def read(self):
        """Load the alias file into the in-memory alias map and offset index.

        A missing file is treated as empty so that the first run can create
        it through write().  Lines that do not have the shape
        ``alias <key> <entries>`` are ignored.
        """
        aliases = self._aliases
        index = self._index
        if not path.exists(self.file):
            return
        # Offsets are tracked by hand instead of via fp.tell() so they do not
        # depend on how the file object buffers while being iterated.
        offset = 0
        with open(self.file, 'rb') as fp:
            for line in fp:
                start = offset
                offset += len(line)
                if not line.startswith(b'alias'):
                    continue
                fields = line.decode('utf-8').split(None, 2)
                if len(fields) != 3 or fields[0] != 'alias':
                    # e.g. "alias foo" or "aliases ..." -- not ours to parse
                    continue
                _, alias, entries = fields
                aliases[alias] = {
                    e.strip() for e in entries.split(',') if e.strip()}
                index[alias] = start

    def write(self):
        """Persist the staged updates to the alias file.

        For every alias whose staged entries differ from what is on disk,
        the old line (if any) is disabled by overwriting its first byte with
        ``#`` and a fresh line is appended at the end of the file.  An alias
        staged with no entries only has its old line disabled.  The parent
        directory and the file itself are created when missing.

        Raises:
            OSError: if the directory or file cannot be created or written.
        """
        filepath = self.file
        updates = self._updates
        aliases = self._aliases
        index = self._index

        directory = path.dirname(filepath)
        if directory:
            try:
                os.makedirs(directory, 0o770)
            except OSError:
                # "Already exists" is expected; anything else is a real error.
                if not path.isdir(directory):
                    raise

        mode = 'r+b' if path.exists(filepath) else 'w+b'
        with open(filepath, mode) as fp:
            for alias, entries in updates.items():
                entries = set(entries)
                if entries == aliases.get(alias):
                    continue
                if alias in index:
                    # Turn "alias ..." into "#lias ..." in place; the line
                    # length is unchanged so the other offsets stay valid.
                    fp.seek(index.pop(alias))
                    fp.write(b'#')
                if not entries:
                    aliases.pop(alias, None)
                    continue
                # Remember where the new line lives so that a later write()
                # disables this line rather than the already dead one.
                index[alias] = self._append_line(fp, alias, entries)
                aliases[alias] = entries
        updates.clear()

    @staticmethod
    def _append_line(fp, alias, entries):
        """Append an alias line at the end of *fp* and return its offset."""
        fp.seek(0, os.SEEK_END)
        end = fp.tell()
        if end:
            # Do not glue the new line onto an unterminated last line.
            fp.seek(end - 1)
            terminated = fp.read(1) == b'\n'
            fp.seek(0, os.SEEK_END)
            if not terminated:
                fp.write(b'\n')
                end += 1
        # Sorted so the output does not depend on set iteration order.
        aline = 'alias {} {}\n'.format(alias, ', '.join(sorted(entries)))
        fp.write(aline.encode('utf-8'))
        return end

    def __getitem__(self, alias):
        """Return the staged (mutable) entry set for *alias*, creating it."""
        current = self._aliases.get(alias)
        current = set() if current is None else current.copy()
        return self._updates.setdefault(alias, current)

    def __setitem__(self, alias, entries):
        """Stage *entries* (any iterable of strings) as the value of *alias*."""
        self._updates[alias] = entries


def sift():
    """Copy stdin to stdout, yielding the value of each address header.

    Folded headers (continuation lines starting with a space or tab) are
    unfolded, so addresses on wrapped To/Cc lines are included.  Header
    scanning stops at the first blank line; the rest of the message is
    copied through verbatim.  stdout is closed once the copy is complete.
    """
    write = sys.stdout.write
    address_header_re = re.compile(
        r'^(from|reply-to|return-path|to|cc|bcc):', re.I)

    pending = None  # value of the address header currently being collected
    for line in sys.stdin:
        write(line)
        if pending is not None:
            if line[:1] in (' ', '\t') and line.strip():
                pending += ' ' + line.strip()
                continue
            yield pending
            pending = None
        if line.rstrip() == '':
            # end of headers, break out into next simplified loop for
            # efficiency.
            break
        if address_header_re.match(line):
            pending = line.split(':', 1)[1].strip()
    if pending is not None:
        # input ended while still inside the header block
        yield pending

    # quickly write out the rest of the email
    for line in sys.stdin:
        write(line)
    sys.stdout.close()


def generate_aliases(contacts):
    """Parse several name+address formats used in From headers.

    Args:
        contacts: iterable of header values, each possibly holding several
            comma separated addresses.

    Yields:
        ``(alias, entry)`` pairs where *entry* is ``"Name <address>"`` and
        *alias* is a dash-joined, lower-cased ordering of the name parts.
        Addresses or names containing "noreply"/"no-reply" are skipped.
    """
    re_parts = dict(
        name=r'(?P<_quo>")?(?P<name>[a-z][^"@]*[a-z])(?(_quo)")',
        addr=r'(?P<addr>[^<]\S+[@]\S+[^>])',
        # \[ instead of the equivalent [[] which triggers a FutureWarning
        # ("possible nested set") on stderr every time the script runs.
        pre=r'(?:(?P<_pre_raquo><)|(?P<_pre_mailto>\[mailto:))',
        post=r'(?(_pre_raquo)>|(?(_pre_mailto)]))',
    )
    regexes = [
        # Name <addr>  /  Name [mailto:addr]
        re.compile(
            r'\s*{name}?\s*'
            r'(?(name){pre})'
            r'{addr}'
            r'{post}\s*(?:,|$)'
            .format(**re_parts), re.I),
        # addr  /  addr (Name)
        re.compile(
            r'\s*{addr}\s*'
            r'(?:\(\s*{name}\s*\))?\s*(?:,|$)'
            .format(**re_parts), re.I),
    ]
    re_no_reply = re.compile('no.?reply', re.I)

    # Materialise once: every pattern needs its own pass over the contacts.
    contacts = list(contacts)
    for regex in regexes:
        for contact in contacts:
            for match in regex.finditer(contact):
                details = match.groupdict()
                addr = details['addr']
                # lets not accept any of those "no-reply" addresses; search()
                # so that e.g. "donotreply@" is caught as well.
                if re_no_reply.search(addr):
                    continue
                if details['name'] and re_no_reply.search(details['name']):
                    continue
                localpart = addr.split('@', 1)[0]
                name = pivot_name(details['name'] or localpart)
                entry = '{} <{}>'.format(name, addr)
                for names in name_combi(name.lower().split()):
                    yield '-'.join(names), entry


def name_combi(names):
    """Generate different orders of names for aliases.

    Yields the names as given and, when there are at least two, the last
    name rotated to the front followed by the (first, last) pair.
    """
    yield names
    if len(names) >= 2:
        yield names[-1:] + names[:-1]
        yield names[0], names[-1]


def pivot_name(name):
    """Pivot a surname first name, "Doe, Jon A." => "Jon A. Doe".

    Names without a comma are returned unchanged.
    """
    if ',' not in name:
        return name
    sur, fore = name.split(',', 1)
    # Strip the blanks that surrounded the comma so the result does not
    # start with a space.
    fore = fore.replace(',', '').strip()
    return '{} {}'.format(fore, sur.strip()).strip()


def process(alias_file):
    """Filter stdin to stdout and merge the addresses found into *alias_file*."""
    # The message is passed through completely before the alias file is
    # touched.
    contacts = list(sift())
    aliases = AliasFile(alias_file)
    for alias, entry in generate_aliases(contacts):
        aliases[alias].add(entry)
    aliases.write()


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: extract-aliases.py ALIAS_FILE')
    process(sys.argv[1])