#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Renumbers and reorders footnotes in a text file.

This program reads text from standard input, renumbers footnotes and writes
the result to standard output.  It is an implementation of a programming task
by the German `Linux Magazin`_.  The `task description`_ is in English though.

The input format is a text with footnote references followed by a list of
footnotes.  Both are divided by a line with the text ``@footnotes:`` [1]_.
References in the body text are numbers in square brackets and footnotes are
listed one footnote per line, starting with a footnote number in square
brackets.  Example::

  This is some text without purpose or meaning [42].

  @footnotes:
  [42] Really!

After the ``@footnotes:`` line only footnotes or blank lines (just whitespace)
are allowed and each footnote number must be unique.  With the option
``--check-footnotes`` every footnote must be referenced in the text, and with
``--check-references`` every reference must have a footnote.  Both checks are
off by default on the command line, because the sample data from
`Linux Magazin`_ for this task contains references without footnotes.

The output is the same text with renumbered footnotes.  The footnotes are
renumbered and the references are replaced accordingly.  With the option
``--reorder`` the footnotes are sorted by first appearance of a reference to
them, before the renumbering takes place.

.. [1] There is some confusion in the task description if it is actually
   ``@footnotes:`` or ``@footnote:``.  The given sample file contains *both*
   lines!

.. _Linux Magazin: http://www.linux-magazin.de/
.. _task description:
   http://www.linux-magazin.de/content/download/27388/238793/file/Tasks_languages.pdf
"""
import re
import sys
import textwrap
from optparse import OptionParser

try:
    from itertools import imap
except ImportError:
    # Python 3 has no ``itertools.imap``; the builtin ``map`` is lazy there.
    imap = map

__author__ = "Marc 'BlackJack' Rintsch"
__version__ = '0.3'
__date__ = '2008-09-28'

__docformat__ = "restructuredtext en"

#: Regular expression to match and extract reference numbers.
REFERENCE_RE = re.compile(r'\[(\d+)\]')


class Error(Exception):
    """Error class for errors in parsing the document or inconsistencies of
    references/footnotes.
    """


class Footnote(object):
    """A single `Footnote`.

    :IVariables:
      number : int
        Number of the `Footnote`.
      text : string
        Content of the `Footnote`, i.e. everything that follows the closing
        bracket of the number (including any leading whitespace).
    """

    def __init__(self, number, text):
        """Creates `Footnote` from number and text."""
        self.number = number
        self.text = text

    def __str__(self):
        return '[%d]%s' % (self.number, self.text)

    @classmethod
    def parse(cls, line):
        """Parse a single text line into a `Footnote`.

        The line has to *start* with a number in square brackets.

        :raises Error: if the line is no valid footnote.

        :returns: parsed `Footnote`.
        """
        match = REFERENCE_RE.match(line)
        if not match:
            raise Error('not a valid footnote: %r' % line)
        return cls(int(match.group(1)), line[match.end():])


class Footnotes(object):
    """An iterable of `Footnote` objects."""

    def __init__(self, footnotes):
        """Creates `Footnotes` from given footnotes.

        :param footnotes: an iterable of `Footnote` objects.

        :raises Error: if there are `Footnote` objects with identical
            numbers.
        """
        self.footnotes = list()
        numbers_seen = set()
        for footnote in footnotes:
            number = footnote.number
            if number in numbers_seen:
                raise Error('duplicated footnote number %d' % number)
            numbers_seen.add(number)
            self.footnotes.append(footnote)

    def __iter__(self):
        """Iterate over the `Footnote` objects."""
        return iter(self.footnotes)

    def __str__(self):
        return '\n'.join(str(footnote) for footnote in self)

    @classmethod
    def parse(cls, text):
        """Parses text that consists of a footnote per line.

        The text may contain blank lines which are ignored.

        :returns: parsed `Footnotes`.

        :raises Error: if a line has the wrong format.
        :raises Error: if there are duplicated footnote numbers.
        """
        lines = (line for line in text.splitlines() if line.strip())
        return cls(imap(Footnote.parse, lines))

    def renumber(self):
        """Renumbers the footnotes.

        The new numbers start at 1 and follow the current order of the
        footnotes.  The `Footnote` objects are changed in place.

        :returns: a mapping of old to new footnote numbers, both formatted
            as they appear in the text, e.g. ``'[42]'`` -> ``'[1]'``.
        :rtype: dict of string -> string
        """
        old2new = dict()
        for new_number, footnote in enumerate(self, 1):
            old2new['[%d]' % footnote.number] = '[%d]' % new_number
            footnote.number = new_number
        return old2new

    def reorder(self, numbers):
        """Reorders footnotes by given numbers.

        Footnotes that are not referenced by *numbers* are removed.  Numbers
        that are not used as footnote numbers are ignored.

        :param numbers: numbers that are used to reorder the footnotes.
        :type numbers: iterable of int
        """
        number2footnote = dict((f.number, f) for f in self)
        self.footnotes = [number2footnote[n]
                          for n in numbers
                          if n in number2footnote]


class Document(object):
    """A `Document` with text body and `Footnotes`."""

    #: Default content of the line that separates body and footnotes.
    footnote_section = '@footnotes:'

    def __init__(self, body, footnotes, footnote_section=None,
                 check_ref2foot=True, check_foot2ref=True):
        """Creates a `Document`.

        :Parameters:
          body : string
            Body text of the `Document`.
          footnotes : `Footnotes`
            `Footnotes` of the document.
          footnote_section : string or `None`
            If given it is used to separate the body text from the footnotes
            when converting a `Document` to string.
          check_ref2foot : bool
            Check that every reference in the body has a footnote.
          check_foot2ref : bool
            Check that every footnote is referenced in the body.

        :raises Error: if there is a reference to a non existing footnote and
            *check_ref2foot* is true.
        :raises Error: if there is a footnote that is not referenced and
            *check_foot2ref* is true.
        """
        self.body = body
        self.footnotes = footnotes
        if footnote_section is not None:
            self.footnote_section = footnote_section
        #
        # Cross check references and footnote numbers.
        #
        if check_ref2foot or check_foot2ref:
            # Numbers are removed as their references are seen, so whatever
            # is left over afterwards is an unreferenced footnote.
            footnote_numbers = set(f.number for f in self.footnotes)
            for reference in self.iter_references_in_body():
                if reference in footnote_numbers:
                    footnote_numbers.remove(reference)
                elif check_ref2foot:
                    raise Error('undefined footnote %r' % reference)
            if check_foot2ref and footnote_numbers:
                raise Error('unreferenced footnotes: %s'
                            % ', '.join(str(number) for number
                                        in sorted(footnote_numbers)))

    def __str__(self):
        return '%s\n%s\n\n%s' % (self.body,
                                 self.footnote_section,
                                 self.footnotes)

    @classmethod
    def parse(cls, text, footnote_section=None,
              check_ref2foot=True, check_foot2ref=True):
        """Parses document with footnotes.

        :Parameters:
          text : string
            The text document with footnotes to parse.
          footnote_section : string or `None`
            If given it is the line dividing the body text from the
            footnotes.
          check_ref2foot : bool
            Check that every reference in the body has a footnote.
          check_foot2ref : bool
            Check that every footnote is referenced in the body.

        :raises Error:
            - If there is no footnote section,
            - if a footnote line has the wrong format,
            - if there are duplicated footnote numbers,
            - if there is a reference to a non existing footnote and
              *check_ref2foot* is true,
            - or if there is a footnote that is not referenced and
              *check_foot2ref* is true.
        """
        if footnote_section is None:
            footnote_section = cls.footnote_section
        parts = text.split('\n%s\n' % footnote_section, 1)
        try:
            # Without the separator line there is just one part and the
            # unpacking fails with a `ValueError`.
            body, footnotes = parts
        except ValueError:
            raise Error('no footnote section found')
        return cls(body, Footnotes.parse(footnotes), footnote_section,
                   check_ref2foot, check_foot2ref)

    def iter_references_in_body(self):
        """Iterates over the references in the body text.

        Each reference is returned once in the order it appears in the body
        text.

        :returns: iterable of references.
        :rtype: iterable of int
        """
        seen = set()
        for match in REFERENCE_RE.finditer(self.body):
            reference = int(match.group(1))
            if reference not in seen:
                seen.add(reference)
                yield reference

    def renumber_footnotes(self, reorder=False):
        """Renumbers and optionally reorders the footnotes.

        References in the body that have no footnote are left untouched.

        :param reorder: If true the footnotes are ordered by the first
            reference to them from the text body before renumbering takes
            place.  Unreferenced footnotes are removed then.
        :type reorder: bool
        """
        if reorder:
            self.footnotes.reorder(self.iter_references_in_body())
        old2new = self.footnotes.renumber()

        def replace(match):
            """Map an old reference to the new one, if there is one."""
            reference = match.group(0)
            return old2new.get(reference, reference)

        self.body = REFERENCE_RE.sub(replace, self.body)


def main():
    """\
    Parses text document with footnotes, renumbers, and optionally reorders
    the footnotes.
    """
    option_parser = OptionParser(version=__version__,
                                 description=textwrap.dedent(main.__doc__))
    option_parser.add_option('--reorder', action='store_true',
                             help='reorder the footnotes by first appearence'
                                  ' in text body')
    option_parser.add_option('--separator', metavar='STR',
                             default=Document.footnote_section,
                             help='content of line that separates the body'
                                  ' text from the footnotes (default'
                                  ' "%default")')
    option_parser.add_option('--check-references', action='store_true',
                             help='check if every reference in the text has'
                                  ' a footnote')
    option_parser.add_option('--check-footnotes', action='store_true',
                             help='check if every footnote is used in the text')

    options, _args = option_parser.parse_args()

    try:
        document = Document.parse(sys.stdin.read(),
                                  options.separator,
                                  options.check_references,
                                  options.check_footnotes)
        document.renumber_footnotes(options.reorder)
    except Error as error:
        sys.stderr.write('Error: %s\n' % error)
        # Signal the failure to the calling shell.
        return 1
    sys.stdout.write('%s\n' % document)
    return 0


if __name__ == '__main__':
    sys.exit(main())