#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
===================================
Linux Magazin 10/08
Babylon zu fünft / Sprach-Vergleich
===================================

by Grégoire Weber and Robert Hunger

- based on the solution from David Mertz and Ka-Ping Yee (Listing 4)
- with ideas from the nice PHP OO solution from Zeev Suraski (Listing 1)

USAGE: renumber-footnotes.py [--test] [-v] file

    --test  run unit tests
    -v      run unit tests in verbose mode
"""

import sys
import re

# Sample document used by the doctest of renumberFootnotes().
SAMPLE_INPUT = '''
A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [3]. They
debated other the question whether to start a C-program with
"main (int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [9999]. Multiple references may exist to
same targets [4].

@footnote:

[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, Web Link
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

# Expected result of renumbering SAMPLE_INPUT.
SAMPLE_EXPECTED = '''
A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He met
with a silver penguin, browsing the Linux Kernel Mailinglist [4]. They
debated other the question whether to start a C-program with
"main (int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [5]. Multiple references may exist to
same targets [2].

@footnote:

[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, Web Link
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
'''


class FootnoteRenumberer(object):
    r"""Renumber footnote references.

    Instances are callables suitable as the replacement argument of
    ``re.sub``. Numbering begins with '[1]' and is incremented for each
    previously unseen footnote reference. For an already known reference
    the number assigned at its first occurrence is returned.

    >>> footnoteRenumberer = FootnoteRenumberer()
    >>> re.sub(r'(\[\d+\])', footnoteRenumberer, 'foo [99] bar [3] su [99]')
    'foo [1] bar [2] su [1]'
    >>> footnoteRenumberer._counter
    3
    >>> sorted(footnoteRenumberer._map.items())
    [('[3]', '[2]'), ('[99]', '[1]')]
    """

    def __init__(self):
        # Number that will be handed out to the next unseen reference.
        self._counter = 1
        # Maps an original reference such as '[99]' to its new form '[1]'.
        self._map = {}

    def __call__(self, match):
        """Return the renumbered reference for a regex match.

        ``match`` must expose the original reference (including the
        square brackets) as group 1.
        """
        footnoteNumber = match.group(1)
        if footnoteNumber not in self._map:
            self._map[footnoteNumber] = "[%s]" % self._counter
            self._counter += 1
        return self._map[footnoteNumber]


def extractNumber(line):
    """Extract the number of a footnote line, return None otherwise.

    A footnote line starts with '[' followed by an integer and ']'.
    Any other line (blank lines, plain text) yields None.

    >>> extractNumber('[12] some footnote')
    12
    >>> extractNumber('no footnote') is None
    True
    """
    # Without the opening bracket this is not a footnote line, even if
    # the text up to the first ']' happens to look like a number.
    if not line.startswith('['):
        return None
    try:
        return int(line[1:].split(']', 1)[0])
    except ValueError:
        # The text between the brackets is not an integer.
        return None


def _footnoteSortKey(line):
    """Return a sort key placing unnumbered lines before numbered ones.

    A tuple is used instead of the bare result of extractNumber() because
    None and int values cannot be ordered against each other on Python 3.
    """
    number = extractNumber(line)
    if number is None:
        return (0, 0)
    return (1, number)


def renumberFootnotes(lineIter, write=None):
    """Renumber footnote references and sort the footnote list.

    ``lineIter`` is an iterable of lines including their line endings.
    Lines up to and including the one starting with '@footnote:' are
    passed to ``write`` one by one with their references renumbered in
    order of first appearance. The lines after the marker are renumbered
    as well, sorted by their new number (unnumbered lines first, in their
    original order) and passed to ``write`` as one single string.

    ``write`` is a callable accepting a string. If omitted, the current
    ``sys.stdout.write`` is used.

    >>> lines = []
    >>> renumberFootnotes(SAMPLE_INPUT.splitlines(True), lines.append)
    >>> "".join(lines) == SAMPLE_EXPECTED
    True
    """
    if write is None:
        # Looked up per call so that a redirected sys.stdout is honoured.
        write = sys.stdout.write

    footnoteRenumberer = FootnoteRenumberer()
    footnoteRegex = re.compile(r'(\[\d+\])')

    putLine = write
    footnotes = []
    for line in lineIter:
        if line.startswith("@footnote:"):
            putLine(line)
            # From here on lines are collected for sorting, not written.
            putLine = footnotes.append
            continue
        putLine(footnoteRegex.sub(footnoteRenumberer, line))

    footnotes.sort(key=_footnoteSortKey)
    write("".join(footnotes))


if __name__ == '__main__':
    if '--test' in sys.argv[1:]:
        # doctest.testmod() itself looks for '-v' in sys.argv.
        import doctest
        doctest.testmod()
    elif len(sys.argv) == 2:
        with open(sys.argv[1]) as footnoteFile:
            renumberFootnotes(footnoteFile)
    else:
        print(__doc__)
        sys.exit(1)