#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
Reorder footnotes.

Reads the input line by line and writes every processed body line directly
to stdout, so it is not a problem if the file is larger than the available
RAM.

Footnote references of the form "[12]" are renumbered in the order in which
they first appear in the input, starting from 1. The footnotes in the
footnote section at the end of the file (everything after the line starting
with "@footnote:") are kept in memory, renumbered and, after the last line
has been read, printed sorted by their new number.

usage:
    $ python footnotes2.py infile.txt [ > outfile.txt]
"""

import fileinput
import re
import sys

MARKER = "@footnote:"
FNOTE = re.compile(r"\[(\d+)\]")


class FootnoteSorter(object):
    """Renumber footnote references and emit the footnote section sorted.

    Instantiating the class runs the whole job: every input line is passed
    to :meth:`process_line`, and afterwards the collected footnotes are
    written in ascending order of their new number.

    Args:
        lines: Optional iterable of input lines. When omitted, the lines
            come from ``fileinput.input()`` (files named on the command
            line, or stdin).
        out: Optional object with a ``write`` method that receives the
            output. When omitted, ``sys.stdout`` is used.
    """

    def __init__(self, lines=None, out=None):
        self.in_footer = False
        self.fndict = {}     # old footnote number (str) -> new number (str)
        self.fnc = 0         # counter for handing out new footnote numbers
        self.footnotes = {}  # new number (int) -> renumbered footnote line
        self._out = sys.stdout if out is None else out

        if lines is None:
            source = fileinput.input()
            try:
                for line in source:
                    self.process_line(line)
            finally:
                # Release the file handle even if processing fails.
                source.close()
        else:
            for line in lines:
                self.process_line(line)

        # Write the stored footnotes sorted by their new reference number.
        for key in sorted(self.footnotes):
            self._emit(self.footnotes[key])

    def _emit(self, text):
        """Write *text*, stripped of surrounding whitespace, as one line."""
        self._out.write(text.strip() + "\n")

    def _renumber(self, match):
        """Return the renumbered "[n]" reference for one FNOTE match.

        An old number seen for the first time is assigned the next free
        new number.
        """
        old_fn = match.group(1)
        new_fn = self.fndict.get(old_fn)
        if new_fn is None:
            self.fnc += 1
            new_fn = self.fndict[old_fn] = str(self.fnc)
        return "[%s]" % new_fn

    def process_line(self, line):
        """Process a single input line.

        * A line starting with MARKER switches to footer mode and is
          written out unchanged (apart from whitespace stripping).
        * Before the marker, every "[n]" reference is renumbered and the
          line is written out immediately.
        * After the marker, a line starting with "[n]" is stored under its
          new number; nothing is written yet.
        """
        if line.startswith(MARKER):
            self.in_footer = True
            self._emit(line)
            return

        if not self.in_footer:
            # One regex pass that rewrites only the bracketed references.
            # A plain str.replace on the bare number would also change
            # unrelated digits and re-replace already renumbered markers.
            self._emit(FNOTE.sub(self._renumber, line))
            return

        # Footer section: lines are kept in memory, not written directly.
        mo = FNOTE.match(line)
        if mo is None:
            return  # not a footnote line: dropped
        new_fn = self.fndict.get(mo.group(1))
        if new_fn is None:
            return  # footnote never referenced in the body: ignored
        # Swap only the leading marker; the footnote text stays untouched.
        self.footnotes[int(new_fn)] = "[%s]%s" % (new_fn, line[mo.end():])


if __name__ == "__main__":
    FootnoteSorter()