#!/usr/bin/env python
# Copyright (C) 2008 Richard Hacker
# License: GPLv2
"""Renumber the bracketed footnote references of a text file.

The input consists of a body, a line containing only ``@footnote:`` and a
footnote section in which every footnote line starts with ``[number]``.
References of the form ``[number]`` in the body are renumbered in order of
first appearance and the footnotes are printed in that new order.  With the
``-a`` switch the footnotes keep their original order and the references in
the body are renumbered to match it.

All file content is handled as bytes, so no text encoding is assumed.
"""

import mmap
import re
import sys


class Document(object):
    """A memory-mapped document with a body and a footnote section.

    Attributes:
        fmap: read-only memory map of the input file.
        footnote_pos: offset in the file pointing to ``@footnote:\\n``.
        orig_footnote: original footnote numbers, in order of appearance in
            the footnote section (each number listed once).
        new_footnote: original footnote numbers in their new output order.
        footnote: mapping ``{original_number: [new_number, offset]}`` where
            ``new_number`` is 0 until one is assigned and ``offset`` points
            just behind the ``[number]`` prefix of the footnote line.
        refidx: next new reference number to hand out (starts at 1).
    """

    # Regex to find a reference
    ref_regex = re.compile(br"\[[0-9]+\]")

    def __init__(self, file):
        """Map *file* and index its footnote section.

        Raises:
            ValueError: if the file has no ``@footnote:`` marker line (or is
                empty, in which case ``mmap`` itself raises).
            IOError/OSError: if the file cannot be opened.
        """
        # All mutable state lives on the instance; keeping lists/dicts on
        # the class would make every Document share and corrupt them.
        self.footnote_pos = 0
        self.orig_footnote = []
        self.new_footnote = []
        self.footnote = {}
        self.refidx = 1

        # mmap keeps its own duplicate of the descriptor, so the file
        # object can be closed as soon as the mapping exists.
        with open(file, "rb") as f:
            self.fmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        # Find footnote separator
        marker = b"\n@footnote:\n"
        marker_pos = self.fmap.find(marker)
        if marker_pos < 0:
            # Without this check footnote_pos would become 0 and the whole
            # body would silently be treated as footnotes and dropped.
            self.fmap.close()
            raise ValueError(
                "no '@footnote:' marker line found in %r" % (file,))
        # Skip the newline that terminates the last body line
        self.footnote_pos = marker_pos + 1

        self.__read_footnotes()

    def close(self):
        """Release the memory map of the input file."""
        self.fmap.close()

    def sort_by_footnote(self):
        """Renumber the footnotes in their original order.

        Must be called before output() to have an effect on the body.
        """
        for oldref in self.orig_footnote:
            self.__new_reference(oldref)

    def output(self, out=None):
        """Write the renumbered body followed by the footnote section.

        Args:
            out: binary stream to write to; defaults to standard output.
        """
        if out is None:
            # Python 3 exposes the byte stream as sys.stdout.buffer
            out = getattr(sys.stdout, "buffer", sys.stdout)
        self.__print_body(out)
        self.__print_footnote(out)

    def __read_footnotes(self):
        """Fill in the list self.orig_footnote and the map self.footnote."""
        self.fmap.seek(self.footnote_pos)
        end = len(self.fmap)
        while self.fmap.tell() != end:
            pos = self.fmap.tell()
            line = self.fmap.readline()

            # All lines following the footnote marker have to begin with
            # [0-9], otherwise they are discarded. Use match() to force a
            # match at the line start.
            m = self.ref_regex.match(line)
            if m is None:
                continue

            refnumber = int(m.group(0)[1:-1])
            # A number that is defined twice is listed only once; the
            # later definition replaces the earlier one.
            if refnumber not in self.footnote:
                self.orig_footnote.append(refnumber)
            self.footnote[refnumber] = [0, pos + m.end()]

    def __new_reference_str(self, pat):
        """Return the replacement ``[new-number]`` for a regex match.

        References without a footnote are replaced by ``[??unknown??]``.
        """
        ref = self.__new_reference(int(pat.group(0)[1:-1]))
        if ref:
            return b"[%d]" % ref
        return b"[??unknown??]"

    def __new_reference(self, oldref):
        """Allocate (if necessary) and return the new number for *oldref*.

        Returns 0 if the original number does not appear in the footnote
        section.
        """
        entry = self.footnote.get(oldref)
        if entry is None:
            return 0

        if not entry[0]:
            # Assign new reference number
            entry[0] = self.refidx
            self.new_footnote.append(oldref)
            self.refidx += 1

        return entry[0]

    def __print_body(self, out):
        """Write the body to *out* with all references renumbered."""
        self.fmap.seek(0)
        while self.fmap.tell() < self.footnote_pos:
            line = self.fmap.readline()
            # Substitute all references with their new values
            out.write(self.ref_regex.sub(self.__new_reference_str, line))

    def __print_footnote(self, out):
        """Write the marker line and the footnotes in their new order."""
        # Footnotes that were never referenced still get a number, so they
        # are kept (after all referenced ones) instead of being lost.
        for oldref in self.orig_footnote:
            self.__new_reference(oldref)

        self.fmap.seek(self.footnote_pos)
        out.write(self.fmap.readline())

        for ref in self.new_footnote:
            number, text_pos = self.footnote[ref]
            self.fmap.seek(text_pos)
            # text_pos points just behind "]"; drop the blanks that
            # followed it so that the single space written below does not
            # add up with them on every run.
            text = self.fmap.readline().lstrip(b" \t")
            # The last line of the file may lack a newline; add one so a
            # reordered footnote cannot fuse with its successor.
            if not text.endswith(b"\n"):
                text += b"\n"
            out.write(b"[%d] %s" % (number, text))


def main(argv=None):
    """Run the program; return the process exit status.

    Usage: ``prog [-a] FILE``.  Call this program with the -a switch to
    preserve the original footnote ordering.
    """
    if argv is None:
        argv = sys.argv
    args = argv[1:]

    # The input file is the last argument that is not the -a switch
    paths = [arg for arg in args if arg != "-a"]
    if not paths:
        sys.stderr.write("usage: %s [-a] FILE\n" % argv[0])
        return 2

    try:
        doc = Document(paths[-1])
    except (IOError, OSError, ValueError) as err:
        sys.stderr.write("%s: %s\n" % (argv[0], err))
        return 1

    try:
        if "-a" in args:
            doc.sort_by_footnote()
        doc.output()
    finally:
        doc.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())