#! /usr/bin/env python
# Copyright (C) 2008, Stefan Schwarzer

"""
This is a command line utility to reorder footnotes, denoted by
numbers in brackets, as in the example below. The file to process is
given as the single command line argument whereas the output is
written to standard output. Errors and warnings are written to
standard error.

In the output file, footnotes in the main text are numbered
1, 2, 3, .... The numbers in the footnote list, i. e. after the
marker "@footnote:", are sorted numerically to fit the order in the
main text. There can be multiple references in the main text pointing
to the same number in the footnote list.

The program warns about common mistakes like footnotes in the main
text which don't occur in the footnote list, or footnotes in the list
which have no correspondence in the main text.

Here's an input file example:

A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He
met with a silver penguin, browsing the Linux Kernel Mailinglist [3].
They debated other the question whether to start a C-program with
"main (int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [9999]. A repeated index [4].

@footnote:
[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, Web Link
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML

This would become:

A great brown fox [1] jumped of a pile of lorem ipsum [2], [3]. He
met with a silver penguin, browsing the Linux Kernel Mailinglist [4].
They debated other the question whether to start a C-program with
"main (int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [5]. A repeated index [2].

@footnote:
[1] Al Fabetus: "On characters and animals", 1888, self published.
[2] Lorem Ipsum, Web Link
[3] B. Fox: "More on Blind Text".
[4] Linux Kernel Maintainers: LKML
[5] Annoying Link.
"""

import re
import sys

# separates main text and footnotes list
FOOTNOTE_MARKER = "@footnote:"

# bracketed index numbers
index_regex = re.compile(r"\[\d+\]")

# mapping of former to reassigned index numbers
old_to_new = {}

# next index number to use for replacement in main text
next_number = 1


def warn(text):
    """Write `text` as a warning message to standard error."""
    sys.stderr.write("Warning: %s\n" % (text,))


def _write_line(line):
    """Write `line` to standard output, terminated by exactly one
    newline.

    A last input line without a trailing newline would otherwise be
    glued to whatever is printed next (the footnote marker or, after
    sorting, another footnote).
    """
    if not line.endswith("\n"):
        line += "\n"
    sys.stdout.write(line)


def reassign_in_main_text(match):
    """Return the new bracketed index number for the main text.

    `match` is a match object of `index_regex`. If an old index
    already has a new one assigned, reuse it; otherwise assign the
    next free number and record it in `old_to_new`.
    """
    global next_number
    # strip the surrounding brackets
    number = int(match.group(0)[1:-1])
    if number not in old_to_new:
        old_to_new[number] = next_number
        next_number += 1
    return "[%d]" % old_to_new[number]


def process_main_text(fobj):
    """Print the main text with the reassigned numbers, reading the
    input lines from file object `fobj`.

    Reading stops after the line holding `FOOTNOTE_MARKER` has been
    consumed (the marker itself is not printed here) or at end of
    input if there is no marker.
    """
    for line in fobj:
        # rstrip handles whitespace at end of line
        if line.rstrip() == FOOTNOTE_MARKER:
            return
        _write_line(index_regex.sub(reassign_in_main_text, line))


def check_for_missing_footnotes(footnotes):
    """Print a warning about indices that are in the main text but
    not in the footnote list.

    `footnotes` is the list of index/line pairs generated in
    `process_footnote_list`. The reported indices are the new
    (reassigned) numbers, i. e. those visible in the output.
    """
    # `next_number` actually is the number of indices in the main
    # text plus one, so it fits perfectly here
    in_main_text = set(range(1, next_number))
    in_footnotes = set(number for number, _line in footnotes)
    missing = sorted(in_main_text - in_footnotes)
    if missing:
        warn("indices of missing footnotes: %s" %
             ", ".join(str(number) for number in missing))


def process_footnote_list(fobj):
    """Print a new footnote list with the updated numbers, reading
    the input lines from file object `fobj`.

    Lines without a bracketed index and footnotes which aren't
    referenced in the main text are skipped with a warning. Finally,
    a warning is printed for main text indices without a footnote.
    """
    # collect (yet unsorted) footnotes
    footnotes = []
    for line in fobj:
        match = index_regex.search(line)
        if match is None:
            warn("ignored line: %s" % line.rstrip())
            continue
        old_number = int(match.group(0)[1:-1])
        if old_number not in old_to_new:
            warn("index number %d not in original main text" % old_number)
            continue
        new_number = old_to_new[old_number]
        # Substitute only the matched index itself; a plain
        # `str.replace` would also rewrite any later occurrence of the
        # same bracketed number inside the footnote text.
        new_line = "%s[%d]%s" % (line[:match.start()], new_number,
                                 line[match.end():])
        footnotes.append((new_number, new_line))
    # implicit numerical sorting by the new index numbers
    footnotes.sort()
    for _number, line in footnotes:
        _write_line(line)
    check_for_missing_footnotes(footnotes)


def main(filename):
    """Renumber the footnotes of the file `filename` and print the
    result to standard output.

    Return 0 on success or 1 if the file can't be opened, so the
    value can be used as the process exit status.
    """
    global next_number
    # start from a clean state so `main` can be called more than once
    old_to_new.clear()
    next_number = 1
    try:
        fobj = open(filename)
    except IOError:
        sys.stderr.write("Error: file '%s' not found\n" % filename)
        return 1
    with fobj:
        process_main_text(fobj)
        _write_line(FOOTNOTE_MARKER)
        process_footnote_list(fobj)
    return 0


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s input_file\n" % sys.argv[0])
        # don't fall through to `main` without a file name
        sys.exit(2)
    sys.exit(main(sys.argv[1]))