#!/usr/bin/env python
"""Usage: FootnoteSorter.py [-a] [--test] [< input.txt]

-a      Number footnotes in order of appearance in text
--test  Self-testing using the SAMPLE in the script source.

Without -a, footnotes are numbered in the order in which they are
defined in the footnote block (the lines following '@footnote:').
The renumbered text is written to standard output.
"""

import sys
import re
import fileinput
import tempfile
import os

SAMPLE = '''A great brown fox [13] jumped of a pile of lorem ipsum [4], [7].
He met with a silver penguin, browsing the Linux Kernel Mailinglist [3].
They debated other the question whether to start a C-program with
"main (int argc, char **argv)" or with "main (int argc, char *argv[])".
Square brackets annoyed them [9999].
Multiple references may exist to same targets [4].

@footnote:

[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, Web Link
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

# A footnote reference: one or more digits in square brackets, e.g. "[13]".
# Empty brackets such as "argv[]" deliberately do not match.
FOOTNOTE_PATTERN = re.compile(r'\[\d+\]')

# Line prefix that separates the running text from the footnote block.
FOOTNOTE_MARKER = '@footnote:'


def checkOption(args, option):
    """Check if an option is contained in the list args.

    If it is present, its first occurrence is removed from args
    (in place) and True is returned; otherwise args is left untouched
    and False is returned.
    """
    try:
        args.remove(option)
    except ValueError:
        return False
    return True


def fileFilter(input, old2New):
    """Renumber the footnote references in every line of input.

    input   -- iterable yielding the lines to process
    old2New -- dict mapping an old reference (e.g. '[13]') to its
               replacement (e.g. '[1]'). References without a mapping
               get the next free number and are added to the dict, so
               the dict is modified in place.

    Each filtered line is written to sys.stdout.
    """
    def renumber(match):
        # Called for every match of FOOTNOTE_PATTERN.
        key = match.group(0)
        newValue = old2New.get(key)
        if newValue is None:
            # First time we see this reference: hand out the next number.
            newValue = '[%d]' % (len(old2New) + 1)
            old2New[key] = newValue
        return newValue

    for line in input:
        sys.stdout.write(FOOTNOTE_PATTERN.sub(renumber, line))


def _collectFootnoteOrder(lines, spool):
    """Copy lines to spool and return the mapping given by the footnote block.

    Every line is written to spool unchanged so that it can be re-read
    for the second pass. Lines after the FOOTNOTE_MARKER line that begin
    with a footnote reference define the numbering: the first definition
    becomes '[1]', the second '[2]', and so on.
    """
    old2New = {}
    inFootnotes = False
    for line in lines:
        spool.write(line)
        if not inFootnotes:
            inFootnotes = line.startswith(FOOTNOTE_MARKER)
            continue
        # Only a reference at the very start of the line is a definition.
        # Continuation lines that merely contain brackets must not consume
        # a number.
        definition = FOOTNOTE_PATTERN.match(line)
        if definition is None:
            continue
        # setdefault keeps the first number if a footnote is defined twice,
        # so the numbering stays free of gaps.
        old2New.setdefault(definition.group(0), '[%d]' % (len(old2New) + 1))
    return old2New


def main(argv=None):
    """Run the footnote sorter.

    argv -- argument list including the program name; defaults to
            sys.argv. Recognised options are removed from the list in
            place; the remaining entries after the program name are
            treated as input files (standard input if there are none).
    """
    args = sys.argv if argv is None else argv
    isTest = checkOption(args, '--test')
    byFirstOccurrence = checkOption(args, '-a')

    if isTest:
        # keepends=True preserves the line terminators without appending
        # a spurious empty line after the final newline of SAMPLE.
        lines = SAMPLE.splitlines(True)
    else:
        lines = fileinput.FileInput(files=args[1:],
                                    openhook=fileinput.hook_compressed)

    if byFirstOccurrence:
        # Simple case: build the mapping as references are encountered.
        fileFilter(lines, {})
        return

    # Two passes are needed: the first reads the footnote block to
    # determine the numbering while spooling the input to a temporary
    # file; the second filters the spooled copy. Because the filter
    # finds the existing mappings, it does not invent its own order.
    # The context manager closes (and thereby deletes) the temporary
    # file even if an error occurs.
    with tempfile.TemporaryFile(mode='w+') as spool:
        old2New = _collectFootnoteOrder(lines, spool)
        spool.seek(0)
        fileFilter(spool, old2New)


if __name__ == '__main__':
    main()