#!/usr/bin/env python
"""Renumber the numeric footnote references of a text file.

The input file consists of a body, a separator line ``@footnote:`` and a
footnote section.  References look like ``[12]``.  Every distinct reference
id is replaced by a new id, assigned consecutively (1, 2, 3, ...) in order of
first appearance in the file.  The footnote section is then reduced to the
lines that start with a reference and sorted by the new id.

Usage: pass the path of the file as the only argument; the result is written
to standard output.
"""

import re
import sys

# Compiled once at import time because the pattern is reused for
# substitution, filtering and sorting.
FOOTNOTE_RE = re.compile(r'\[(?P<id>\d+)\]')

# Line that separates the body from the footnote section.
SEPARATOR = '\n@footnote:\n'


def renumber_footnotes(text):
    """Return *text* with footnote ids renumbered and footnotes sorted.

    Args:
        text: Full file content: body, ``SEPARATOR``, footnote section.

    Returns:
        The renumbered body, the separator, the footnote lines sorted by
        their new id (lines not starting with ``[<digits>]`` are dropped),
        and a trailing newline.

    Raises:
        ValueError: If *text* does not contain the separator exactly once.
    """
    new_ids = {}

    def _replace(match):
        # ``len(new_ids) + 1`` is evaluated before ``setdefault`` inserts, so
        # an unseen id receives the next free number while an id that was
        # already seen keeps the number assigned on its first appearance.
        return '[%d]' % new_ids.setdefault(match.group('id'), len(new_ids) + 1)

    # Renumber the whole text in one pass so references and footnotes agree.
    parts = FOOTNOTE_RE.sub(_replace, text).split(SEPARATOR)
    if len(parts) != 2:
        raise ValueError(
            'expected exactly one %r separator, found %d'
            % (SEPARATOR, len(parts) - 1)
        )
    body, notes = parts

    # Keep only real footnote lines, remembering each numeric id so that the
    # sort is numeric ([2] before [10]) and the regex runs once per line.
    numbered = []
    for line in notes.split('\n'):
        match = FOOTNOTE_RE.match(line)
        if match:
            numbered.append((int(match.group('id')), line))
    # Sort on the id only; list.sort is stable, so lines that share an id
    # keep their original relative order.
    numbered.sort(key=lambda pair: pair[0])

    return body + SEPARATOR + '\n'.join(line for _, line in numbered) + '\n'


def main():
    """Read the file named by ``sys.argv[1]`` and print the renumbered text."""
    if len(sys.argv) != 2:
        sys.exit('usage: %s FILE' % sys.argv[0])
    # The whole file is read at once: the renumbering needs the full text.
    with open(sys.argv[1], 'r') as source:
        text = source.read()
    sys.stdout.write(renumber_footnotes(text))


if __name__ == '__main__':
    main()