"""Renumber bracketed footnote references such as ``[13]`` consecutively.

The input text consists of a body and a footnote section separated by a line
containing only ``@footnote:``.  References are renumbered 1..n either in the
order the footnote section lists them (default) or in the order the body
first mentions them (``-a`` on the command line).

Usage:
    script [-a] < input.txt     renumber text read from standard input
    script --test [-a]          renumber the built-in SAMPLE text
"""

import re
import sys

SAMPLE = '''A great brown fox [13] jumped of a pile of lorem ipsum [4], [7]. He met with
a silver penguin, browsing the Linux Kernel Mailinglist [3]. They debated other
the question whether to start a C-program with "main (int argc, char **argv)"
or with "main (int argc, char *argv[])". Square brackets annoyed them [9999].
Multiple references may exist to same targets [4].

@footnote:

[13] Al Fabetus: "On characters and animals", 1888, self published.
[4] Lorem Ipsum, Web Link
[9999] Annoying Link.
[7] B. Fox: "More on Blind Text".
[3] Linux Kernel Maintainers: LKML
'''

# A footnote reference: digits enclosed in square brackets, e.g. "[42]".
# An empty pair such as the "[]" in "argv[]" deliberately does not match.
_REF_RE = re.compile(r'\[\d+\]')


def nodups(lst):
    """Return the items of *lst* in first-seen order, duplicates removed."""
    new = []
    for x in lst:
        # List membership (not a set) keeps this usable for unhashable items.
        if x not in new:
            new.append(x)
    return new


def mkMap(lst):
    """Map each reference in *lst* to its new label "[1]", "[2]", ...

    Labels are assigned by position in *lst*, starting at 1.
    """
    return dict((ref, "[%d]" % (n + 1)) for n, ref in enumerate(lst))


def asNum(line):
    """Return the integer between the first character of *line* and the
    first ']' that follows it.

    If the line does not have that shape, the raw line is returned instead,
    or None when the line is empty.
    """
    try:
        key, _ = line[1:].split(']', 1)
        return int(key)
    except (ValueError, TypeError):
        # No ']' after the first character, or a non-numeric label:
        # hand back the raw line so the caller can still order it.
        return line or None


def _footKey(line):
    """Sort key for footnote lines built on top of asNum().

    asNum() yields None, an int or a str, which cannot be compared with each
    other on Python 3.  Ranking the three cases explicitly reproduces the
    Python 2 ordering (empty lines, then numbered lines, then other lines).
    """
    key = asNum(line)
    if key is None:
        return (0, 0, '')
    if key is line:
        # asNum() returned the raw line unchanged: not a numbered footnote.
        return (2, 0, line)
    return (1, key, '')


def footnote(s, usebody=False):
    """Renumber the footnote references in *s* consecutively from 1.

    Args:
        s: Text made of a body and a footnote section, separated by a line
            that contains only "@footnote:".
        usebody: If true, number references in the order the body first
            mentions them and sort the footnote lines to match.  Otherwise
            number them in the order they appear in the footnote section.

    Returns:
        The body, the "@footnote:" separator, an empty line and the
        whitespace-stripped footnote section, all with renumbered references.

    Raises:
        ValueError: If *s* does not contain exactly one "@footnote:" line.
        KeyError: If a reference has no counterpart in the part of the text
            that defines the numbering.
    """
    FOOTSPLIT = '\n@footnote:\n'
    parts = s.split(FOOTSPLIT)
    if len(parts) != 2:
        raise ValueError(
            "expected exactly one '@footnote:' separator line, found %d"
            % (len(parts) - 1))
    body, foots = parts

    links = nodups(_REF_RE.findall(body))
    targets = nodups(_REF_RE.findall(foots))
    # The numbering follows the reference order of body xor footnotes.
    mapping = mkMap(links if usebody else targets)
    origin = "body text" if usebody else "footnote section"

    def numsub(m):
        # Replace one old reference by its new label.
        ref = m.group(0)
        if ref not in mapping:
            raise KeyError("reference %s does not occur in the %s"
                           % (ref, origin))
        return mapping[ref]

    body = _REF_RE.sub(numsub, body)
    foots = _REF_RE.sub(numsub, foots)

    # With body ordering the footnote lines are generally out of sequence now.
    if usebody:
        foots = '\n'.join(sorted(foots.splitlines(), key=_footKey))
    return body + FOOTSPLIT + "\n" + foots.strip()


if __name__ == '__main__':
    args = sys.argv[1:]
    if '--test' in args:
        text = SAMPLE
    else:
        text = sys.stdin.read()
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(footnote(text, usebody='-a' in args))