import java.util.*; import java.io.*; import java.util.regex.*; /** * Renumber endnotes in an almost-flat text file. *

* This example is structured as a driver class plus one or more * inner classes which function as workers. The driver class presents * the API to the rest of the world, including a main routine * and (potentially) a set of option switches and entry points. * The worker class is private to the driver, and embodies the * pass structure of the problem. *

* Most program elements are declared as having default access, * which means they are private to this class and its siblings * in the same package. Upgrading this example to a reusable * API would require moving it to a named package, deciding * which elements ought to be made private, and adding public * API elements to manage option control and invocation. *

* This example code was developed on a MacBook Pro using NetBeans 6.1. * * @author john.rose@sun.com */ public class EndNotes { // --- options (note: we could put these under an API) /** Regexp for endnote uses. */ Pattern usePattern = Pattern.compile("\\[([0-9]+)\\]"); /** Regexp for endnote definitions. */ Pattern defPattern = Pattern.compile("(?<=^\\s*)\\[([0-9]+)\\]"); /** Replacement expression for the notation we are transforming. */ String noteRewrite = "[%s]"; /** Regexp for the marker separating body from endnotes. */ String boundaryPattern = "^\\s*@footnotes?:\\s*$"; /** How lines are separated on output. */ String lineSeparator = System.getProperty("line.separator", "\n"); /** If true, definitions are resorted. */ boolean resortDefs; /** If true, definitions are renumbered in compact ascending order. * Otherwise, initial uses are renumbered in compact ascending order. * (+++ Bonus feature.) */ boolean renumberByDef; /** If true, generate placeholder lines for missing definitions. * (+++ Bonus feature.) */ boolean generateMissing; /** Map endnote references to their new numberings. */ TreeMap oldNotes = new TreeMap(); /** Next note number to generate into oldNotes.values. */ int nextNewNote = 1; /** Output channel. */ Writer out; /** * Given an old endnote numeral, find or create * a new endnote number. The new numbers are produced in a compact * sequence beginning with {@link #nextNewNote}. * The ordering of calls to this method is significant, * because the first call to a previously unseen numeral * will always return the value of {@link #nextNewNote}. * @param oldnum the old numeral (without brackets) * @return new number */ int getNewNumber(String oldnum) { Integer newnum = oldNotes.get(oldnum); if (newnum == null) { oldNotes.put(oldnum, newnum = nextNewNote++); } return (int) newnum; } /** * Find all endnote occurrences in the given string, and renumber them. * @param line a line of text; may contain embedded line separators * @param pattern the endnote pattern; group(1) is replaced * @return the original line with endnotes renumbered */ String replaceNotes(String line, Pattern pattern) { Matcher m = pattern.matcher(line); StringBuffer sb = null; while (m.find()) { int newnum = getNewNumber(m.group(1)); if (newnum < 0) { continue; } if (sb == null) { sb = new StringBuffer(line.length() + 20); } String newnote = String.format(noteRewrite, newnum); m.appendReplacement(sb, newnote); } return (sb == null) ? line : m.appendTail(sb).toString(); } /** The processing loop, with pluggable behaviors. * It is an inner class (non-static) so that the main * class can supply environmental settings as if they were * global variables. */ class Scanner { BufferedReader in; void doFile(File file) throws IOException { in = new BufferedReader(new FileReader(file)); try { String lastLine = doBody(); if (lastLine != null) doEndNotes(lastLine); } finally { in.close(); in = null; } } String doBody() throws IOException { for (String line; (line = in.readLine()) != null;) { if (line.matches(boundaryPattern)) return line; putLine(doBodyLine(line)); } return null; } String doBodyLine(String line) { return replaceNotes(line, usePattern); } void doEndNotes(String line) throws IOException { for (; line != null; line = in.readLine()) putLine(doEndNoteLine(line)); } String doEndNoteLine(String line) { return replaceNotes(line, defPattern); } void putLine(String line) throws IOException { if (line == null) return; out.write(line); out.write(lineSeparator); } } /** Use this for sorting endnotes. */ class NoteComparator implements Comparator { public int compare(String n1, String n2) { Matcher m1 = defPattern.matcher(n1); Matcher m2 = defPattern.matcher(n2); int c = 0; if (!m1.find()) c = m2.find() ? -1 : 0; else if (!m2.find()) c = 1; else c = comparePrimary(m1, m2); // secondary key is the whole line: if (c == 0) c = n1.compareTo(n2); return c; } /** Use the note prefix from the strings as a primary key. */ int comparePrimary(Matcher m1, Matcher m2) { Integer i1 = getNewNumber(m1.group(1)); Integer i2 = getNewNumber(m2.group(1)); return i1.compareTo(i2); } }; /** Variant of Scanner which collects endnotes, rather than * immediately renumbering them. */ class CollectingScanner extends Scanner { int endNote; ArrayList endText = new ArrayList(); @Override String doEndNoteLine(String line) { if (defPattern.matcher(line).find()) { ++endNote; } if (endText.size() <= endNote) { endText.add(line); assert(endText.size() == endNote+1); } else { // collect multi-line note endText.set(endNote, endText.get(endNote) + lineSeparator + line); } return null; // tell putLine to do nothing } List endNotes() { return endText.subList(1, endText.size()); } void finishEndNotes() throws IOException { for (String line : endText) putLine(super.doEndNoteLine(line)); } // +++ BONUS FEATURE void generateMissingEndNotes() throws IOException { TreeSet missing = new TreeSet(); for (int i = 1; i < nextNewNote; i++) missing.add(i); for (String line : endNotes()) { Matcher m = defPattern.matcher(line); if (m.find()) missing.remove(getNewNumber(m.group(1))); } for (int i : missing) { String gen = String.format(noteRewrite, i)+" (missing)"; putLine(gen); } } } public void doFile(File file) throws IOException { if (renumberByDef) { // +++ BONUS FEATURE: scan endnotes before body +++ // We will use anonymous inner classes to add the extra methods. // must visit the defs section first CollectingScanner firstPass = new CollectingScanner() { // totally ignore body lines @Override String doBodyLine(String line) { return null; } // produce no output at all @Override void putLine(String line) { } }; firstPass.doFile(file); if (resortDefs) { Collections.sort(firstPass.endNotes(), new NoteComparator() { /** First strip the note prefix from the strings, then compare. */ @Override int comparePrimary(Matcher m1, Matcher m2) { return m1.replaceFirst("").compareToIgnoreCase(m2.replaceFirst("")); } }); } firstPass.finishEndNotes(); } // streaming pass over the data, buffering one body line at a time if (!resortDefs && !generateMissing) { new Scanner().doFile(file); } else { CollectingScanner scanner = new CollectingScanner(); scanner.doFile(file); if (resortDefs) Collections.sort(scanner.endNotes(), new NoteComparator()); scanner.finishEndNotes(); if (generateMissing) scanner.generateMissingEndNotes(); } } // command-line entry point public static void main(String... av) throws IOException { // with no args, assume sample data file, and use all possible options if (av.length == 0) av = new String[] { "-s", "-d", "-g", "sample-data.txt" }; new EndNotes().run(av); } public void run(String... av) throws IOException { File file = null; for (String arg : av) { if (arg.startsWith("-")) { if (arg.equals("-s") || arg.equals("--sort")) { resortDefs = true; } else if (arg.equals("-u") || arg.equals("--uses")) { renumberByDef = false; } else if (arg.equals("-d") || arg.equals("--defs")) { renumberByDef = true; } else if (arg.equals("-g") || arg.equals("--generate-missing")) { generateMissing = true; } else { usage(); } } else { if (file != null) { usage(); } file = new File(arg); } } if (file == null) { usage(); } out = new BufferedWriter(new OutputStreamWriter(System.out)); doFile(file); out.flush(); } private static void usage() { throw new IllegalArgumentException( "Usage: EndNotes [--sort | --uses | --defs | --] input.txt > output.txt"); } };