import java.util.*;
import java.io.*;
import java.util.regex.*;
/**
* Renumber endnotes in an almost-flat text file.
*
* This example is structured as a driver class plus one or more
* inner classes which function as workers. The driver class presents
* the API to the rest of the world, including a main routine
* and (potentially) a set of option switches and entry points.
* The worker class is private to the driver, and embodies the
* pass structure of the problem.
*
* Most program elements are declared as having default access,
* which means they are private to this class and its siblings
* in the same package. Upgrading this example to a reusable
* API would require moving it to a named package, deciding
* which elements ought to be made private, and adding public
* API elements to manage option control and invocation.
*
* This example code was developed on a MacBook Pro using NetBeans 6.1.
*
* @author john.rose@sun.com
*/
public class EndNotes {
    // --- options (note: we could put these under an API)

    /** Regexp for endnote uses; group(1) is the old numeral. */
    Pattern usePattern = Pattern.compile("\\[([0-9]+)\\]");

    /**
     * Regexp for endnote definitions; group(1) is the old numeral.
     * The look-behind anchors the match to the start of the input (after
     * optional whitespace) without making that whitespace part of the match.
     */
    Pattern defPattern = Pattern.compile("(?<=^\\s*)\\[([0-9]+)\\]");

    /** Replacement expression for the notation we are transforming. */
    String noteRewrite = "[%s]";

    /** Regexp for the marker separating body from endnotes. */
    String boundaryPattern = "^\\s*@footnotes?:\\s*$";

    /** How lines are separated on output. */
    String lineSeparator = System.getProperty("line.separator", "\n");

    /** If true, definitions are resorted. */
    boolean resortDefs;

    /** If true, definitions are renumbered in compact ascending order.
     *  Otherwise, initial uses are renumbered in compact ascending order.
     *  (+++ Bonus feature.)
     */
    boolean renumberByDef;

    /** If true, generate placeholder lines for missing definitions.
     *  (+++ Bonus feature.)
     */
    boolean generateMissing;

    /** Map endnote references (old numerals, as text) to their new numberings. */
    TreeMap<String, Integer> oldNotes = new TreeMap<String, Integer>();

    /** Next note number to generate into oldNotes.values. */
    int nextNewNote = 1;

    /** Output channel. */
    Writer out;

    /**
     * Given an old endnote numeral, find or create
     * a new endnote number.  The new numbers are produced in a compact
     * sequence beginning with {@link #nextNewNote}.
     * The ordering of calls to this method is significant,
     * because the first call to a previously unseen numeral
     * will always return the value of {@link #nextNewNote}.
     * Numerals are keyed by their exact text.
     * @param oldnum the old numeral (without brackets)
     * @return new number
     */
    int getNewNumber(String oldnum) {
        Integer newnum = oldNotes.get(oldnum);
        if (newnum == null) {
            newnum = nextNewNote++;
            oldNotes.put(oldnum, newnum);
        }
        return newnum;
    }

    /**
     * Find all endnote occurrences in the given string, and renumber them.
     * @param line a line of text; may contain embedded line separators
     * @param pattern the endnote pattern; group(1) is replaced
     * @return the original line with endnotes renumbered; the very same
     *         string object is returned when nothing was replaced
     */
    String replaceNotes(String line, Pattern pattern) {
        Matcher m = pattern.matcher(line);
        StringBuffer sb = null;  // allocated lazily, on the first replacement
        while (m.find()) {
            int newnum = getNewNumber(m.group(1));
            if (newnum < 0) {
                // a negative number leaves this occurrence untouched
                continue;
            }
            if (sb == null) {
                sb = new StringBuffer(line.length() + 20);
            }
            String newnote = String.format(noteRewrite, newnum);
            m.appendReplacement(sb, newnote);
        }
        return (sb == null) ? line : m.appendTail(sb).toString();
    }

    /** The processing loop, with pluggable behaviors.
     *  It is an inner class (non-static) so that the main
     *  class can supply environmental settings as if they were
     *  global variables.
     */
    class Scanner {
        /** Input for the file currently being scanned; null between files. */
        BufferedReader in;

        /**
         * Scan one file: first the body, then (if the boundary marker
         * was found) the endnote section.  The reader is always closed.
         * @param file the file to read
         * @throws IOException if reading or writing fails
         */
        void doFile(File file) throws IOException {
            in = new BufferedReader(new FileReader(file));
            try {
                String lastLine = doBody();
                if (lastLine != null) {
                    doEndNotes(lastLine);
                }
            } finally {
                in.close();
                in = null;
            }
        }

        /**
         * Process body lines up to the boundary marker.
         * @return the boundary line itself (not yet output), or null if
         *         the input ended without one
         */
        String doBody() throws IOException {
            // Compile once per pass instead of once per line.
            Pattern boundary = Pattern.compile(boundaryPattern);
            for (String line; (line = in.readLine()) != null;) {
                if (boundary.matcher(line).matches()) {
                    return line;
                }
                putLine(doBodyLine(line));
            }
            return null;
        }

        /** Transform one body line; endnote uses are renumbered. */
        String doBodyLine(String line) {
            return replaceNotes(line, usePattern);
        }

        /**
         * Process the endnote section through end of input.
         * @param line the first line of the section (the boundary line)
         */
        void doEndNotes(String line) throws IOException {
            for (; line != null; line = in.readLine()) {
                putLine(doEndNoteLine(line));
            }
        }

        /** Transform one endnote-section line; a leading definition is renumbered. */
        String doEndNoteLine(String line) {
            return replaceNotes(line, defPattern);
        }

        /** Write a line plus separator to {@link #out}; a null line is skipped. */
        void putLine(String line) throws IOException {
            if (line == null) {
                return;
            }
            out.write(line);
            out.write(lineSeparator);
        }
    }

    /** Use this for sorting endnotes. */
    class NoteComparator implements Comparator<String> {
        /**
         * Order two collected notes.  Text without a definition prefix
         * sorts before text with one; two definitions are ordered by
         * {@link #comparePrimary}; ties fall back to the whole text.
         */
        @Override
        public int compare(String n1, String n2) {
            Matcher m1 = defPattern.matcher(n1);
            Matcher m2 = defPattern.matcher(n2);
            int c;
            if (!m1.find()) {
                c = m2.find() ? -1 : 0;
            } else if (!m2.find()) {
                c = 1;
            } else {
                c = comparePrimary(m1, m2);
            }
            // secondary key is the whole line:
            if (c == 0) {
                c = n1.compareTo(n2);
            }
            return c;
        }

        /** Use the note prefix from the strings as a primary key. */
        int comparePrimary(Matcher m1, Matcher m2) {
            Integer i1 = getNewNumber(m1.group(1));
            Integer i2 = getNewNumber(m2.group(1));
            return i1.compareTo(i2);
        }
    }

    /** Variant of Scanner which collects endnotes, rather than
     *  immediately renumbering them.
     */
    class CollectingScanner extends Scanner {
        /** Index into {@link #endText} of the note currently being collected. */
        int endNote;

        /**
         * Collected endnote section.  Element 0 holds everything before
         * the first definition (starting with the boundary line); each
         * later element is one definition with its continuation lines.
         */
        ArrayList<String> endText = new ArrayList<String>();

        /**
         * Collect the line instead of emitting it.  A line containing a
         * definition starts a new entry; any other line is appended to
         * the current entry.
         * @return always null, which tells putLine to do nothing
         */
        @Override
        String doEndNoteLine(String line) {
            if (defPattern.matcher(line).find()) {
                ++endNote;
            }
            if (endText.size() <= endNote) {
                endText.add(line);
                assert(endText.size() == endNote+1);
            } else {  // collect multi-line note
                endText.set(endNote, endText.get(endNote)
                                     + lineSeparator + line);
            }
            return null;  // tell putLine to do nothing
        }

        /**
         * The collected definitions, as a live view of {@link #endText}
         * that skips element 0; sorting the view reorders endText.
         * The view is empty when nothing was collected (for example,
         * when the input has no boundary marker).
         */
        List<String> endNotes() {
            int size = endText.size();
            // Clamp the start so that an empty endText yields an empty
            // view instead of an illegal (1, 0) range.
            return endText.subList(Math.min(1, size), size);
        }

        /** Renumber and output everything that was collected. */
        void finishEndNotes() throws IOException {
            for (String line : endText) {
                putLine(super.doEndNoteLine(line));
            }
        }

        // +++ BONUS FEATURE
        /**
         * Output a placeholder line for every new note number that has
         * been handed out but has no collected definition.
         */
        void generateMissingEndNotes() throws IOException {
            TreeSet<Integer> missing = new TreeSet<Integer>();
            for (int i = 1; i < nextNewNote; i++) {
                missing.add(i);
            }
            for (String line : endNotes()) {
                Matcher m = defPattern.matcher(line);
                if (m.find()) {
                    missing.remove(getNewNumber(m.group(1)));
                }
            }
            for (int i : missing) {
                String gen = String.format(noteRewrite, i)+" (missing)";
                putLine(gen);
            }
        }
    }

    /**
     * Renumber the endnotes of one file, writing the result to {@link #out}.
     * When {@link #renumberByDef} is set, a silent preliminary pass visits
     * the definitions first, so that new numbers are handed out in
     * definition order before the body is scanned.
     * @param file the input file
     * @throws IOException if reading or writing fails
     */
    public void doFile(File file) throws IOException {
        if (renumberByDef) {
            // +++ BONUS FEATURE:  scan endnotes before body +++
            // We will use anonymous inner classes to add the extra methods.
            // must visit the defs section first
            CollectingScanner firstPass = new CollectingScanner() {
                // totally ignore body lines
                @Override String doBodyLine(String line) { return null; }
                // produce no output at all
                @Override void putLine(String line) { }
            };
            firstPass.doFile(file);
            if (resortDefs) {
                Collections.sort(firstPass.endNotes(), new NoteComparator() {
                    /** First strip the note prefix from the strings, then compare. */
                    @Override int comparePrimary(Matcher m1, Matcher m2) {
                        return m1.replaceFirst("").compareToIgnoreCase(m2.replaceFirst(""));
                    }
                });
            }
            // Output is suppressed; this only assigns the new numbers.
            firstPass.finishEndNotes();
        }

        // streaming pass over the data, buffering one body line at a time
        if (!resortDefs && !generateMissing) {
            new Scanner().doFile(file);
        } else {
            CollectingScanner scanner = new CollectingScanner();
            scanner.doFile(file);
            if (resortDefs) {
                Collections.sort(scanner.endNotes(), new NoteComparator());
            }
            scanner.finishEndNotes();
            if (generateMissing) {
                scanner.generateMissingEndNotes();
            }
        }
    }

    // command-line entry point
    public static void main(String... av) throws IOException {
        // with no args, assume sample data file, and use all possible options
        if (av.length == 0)  av = new String[] { "-s", "-d", "-g", "sample-data.txt" };
        new EndNotes().run(av);
    }

    /**
     * Parse option switches and the single input file name, then process
     * the file, writing the result to standard output.
     * @param av command-line arguments
     * @throws IOException if reading or writing fails
     * @throws IllegalArgumentException on an unknown switch, more than one
     *         file name, or no file name
     */
    public void run(String... av) throws IOException {
        File file = null;
        for (String arg : av) {
            if (arg.startsWith("-")) {
                if (arg.equals("-s") || arg.equals("--sort")) {
                    resortDefs = true;
                } else if (arg.equals("-u") || arg.equals("--uses")) {
                    renumberByDef = false;
                } else if (arg.equals("-d") || arg.equals("--defs")) {
                    renumberByDef = true;
                } else if (arg.equals("-g") || arg.equals("--generate-missing")) {
                    generateMissing = true;
                } else {
                    usage();
                }
            } else {
                if (file != null) {
                    usage();
                }
                file = new File(arg);
            }
        }
        if (file == null) {
            usage();
        }
        out = new BufferedWriter(new OutputStreamWriter(System.out));
        doFile(file);
        out.flush();
    }

    /** Report a command-line error; always throws. */
    private static void usage() {
        throw new IllegalArgumentException(
                "Usage: EndNotes [-s|--sort] [-u|--uses] [-d|--defs] [-g|--generate-missing] input.txt > output.txt");
    }
}