package test.werner.nio; import java.io.*; import java.nio.*; import java.nio.channels.*; import java.nio.charset.*; import java.util.HashMap; import java.util.SortedMap; import java.util.TreeMap; import java.util.regex.*; /*** * * First: read the file reverse. * * ===> all indices mapped into a oldIndexMap ( index , text ) * and * a newIndexMap ( oldindex , undef ) * * Second: parse through the file remap the index in the newIndexMap * and print content and new index. * * Print sorted index : newIndex + oldIndexMap( oldIndex ) * * Performance: * Zeit: * 55 MByte 36 sec * Speicher : * 170Mbyte + 180Mbyte virtuell * * * @author werner wetjen * */ public class Indexer { private static long start, end; // private static String indexMarker = "@footnote:" ; // Charset and decoder for ISO-8859-15 private static Charset charset = Charset.forName("ISO-8859-15"); private static CharsetDecoder decoder = charset.newDecoder(); // Pattern used to parse lines private static Pattern linePattern = Pattern.compile(".*\r\n"); // = Pattern.compile(".*\r?\n"); private static Pattern bibRefPattern =Pattern.compile("\\[([0-9]+)\\]"); // The input pattern that we're looking for private static Pattern pattern; // contains key=oldIndex --> indextText private static HashMap oldIndexMap = new HashMap(); // contains key=oldIndex --> newIndex private static HashMap newIndexMap = new HashMap(); // the indexCounter used to renumber the indexes private static int newIndex=1; // Compile the pattern from the command line // private static void compile(String pat) { try { pattern = Pattern.compile(pat); } catch (PatternSyntaxException x) { System.err.println(x.getMessage()); System.exit(1); } } // Use the linePattern to break the given CharBuffer into lines, applying // the input pattern to each line to see if we have a match // private static long grep(File f, CharBuffer cb, long lines, Boolean done) { Matcher lm = linePattern.matcher(cb); // Line matcher Matcher pm = null; // Pattern matcher CharSequence cs =null; while (lm.find()) { cs = lm.group(); // The current line if(cs.toString().startsWith(indexMarker)) { done=true; return lines; } if (pm == null) pm = bibRefPattern.matcher(cs); else pm.reset(cs); if (pm.find()) { String line=cs.toString(); String index= line.substring(pm.start()+1, pm.end()-1); String begin= line.substring(0,pm.start()); String end=line.substring(pm.end()); // replace the oldIndex in the Map with the new one int oldIndex=Integer.parseInt(index); // check first if index is already mapped Integer indexE=newIndexMap.get(new Integer(oldIndex)); if (indexE != null && indexE.intValue()== -1 ) { // replace the oldIndex newIndexMap.put(new Integer(oldIndex), new Integer(newIndex)); newIndex++; } // use the corrected index indexE=newIndexMap.get(new Integer(oldIndex)); System.out.print("l:=" + lines + "<:" + cs); System.out.print( " >:" + begin + "[" + Integer.toString(indexE) + "]"+ end ); }else System.out.print(cs); if (lm.end() == cb.limit()) { break; } lines++; } // System.out.print(f + " blockEnd:" + lines + ":" + cs); return lines; } /** * It's not only a reverse search, but fill * the oldIndexMap. * * @param f * @param cb * @return */ private static Matcher indexMatcher = null ; // bibRefPattern.matcher(""); private static boolean reverseGrep(File f, CharBuffer cb ,HashMap oldIndexMap ) { Matcher lm = linePattern.matcher(cb); // Line matcher Matcher pm = null; // Pattern matcher boolean done=false; CharSequence cs =null; while (lm.find()) { cs = lm.group(); // The current line String indexEntry=cs.toString(); // System.out.println("l:=" + indexEntry); if(indexEntry.startsWith(indexMarker)) done=true; if(indexMatcher==null) indexMatcher = bibRefPattern.matcher(cs); else indexMatcher = indexMatcher.reset(cs); indexMatcher.reset(cs); if(indexMatcher.find()) { String entry= indexEntry.substring(indexEntry.indexOf(']')+2); String index = indexEntry.substring( indexEntry.indexOf('[')+ 1, indexEntry.indexOf(']')); int intIndex=Integer.parseInt(index); // System.out.print(" add to old Map: " + cs); // add the index to the map: oldIndexMap.put(new Integer(intIndex), entry); newIndexMap.put(new Integer(intIndex),new Integer(-1)); } if (lm.end() == cb.limit()) { break; } } // System.out.print(f + " blockEnd:" + lines + ":" + cs); return done; } // Search for occurrences of the input pattern in the given file // private static void grep(File f) throws IOException { // Open the file and then get a channel from the stream FileInputStream fis = new FileInputStream(f); FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory long sz = (int)fc.size(); long BS=1000000; long count = sz-BS; long position= 0; long lines=1; Boolean done=new Boolean(false); while ( count > -BS && ! done ) { //MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, position, (count >= BS ) ? position+ BS : position + count ); MappedByteBuffer bb = null; if( count <0 ) bb = fc.map(FileChannel.MapMode.READ_ONLY, position, BS+count ); else bb = fc.map(FileChannel.MapMode.READ_ONLY, position, BS ); // System.out.println("pos:=" + position ); // Decode the file into a char buffer CharBuffer cb = decoder.decode(bb); // Perform the search lines=grep(f, cb, lines, done ); count -= BS; position+=BS; } // Close the channel and the stream fc.close(); } private static void reverseGrep(File f) throws IOException { // Open the file and then get a channel from the stream FileInputStream fis = new FileInputStream(f); FileChannel fc = fis.getChannel(); // Get the file's size and then map it into memory long sz = (int)fc.size(); long BS=1000; long count = sz-BS; long position= sz-BS; while ( count > -BS ) { MappedByteBuffer bb = null; if( count <0 ) bb = fc.map(FileChannel.MapMode.READ_ONLY, position, BS+count ); else bb = fc.map(FileChannel.MapMode.READ_ONLY, position, BS ); // System.out.println("pos:=" + position ); // Decode the file into a char buffer CharBuffer cb = decoder.decode(bb); // Perform the search if ( reverseGrep(f, cb, oldIndexMap)) return ; // lines=grep(f, cb, lines); count -= BS; position-=BS; } // Close the channel and the stream fc.close(); } public static void main(String[] args) { start = System.currentTimeMillis(); File pwd = new File(""); System.out.println("Grep is running at pwd:=" + pwd.getAbsolutePath()); if (args.length < 2) { System.err.println("Usage: java Grep pattern file..."); return; } compile(args[0]); try { File f = new File(args[1]); // fill indexMap reverseGrep(f); printIndexMap(); try { grep(f); printNewIndexMap(); } catch (IOException x) { System.err.println(f + ": " + x); } } catch ( Exception ex) { ex.printStackTrace(); System.err.println(""); } end = System.currentTimeMillis(); System.out.println("End grep duration:=" + (end -start)/ 1000 ); } private static void printNewIndexMap() { // first sort: SortedMap newSortedIndexMap= new TreeMap(); for ( Integer key : newIndexMap.keySet()) { System.out.println("" ); newSortedIndexMap.put(newIndexMap.get(key), key); } // now print the index for ( Integer key : newSortedIndexMap.keySet()) { //System.out.println("k:=" + key // + " v:=" + oldIndexMap.get(newSortedIndexMap.get(key))); System.out.print("[" + key + "] " + oldIndexMap.get(newSortedIndexMap.get(key))); } } /** * for debugging * */ private static void printIndexMap() { for ( Integer key : oldIndexMap.keySet() ) { System.out.print("k:=" + key + " e:=" + oldIndexMap.get(key)); } } }