/************************************************************************** /* A utility for making Lucene Documents from a File. This is a modified /* version of src/demo/org/apache/lucene/demo/FileDocument.java from the /* Lucene distribution. See COPYING.lucene in the toplevel directory for /* copyright information about Lucene. /* /* Copyright (c) 2003-2004 by Bernhard Bablok (mail@bablokb.de) /* /* This library is free software; you can redistribute it and/or modify /* it under the terms of the GNU Lesser General Public License as published /* by the Free Software Foundation; either version 2 of the License or /* (at your option) any later version. /* /* This library is distributed in the hope that it will be useful, but /* WITHOUT ANY WARRANTY; without even the implied warranty of /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /* GNU Lesser General Public License for more details. /* /* You should have received a copy of the GNU Lesser General Public License /* along with this library; see the file COPYING.LESSER. If not, write to /* the Free Software Foundation Inc., 59 Temple Place - Suite 330, /* Boston, MA 02111-1307 USA /**************************************************************************/ package de.bablokb.luala.prototype; import java.io.File; import java.io.Reader; import java.io.FileInputStream; import java.io.BufferedReader; import java.io.InputStreamReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.DateField; /** A utility for making Lucene Documents from a File. @version $Revision: 1.5 $ @author $Author: bablokb $ */ public class FileDocument { /** Makes a document for a File.
<p>
The document has three fields:
<ul>
<li><code>path</code>--containing the pathname of the file, as a stored,
tokenized field;</li>
<li><code>modified</code>--containing the last modified date of the file as
a keyword field as encoded by DateField; and</li>
<li><code>contents</code>--containing the full contents of the file, as a
Reader field.</li>
</ul>
*/
public Document createDocument(File f)
    throws java.io.IOException {
  // Builds the index entry for a single file: one Document carrying the
  // "path", "modified" and "contents" fields, added in that order.
  final Document result = new Document();

  // "path": the pathname as reported by File.getPath(). A Text field built
  // from a String keeps the value in the index and makes it searchable.
  final String pathname = f.getPath();
  result.add(Field.Text("path", pathname));

  // "modified": the file's last-modified timestamp, converted to its
  // DateField string form. A Keyword field is searchable but is indexed
  // as a single term rather than being split into words.
  final String modifiedStamp = DateField.timeToString(f.lastModified());
  result.add(Field.Keyword("modified", modifiedStamp));

  // "contents": the file body, supplied as a Reader so that the text is
  // tokenized. Bytes are decoded with the platform default charset because
  // no charset is passed to InputStreamReader. The stream is deliberately
  // NOT closed here: the returned document still holds the open reader.
  // (Open question inherited from the original author: why a plain
  // FileReader did not work in this place.)
  final Reader fileText =
      new BufferedReader(new InputStreamReader(new FileInputStream(f)));
  result.add(Field.Text("contents", fileText));

  return result;
}
}