/************************************************************************** /* A factory for creating Lucene Documents. This factory actually only /* creates a number of standard fields (path, type, date). /* /* Copyright (c) 2003-2004 by Bernhard Bablok (mail@bablokb.de) /* /* This library is free software; you can redistribute it and/or modify /* it under the terms of the GNU Lesser General Public License as published /* by the Free Software Foundation; either version 2 of the License or /* (at your option) any later version. /* /* This library is distributed in the hope that it will be useful, but /* WITHOUT ANY WARRANTY; without even the implied warranty of /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /* GNU Lesser General Public License for more details. /* /* You should have received a copy of the GNU Lesser General Public License /* along with this library; see the file COPYING.LESSER. If not, write to /* the Free Software Foundation Inc., 59 Temple Place - Suite 330, /* Boston, MA 02111-1307 USA /**************************************************************************/ package de.bablokb.luala.lib; import java.io.*; import org.apache.lucene.document.*; /** A factory for creating Lucene Documents. This factory actually only creates a number of standard fields (path, type, date). The content has to be added by subclasses. This factory can be used for binary files, since it stores at least the meta-info.
The document has three fields:
StandardDocumentFactory.PATH
:
containing the pathname of the file, as a stored, tokenized field;
StandardDocumentFactory.DATE_MOD
:
the last-modified date of the file, as a searchable, non-tokenized field;
StandardDocumentFactory.TYPE
:
the MIME type of the document ("application/octet-stream" for this factory), as a stored, non-indexed field.
Subclasses should call the {@link #addContent}-method.
@version $Revision: 1.19 $
@author  $Author: bablokb $
*/

public class StandardDocumentFactory implements DocumentFactory {

  ////////////////////////////////////////////////////////////////////////////

  /**
     Maximum number of characters copied into the summary field.
  */

  private int iSummaryLength = 512;

  ////////////////////////////////////////////////////////////////////////////

  /**
     Return summary length.

     @return The maximum number of characters stored by {@link #addSummary}
  */

  public int getSummaryLength() {
    return iSummaryLength;
  }

  ////////////////////////////////////////////////////////////////////////////

  /**
     Add the content. The reader is handed to Lucene as a text field named
     CONTENT.

     @param doc     The document
     @param reader  The reader to add to the document.
  */

  public void addContent(Document doc, Reader reader) {
    doc.add(Field.Text(CONTENT,reader));
  }

  ////////////////////////////////////////////////////////////////////////////

  /**
     Add a summary (beginning of text) to the document. Should be called
     before {@link #addContent}, because the reader is rewound to its
     original position afterwards so that the full text can still be indexed.

     <p>This is a best-effort operation: if either argument is null, if the
     reader does not support mark/reset, if the reader is empty, or if an
     I/O error occurs, the document simply gets no summary field.</p>

     @param doc     The Lucene-document
     @param reader  A reader
     @throws IOException Never thrown by this implementation (I/O problems
                         mean "no summary"); kept in the signature for
                         callers and overriding subclasses.
  */

  public void addSummary(Document doc, Reader reader) throws IOException {
    if (doc == null || reader == null) {
      return;                               // nothing to summarize
    }
    try {
      // Probe: on readers without mark support this throws an IOException
      // before a single character has been consumed.
      reader.mark(iSummaryLength+1);
      reader.reset();

      reader.mark(iSummaryLength+1);
      try {
        char[] buffer = new char[iSummaryLength];
        int filled = 0;
        // A single read() may deliver fewer characters than requested, so
        // keep reading until the buffer is full or the input is exhausted.
        while (filled < buffer.length) {
          int count = reader.read(buffer,filled,buffer.length-filled);
          if (count <= 0) {
            break;                          // end of stream (or no progress)
          }
          filled += count;
        }
        if (filled > 0) {
          doc.add(Field.UnIndexed(SUMMARY,new String(buffer,0,filled)));
        }
      } finally {
        // Always rewind, even on failure, so that addContent() sees the
        // text from the beginning.
        reader.reset();
      }
    } catch (IOException e) {
      // Deliberately ignored: the summary is optional, and a reader without
      // mark/reset support (or a failing one) just yields no summary.
    }
  }

  ////////////////////////////////////////////////////////////////////////////

  /**
     Set the type-field of this document. This implementation stores the
     generic type "application/octet-stream" as an unindexed field.

     @param doc The document
  */

  public void setType(Document doc) {
    doc.add(Field.UnIndexed(TYPE,"application/octet-stream"));
  }

  ////////////////////////////////////////////////////////////////////////////

  /**
     Create a Lucene-Document. The document gets the PATH field (the given
     name), the DATE_MOD field (last-modified time of the file with that
     name) and the type field added by {@link #setType}.

     @param name The name of the source-document
     @return A {@link org.apache.lucene.document.Document}
     @throws FactoryException wrapping any exception raised while the
                              document is assembled
  */

  public Document createDocument(String name) throws FactoryException {
    try {
      Document doc = new Document();
      doc.add(Field.Text(PATH,name));
      // File.lastModified() reports 0L for a file that does not exist.
      doc.add(Field.Keyword(DATE_MOD,
                    DateField.timeToString((new File(name)).lastModified())));
      setType(doc);
      return doc;
    } catch (Exception e) {
      throw new FactoryException(e);
    }
  }
}