// Logo Search packages:
// Sourcecode: pauker version File versions  Download package
//
// SearchEngine.java

/*
 * SearchEngine.java
 *
 * Created on 5. September 2005, 21:41
 *
 */

package pauker.program;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.HitCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;

/**
 * a search engine for pauker cards based on apache lucene
 * @author Ronny.Standtke@gmx.net
 */
00035 public class SearchEngine {

    /**
     * the identifier for searches at the front side
     */
00040     public static final String FRONT_SIDE = "frontSide";
    /**
     * the identifier for searches at the reverse side
     */
00044     public static final String REVERSE_SIDE = "reverseSide";
    private static final String OBJECT_ID = "objectId";
    private static final Logger logger =
            Logger.getLogger(SearchEngine.class.getName());
    private final Analyzer analyzer;
    private final RAMDirectory ramDirectory;
    // we need this HashMap for holding a reference to the "real" card
    // (the search result of lucence alone is not useful)
    private final Map<String, Card> indexedCards;
    private int searchLimit;

    /** Creates a new instance of SearchEngine */
00056     public SearchEngine() {
        // !!!!!!!!!!
        // The StandardAnalyzer filters out too many things.
        // We would not find any similar mathematical equations.
        // Therefore we better use the simple WhitespaceAnalyzer.
        // analyzer  = new StandardAnalyzer();
        // !!!!!!!!!!
        analyzer = new WhitespaceAnalyzer();
        ramDirectory = new RAMDirectory();
        indexedCards = new HashMap<String, Card>();
        searchLimit = 1;
        privateInit();
    }

    /**
     * must be called when the search index should be resetted
     * (e.g. for a new lesson or open file)
     */
00074     public void init() {
        privateInit();
    }

    /**
     * method is indexing all cards of a lesson (used when opening a lesson)
     * @param lesson the lesson to index
     */
00082     public void index(Lesson lesson) {
        try {
            IndexWriter indexWriter = new IndexWriter(ramDirectory, analyzer,
                    false, IndexWriter.MaxFieldLength.UNLIMITED);
            for (Card card : lesson.getCards()) {
                addCard(indexWriter, card);
            }
            indexWriter.optimize();
            indexWriter.close();
        } catch (IOException e) {
            logger.log(Level.SEVERE, null, e);
        }
    }

    /**
     * method to index a single new card or index changes of a card
     * (new card, edit card, import lesson)
     * @param card the card to index
     */
00101     public void index(Card card) {
        // when the card was already indexed we have to remove it
        removeCard(card);
        try {
            IndexWriter indexWriter = new IndexWriter(ramDirectory, analyzer,
                    false, IndexWriter.MaxFieldLength.UNLIMITED);
            addCard(indexWriter, card);
            // this is horribly slow, dont do this for single cards...
            // indexWriter.optimize();
            indexWriter.close();
        } catch (IOException e) {
            logger.log(Level.SEVERE, null, e);
        }
    }

    /**
     * remove a card from the index
     * @param card the card to be removed
     */
00120     public void removeCard(Card card) {
        String cardID = card.getId();
        // delete from lucene index
        try {
            IndexReader reader = IndexReader.open(ramDirectory);
            // we do not care if the card was really there already...
            reader.deleteDocuments(new Term(OBJECT_ID, cardID));
            reader.close();
        } catch (IOException e) {
            logger.log(Level.SEVERE, null, e);
        }
        // remove from internal HashMap
        indexedCards.remove(cardID);
    }

    /**
     * sets a search limit (minimum number of letters a word must contain to be
     * searched)
     * @param searchLimit the search limit
     */
00140     public void setSearchLimit(int searchLimit) {
        this.searchLimit = searchLimit;
    }

    /**
     * searches for similar cards
     * @param searchString the string that the user already has inserted at the
     * card side
     * @param side the card side identifier string
     * @return similar cards
     */
00151     public List<Card> search(String searchString, String side) {
        final List<Card> similarCards = new ArrayList<Card>();

        try {
            final IndexSearcher indexSearcher = new IndexSearcher(ramDirectory);

            // tokenize searchString and add a "~" to the end of each token
            // -> means "fuzzy search" for indexSearcher
            String[] tokens = searchString.split("\\s");
            StringBuilder stringBuilder = new StringBuilder();
            for (String token : tokens) {
                if (token.length() >= searchLimit) {
                    token = QueryParser.escape(token);
                    stringBuilder.append(token);
                    stringBuilder.append("~");
                    stringBuilder.append(" ");
                }
            }
            String queryString = stringBuilder.toString();
            if (queryString.length() > 0) {
                // execute the query
                QueryParser queryParser = new QueryParser(side, analyzer);
                Query query = queryParser.parse(queryString);
                HitCollector hitCollector = new HitCollector() {

                    @Override
                    public void collect(int doc, float score) {
                        try {
                            Document document = indexSearcher.doc(doc);
                            String cardID = document.get(OBJECT_ID);
                            Card similarCard = indexedCards.get(cardID);
                            similarCards.add(similarCard);
                        } catch (CorruptIndexException ex) {
                            logger.log(Level.SEVERE, null, ex);
                        } catch (IOException ex) {
                            logger.log(Level.SEVERE, null, ex);
                        }
                    }
                };
                indexSearcher.search(query, hitCollector);
            }

        } catch (Exception e) {
            logger.log(Level.SEVERE, null, e);
        }

        return similarCards;
    }
    
    private void privateInit() {
        try {
            IndexWriter indexWriter = new IndexWriter(ramDirectory, analyzer,
                    true, IndexWriter.MaxFieldLength.UNLIMITED);
            indexWriter.close();
        } catch (IOException e) {
            logger.log(Level.SEVERE, null, e);
        }
        indexedCards.clear();
    }

    private void addCard(IndexWriter indexWriter, Card card)
            throws IOException {
        String cardID = card.getId();
        String frontSideText = card.getFrontSide().getText();
        String reverseSideText = card.getReverseSide().getText();

        if ((cardID != null) && (frontSideText != null) &&
                (reverseSideText != null)) {
            // add card to lucene index
            Document document = new Document();
            document.add(new Field(OBJECT_ID, cardID,
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field(FRONT_SIDE, frontSideText,
                    Field.Store.YES, Field.Index.ANALYZED));
            document.add(new Field(REVERSE_SIDE, reverseSideText,
                    Field.Store.YES, Field.Index.ANALYZED));
            indexWriter.addDocument(document);
            // put card to internal HashMap
            indexedCards.put(cardID, card);
        }
    }
}

// Generated by  Doxygen 1.6.0   Back to index