Highlighter at lucene.net does not work for wildchard and fuzzy search

highlighter using lucene.net (3.0.3) which doesn't work for below code. If I search for the word "deal" the highlighter shows, but if I search for a word with wildchar "deal *" then there is no selection

protected void btnIndex_Click(object sender, EventArgs e)
    {
        string indexPath = @"D:\temp\LuceneIndex1";

        Lucene.Net.Store.Directory directory = FSDirectory.Open(indexPath);
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);


        IndexReader red = IndexReader.Open(directory, true);
        int totDocs = red.MaxDoc;
        red.Close();

        //Add documents to the index
        string text = String.Empty;
        text = "One thing that may be of interest, is that if you are dealing with vast  quantites of data you may want to create static Field fields and  reuse them rather than creating new one each time you rebuild the index.  Obviously for this demo the Lucene index is only created once per application  run, but in a production application you may build the index every 5 mins or  something like that, in which case I would recommend reusing the Field objects by making static fields that get re-used.";


        int txts = totDocs;
        AddTextToIndex(txts++, text, writer);

        writer.Optimize();      
        writer.Dispose();
        //Setup searcher
        IndexSearcher searcher = new IndexSearcher(directory);
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "postBody", analyzer);
        text = txtSearchData.Text;

        Label1.Text = Search(text, searcher, parser, analyzer);



        //Clean up everything
        searcher.Close();
        directory.Close();
    }
    private static void AddTextToIndex(int txts, string text, IndexWriter writer)
    {
        Document doc = new Document();
        doc.Add(new Field("id", txts.ToString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        doc.Add(new Field("postBody", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(doc);
    }
    private string Search(string text, IndexSearcher searcher, QueryParser parser, Analyzer analyzer)
    {
        string indexPath = @"D:\temp\LuceneIndex1";

        Lucene.Net.Store.Directory directory = FSDirectory.Open(indexPath);
        string result = "";
        string snip = "";
        var booleanQuery = new BooleanQuery();

        var fuzzyQuery = new FuzzyQuery(new Term("postBody", text), 0.7f, 3);
        booleanQuery.Add(new BooleanClause(fuzzyQuery, Occur.SHOULD));


        //Supply conditions
        Query query = parser.Parse(text);
        FastVectorHighlighter highlighter = getHighlighter();

        parser.AllowLeadingWildcard = true;
        query = parser.Parse(text);
        BooleanQuery.MaxClauseCount = 10;
        query = query.Rewrite(IndexReader.Open(directory, true));

        query.Rewrite(IndexReader.Open(directory, true));
        FieldQuery fieldQuery = highlighter.GetFieldQuery(booleanQuery);

        TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);

        searcher.Search(query, collector);
        ScoreDoc[] hits = collector.TopDocs().ScoreDocs;


        int results = hits.Length;
        Console.WriteLine("Found {0} results", results);
        for (int i = 0; i < hits.Length; i++)
        {
            int docId = hits[i].Doc;
            float score = hits[i].Score;

            Lucene.Net.Documents.Document doc = searcher.Doc(docId);

            result = "Score: " + score.ToString() +
                           " Field: " + doc.Get("id") +
                           " Field2: " + doc.Get("postBody");

            string text1 = doc.Get("postBody");
            string[] hight = getFragmentsWithHighlightedTerms(analyzer, query, "postBody", text1, 5, 100, directory);             

        }        

        return result + " :::: " + snip;
    }

    private FastVectorHighlighter getHighlighter()
    {
        FragListBuilder fragListBuilder = new SimpleFragListBuilder();
        FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(
                BaseFragmentsBuilder.COLORED_PRE_TAGS,
                BaseFragmentsBuilder.COLORED_POST_TAGS);
        return new FastVectorHighlighter(true, true, fragListBuilder,
                fragmentsBuilder);
    }
    private static String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, string fieldName, string fieldContents, int fragmentSize, int maxsize, Lucene.Net.Store.Directory directory)
    {
        TokenStream stream = TokenSources.GetTokenStream(fieldName, fieldContents, analyzer);
        // SpanScorer scorer = new SpanScorer();//(query, fieldName, new CachingTokenFilter(stream));
        query = query.Rewrite(IndexReader.Open(directory, true));
        QueryScorer scorer = new QueryScorer(query, fieldName);
        scorer.IsExpandMultiTermQuery = true;// (true); 
        SimpleSpanFragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize);

        Highlighter highlighter = new Highlighter(scorer);
        highlighter.TextFragmenter = fragmenter;
        highlighter.MaxDocCharsToAnalyze = maxsize;

        String[] fragments = highlighter.GetBestFragments(stream, fieldContents, 10);

        return fragments;
    }

      

+3


source to share





All Articles