How to add multiple boolean queries in Lucene
I have 10 million Lucene docs that look like this:
{
"0": 230,
"1": 12,
"2": 611,
"3": 800
}
I am trying to find all documents in which every field is less than 10. Here is the Lucene code I have:
// Build a conjunction: every listed field must hold a value in the inclusive range [1, 10].
BooleanQuery bq = new BooleanQuery();
foreach (string fieldName in new[] { "0", "1", "2" })
{
    bq.Add(NumericRangeQuery.NewIntRange(fieldName, 1, 10, true, true), Occur.MUST);
}
//bq.Add(NumericRangeQuery.NewIntRange("3", 1, 1000, true, true), Occur.MUST);

// Fetch at most the 10 best hits and print the stored value of field "3" for each one.
TopDocs hits = searcher.Search(bq, 10);
int counter = 0;
for (int hitIndex = 0; hitIndex < hits.ScoreDocs.Length; hitIndex++)
{
    int docId = hits.ScoreDocs[hitIndex].Doc;
    Lucene.Net.Documents.Document doc = searcher.Doc(docId);
    Console.WriteLine("3: " + doc.Get("3"));
    counter++; // running tally of the hits printed so far
}
The problem I am facing: when I check all 4 properties to see whether they are all between 1 and 10, I get no results. When I check only the first 3 properties, I get correct results, but as soon as I add the fourth clause I get nothing. As you can see, the fourth clause is commented out because it does not produce any results. I even tried checking the fourth property against the range 1 to 1000 and still got no results. Am I doing something wrong? This is how I build the index:
/// <summary>
/// Builds the test index: 10,000,000 documents, each carrying a stored numeric
/// "id" field plus five stored numeric fields named "0" through "4" whose values
/// come from <c>rand.Next(1, 1000)</c>.
/// </summary>
/// <remarks>
/// The directory and the writer are held in <c>using</c> blocks so that both are
/// disposed even when indexing throws part-way through; previously they were only
/// disposed on the success path.
/// <c>rand</c> is declared outside this method (presumably a System.Random, in
/// which case the generated values lie in 1..999 - confirm against its declaration).
/// </remarks>
public static void BuildIndex()
{
    const int documentCount = 10000000;
    const int fieldsPerDocument = 5;
    // Second NumericField constructor argument (the precisionStep).
    const int precisionStep = 100000;
    // Progress is reported once every this many documents.
    const int progressInterval = 500;

    using (Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo("C:\\Users\\Luke\\Desktop\\1")))
    {
        Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        using (IndexWriter writer = new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(100000)))
        {
            for (int x = 0; x < documentCount; x++)
            {
                Document doc = new Document();
                doc.Add(new NumericField("id", precisionStep, Field.Store.YES, true).SetIntValue(x));
                for (int i = 0; i < fieldsPerDocument; i++)
                {
                    // Field names are the loop index rendered as text: "0", "1", ... "4".
                    doc.Add(new NumericField(i.ToString(), precisionStep, Field.Store.YES, true).SetIntValue(rand.Next(1, 1000)));
                }
                writer.AddDocument(doc);
                if (x % progressInterval == 0)
                {
                    Console.WriteLine(x);
                }
            }
            writer.Optimize();
            writer.Flush(true, true, true);
        } // writer is disposed here, before the directory it writes to
    }
    Console.WriteLine("done");
    Console.Read();
}
source to share
I just recreated this program in Java Lucene (4.4) and I don't see any problem in querying a numeric range.
1) 3 Documents
field:0 - value:137
field:1 - value:41
field:2 - value:908
field:3 - value:871
field:4 - value:686
field:0 - value:598
field:1 - value:623
field:2 - value:527
field:3 - value:364
field:4 - value:800
field:0 - value:96
field:1 - value:301
field:2 - value:323
field:3 - value:94
field:4 - value:653
2) Indexer
package com.numericrange;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a tiny demo index of three documents. Each document has a stored
 * {@code id} field and five stored int fields named "0" through "4" holding
 * random values between 1 and 1000 (inclusive).
 */
public class IndexBuilder
{
    /**
     * Recreates the index under {@code /Users/Lucene/indexes} and prints every
     * generated value together with the final document count.
     *
     * @param args unused
     * @throws IOException if the index directory cannot be opened or written
     */
    public static void main(String[] args) throws IOException
    {
        Directory dir = FSDirectory.open(new File("/Users/Lucene/indexes"));
        try
        {
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44));
            // CREATE replaces any index that already exists at this location.
            iwc.setOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(dir, iwc);
            try
            {
                for (int x = 0; x < 3; x++)
                {
                    Document doc = new Document();
                    // NOTE: the second IntField constructor argument is the field's
                    // initial VALUE, not a precisionStep; it is overwritten by
                    // setIntValue on the next line.
                    IntField iFldOut = new IntField("id", 6, Field.Store.YES);
                    iFldOut.setIntValue(x);
                    doc.add(iFldOut);
                    for (int i = 0; i < 5; i++)
                    {
                        // Math.random() is in [0, 1), so randomVal is in 1..1000.
                        int randomVal = (int)(Math.random() * 1000) + 1;
                        // As above, 6 is only a placeholder value replaced by randomVal.
                        IntField iFld = new IntField(Integer.toString(i), 6, Field.Store.YES);
                        iFld.setIntValue(randomVal);
                        doc.add(iFld);
                        System.out.println("i:" + i + " - Random Value:" + randomVal);
                    }
                    writer.addDocument(doc);
                }
                int newNumDocs = writer.numDocs();
                System.out.println("************************");
                System.out.println(newNumDocs + " documents added.");
                System.out.println("************************");
            }
            finally
            {
                // Close the writer even when indexing fails part-way through.
                writer.close();
            }
        }
        finally
        {
            // The directory was never closed before; release it on every path.
            dir.close();
        }
    }
}
3) Search
package com.numericrange;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a five-clause numeric range query against the index under
 * {@code /Users/Lucene/indexes} and prints every matching document.
 */
public class NumericQueryDemo
{
    /**
     * Builds a {@link BooleanQuery} in which each of the fields "0" through "4"
     * MUST fall inside its own inclusive int range, executes it, and prints the
     * query, the elapsed search time, the hit count and the stored field values
     * of each hit.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException
    {
        // Use Indexes from existing folder
        String dirPath = "/Users/Lucene/indexes";
        FSDirectory dir = FSDirectory.open(new File(dirPath));
        try
        {
            IndexReader reader = DirectoryReader.open(dir);
            try
            {
                IndexSearcher searcher = new IndexSearcher(reader);

                // Every clause is MUST, so a document matches only if all five ranges hold.
                BooleanQuery bq = new BooleanQuery();
                bq.add(NumericRangeQuery.newIntRange("0", 100, 600, true, true), Occur.MUST);
                bq.add(NumericRangeQuery.newIntRange("1", 40, 700, true, true), Occur.MUST);
                bq.add(NumericRangeQuery.newIntRange("2", 500, 1000, true, true), Occur.MUST);
                bq.add(NumericRangeQuery.newIntRange("3", 300, 900, true, true), Occur.MUST);
                bq.add(NumericRangeQuery.newIntRange("4", 600, 800, true, true), Occur.MUST);
                System.out.println("Query Data:" + bq.toString());

                // Collect up to 500 top-scoring hits.
                TopScoreDocCollector collector = TopScoreDocCollector.create(500, true);
                long startTime = System.currentTimeMillis();
                searcher.search(bq, collector);
                System.out.println("Search Time: "+(System.currentTimeMillis() - startTime)+"ms");

                // Display Results
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                System.out.println("Found " + hits.length + " hits.");
                for(int i=0; i < hits.length; ++i)
                {
                    int docId = hits[i].doc;
                    Document d = searcher.doc(docId);
                    System.out.println((i + 1) + ". " + hits[i].score + " "+ d.get("id") + " ==== " + d.get("0") +
                            " ==== " + d.get("1") + " ==== " + d.get("2") + " ==== " + d.get("3") + " ==== " + d.get("4"));
                }
            }
            finally
            {
                // The reader was never closed before; release it on every path.
                reader.close();
            }
        }
        finally
        {
            // Closing the reader does not close the directory it was opened on.
            dir.close();
        }
    }
}
4) Search results
Query Data:+0:[100 TO 600] +1:[40 TO 700] +2:[500 TO 1000] +3:[300 TO 900] +4:[600 TO 800]
Search Time: 27ms
Found 2 hits.
1. 2.236068 0 ==== 137 ==== 41 ==== 908 ==== 871 ==== 686
2. 2.236068 1 ==== 598 ==== 623 ==== 527 ==== 364 ==== 800
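Note that the `6` passed to the `IntField` constructor is the field's initial value (immediately replaced by `setIntValue`), not a precisionStep; in Lucene 4.4 a custom precisionStep has to be set through a `FieldType`, so these fields are indexed with the default precisionStep. I confirmed that the documents were correctly indexed through Luke, and also ran the same query through Luke.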
Can you try running a query through the Luke interface? Change the values as per your docs.
+0:[100 TO 600] +1:[40 TO 700] +2:[500 TO 1000] +3:[300 TO 900] +4:[600 TO 800]
source to share