Added ParallelMultiSearcher. · apache/lucene-solr@b1541ce (original) (raw)
``
1
`+
package org.apache.lucene.search;
`
``
2
+
``
3
`+
/* ====================================================================
`
``
4
`+
- The Apache Software License, Version 1.1
`
``
5
`+
`
``
6
`+
- Copyright (c) 2004 The Apache Software Foundation. All rights
`
``
7
`+
- reserved.
`
``
8
`+
`
``
9
`+
- Redistribution and use in source and binary forms, with or without
`
``
10
`+
- modification, are permitted provided that the following conditions
`
``
11
`+
- are met:
`
``
12
`+
`
``
13
`+
- Redistributions of source code must retain the above copyright
`
``
14
`+
- notice, this list of conditions and the following disclaimer.
`
``
15
`+
`
``
16
`+
- Redistributions in binary form must reproduce the above copyright
`
``
17
`+
- notice, this list of conditions and the following disclaimer in
`
``
18
`+
- the documentation and/or other materials provided with the
`
``
19
`+
- distribution.
`
``
20
`+
`
``
21
`+
- The end-user documentation included with the redistribution,
`
``
22
`+
- if any, must include the following acknowledgment:
`
``
23
`+
- "This product includes software developed by the
`
``
24
`+
- Apache Software Foundation (http://www.apache.org/)."
`
``
25
`+
- Alternately, this acknowledgment may appear in the software itself,
`
``
26
`+
- if and wherever such third-party acknowledgments normally appear.
`
``
27
`+
`
``
28
`+
- The names "Apache" and "Apache Software Foundation" and
`
``
29
`+
- "Apache Lucene" must not be used to endorse or promote products
`
``
30
`+
- derived from this software without prior written permission. For
`
``
31
`+
- written permission, please contact apache@apache.org.
`
``
32
`+
`
``
33
`+
- Products derived from this software may not be called "Apache",
`
``
34
`+
- "Apache Lucene", nor may "Apache" appear in their name, without
`
``
35
`+
- prior written permission of the Apache Software Foundation.
`
``
36
`+
`
``
37
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
``
38
`+
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
`
``
39
`+
- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
`
``
40
`+
- DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
`
``
41
`+
- ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
`
``
42
`+
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
`
``
43
`+
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
`
``
44
`+
- USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
`
``
45
`+
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
`
``
46
`+
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
`
``
47
`+
- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
`
``
48
`+
- SUCH DAMAGE.
`
``
49
`+
- ====================================================================
`
``
50
`+
`
``
51
`+
- This software consists of voluntary contributions made by many
`
``
52
`+
- individuals on behalf of the Apache Software Foundation. For more
`
``
53
`+
- information on the Apache Software Foundation, please see
- <http://www.apache.org/>.
`
``
54
`+
`
``
55
`+
*/
`
``
56
+
``
57
`+
import java.io.IOException;
`
``
58
+
``
59
`+
import org.apache.lucene.index.Term;
`
``
60
+
``
61
`+
/** Implements parallel search over a set of Searchables
.
`
``
62
`+
`
``
63
`+
Applications usually need only call the inherited {@link #search(Query)}
`
``
64
`+
- or {@link #search(Query,Filter)} methods.
`
``
65
`+
*/
`
``
66
`+
public class ParallelMultiSearcher extends MultiSearcher {
`
``
67
+
``
68
`+
private Searchable[] searchables;
`
``
69
`+
private int[] starts;
`
``
70
+
``
71
`+
/** Creates a searcher which searches searchables. */
`
``
72
`+
public ParallelMultiSearcher(Searchable[] searchables) throws IOException {
`
``
73
`+
super(searchables);
`
``
74
`+
this.searchables=searchables;
`
``
75
`+
this.starts=getStarts();
`
``
76
`+
}
`
``
77
+
``
78
`+
/**
`
``
79
`+
- TODO: parallelize this one too
`
``
80
`+
*/
`
``
81
`+
public int docFreq(Term term) throws IOException {
`
``
82
`+
int docFreq = 0;
`
``
83
`+
for (int i = 0; i < searchables.length; i++)
`
``
84
`+
docFreq += searchables[i].docFreq(term);
`
``
85
`+
return docFreq;
`
``
86
`+
}
`
``
87
+
``
88
`+
/**
`
``
89
`+
- A search implementation which spans a new thread for each
`
``
90
`+
- Searchable, waits for each search to complete and merge
`
``
91
`+
- the results back together.
`
``
92
`+
*/
`
``
93
`+
public TopDocs search(Query query, Filter filter, int nDocs)
`
``
94
`+
throws IOException {
`
``
95
`+
HitQueue hq = new HitQueue(nDocs);
`
``
96
`+
int totalHits = 0;
`
``
97
`+
MultiSearcherThread[] msta =
`
``
98
`+
new MultiSearcherThread[searchables.length];
`
``
99
`+
for (int i = 0; i < searchables.length; i++) { // search each searcher
`
``
100
`+
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
`
``
101
`+
msta[i] =
`
``
102
`+
new MultiSearcherThread(
`
``
103
`+
searchables[i],
`
``
104
`+
query,
`
``
105
`+
filter,
`
``
106
`+
nDocs,
`
``
107
`+
hq,
`
``
108
`+
i,
`
``
109
`+
starts,
`
``
110
`+
"MultiSearcher thread #" + (i + 1));
`
``
111
`+
msta[i].start();
`
``
112
`+
}
`
``
113
+
``
114
`+
for (int i = 0; i < searchables.length; i++) {
`
``
115
`+
try {
`
``
116
`+
msta[i].join();
`
``
117
`+
} catch (InterruptedException ie) {
`
``
118
`+
; // TODO: what should we do with this???
`
``
119
`+
}
`
``
120
`+
IOException ioe = msta[i].getIOException();
`
``
121
`+
if (ioe == null) {
`
``
122
`+
totalHits += msta[i].hits();
`
``
123
`+
} else {
`
``
124
`+
// if one search produced an IOException, rethrow it
`
``
125
`+
throw ioe;
`
``
126
`+
}
`
``
127
`+
}
`
``
128
+
``
129
`+
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
`
``
130
`+
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
`
``
131
`+
scoreDocs[i] = (ScoreDoc) hq.pop();
`
``
132
+
``
133
`+
return new TopDocs(totalHits, scoreDocs);
`
``
134
`+
}
`
``
135
+
``
136
`+
/** Lower-level search API.
`
``
137
`+
`
``
138
`+
{@link HitCollector#collect(int,float)} is called for every non-zero
`
``
139
`+
- scoring document.
`
``
140
`+
`
``
141
`+
Applications should only use this if they need all of the
`
``
142
`+
- matching documents. The high-level search API ({@link
`
``
143
`+
- Searcher#search(Query)}) is usually more efficient, as it skips
`
``
144
`+
- non-high-scoring hits.
`
``
145
`+
`
``
146
`+
- @param query to match documents
`
``
147
`+
- @param filter if non-null, a bitset used to eliminate some documents
`
``
148
`+
- @param results to receive hits
`
``
149
`+
*
`
``
150
`+
- TODO: parallelize this one too
`
``
151
`+
*/
`
``
152
`+
public void search(Query query, Filter filter, final HitCollector results)
`
``
153
`+
throws IOException {
`
``
154
`+
for (int i = 0; i < searchables.length; i++) {
`
``
155
+
``
156
`+
final int start = starts[i];
`
``
157
+
``
158
`+
searchables[i].search(query, filter, new HitCollector() {
`
``
159
`+
public void collect(int doc, float score) {
`
``
160
`+
results.collect(doc + start, score);
`
``
161
`+
}
`
``
162
`+
});
`
``
163
+
``
164
`+
}
`
``
165
`+
}
`
``
166
+
``
167
`+
/*
`
``
168
`+
- TODO: this one could be parallelized too
`
``
169
`+
- @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
`
``
170
`+
*/
`
``
171
`+
public Query rewrite(Query original) throws IOException {
`
``
172
`+
Query[] queries = new Query[searchables.length];
`
``
173
`+
for (int i = 0; i < searchables.length; i++) {
`
``
174
`+
queries[i] = searchables[i].rewrite(original);
`
``
175
`+
}
`
``
176
`+
return original.combine(queries);
`
``
177
`+
}
`
``
178
+
``
179
`+
}
`
``
180
+
``
181
`+
/**
`
``
182
`+
- A thread subclass for searching a single searchable
`
``
183
`+
*/
`
``
184
`+
class MultiSearcherThread extends Thread {
`
``
185
+
``
186
`+
private Searchable searchable;
`
``
187
`+
private Query query;
`
``
188
`+
private Filter filter;
`
``
189
`+
private int nDocs;
`
``
190
`+
private int hits;
`
``
191
`+
private TopDocs docs;
`
``
192
`+
private int i;
`
``
193
`+
private HitQueue hq;
`
``
194
`+
private int[] starts;
`
``
195
`+
private IOException ioe;
`
``
196
+
``
197
`+
public MultiSearcherThread(
`
``
198
`+
Searchable searchable,
`
``
199
`+
Query query,
`
``
200
`+
Filter filter,
`
``
201
`+
int nDocs,
`
``
202
`+
HitQueue hq,
`
``
203
`+
int i,
`
``
204
`+
int[] starts,
`
``
205
`+
String name) {
`
``
206
`+
super(name);
`
``
207
`+
this.searchable = searchable;
`
``
208
`+
this.query = query;
`
``
209
`+
this.filter = filter;
`
``
210
`+
this.nDocs = nDocs;
`
``
211
`+
this.hq = hq;
`
``
212
`+
this.i = i;
`
``
213
`+
this.starts = starts;
`
``
214
`+
}
`
``
215
+
``
216
`+
public void run() {
`
``
217
`+
try {
`
``
218
`+
docs = searchable.search(query, filter, nDocs);
`
``
219
`+
}
`
``
220
`+
// Store the IOException for later use by the caller of this thread
`
``
221
`+
catch (IOException ioe) {
`
``
222
`+
this.ioe = ioe;
`
``
223
`+
}
`
``
224
`+
if (ioe == null) {
`
``
225
`+
ScoreDoc[] scoreDocs = docs.scoreDocs;
`
``
226
`+
for (int j = 0;
`
``
227
`+
j < scoreDocs.length;
`
``
228
`+
j++) { // merge scoreDocs into hq
`
``
229
`+
ScoreDoc scoreDoc = scoreDocs[j];
`
``
230
`+
scoreDoc.doc += starts[i]; // convert doc
`
``
231
`+
//it would be so nice if we had a thread-safe insert
`
``
232
`+
synchronized (hq) {
`
``
233
`+
if (!hq.insert(scoreDoc))
`
``
234
`+
break;
`
``
235
`+
} // no more scores > minScore
`
``
236
`+
}
`
``
237
`+
}
`
``
238
`+
}
`
``
239
+
``
240
`+
public int hits() {
`
``
241
`+
return docs.totalHits;
`
``
242
`+
}
`
``
243
+
``
244
`+
public IOException getIOException() {
`
``
245
`+
return ioe;
`
``
246
`+
}
`
``
247
+
``
248
`+
}
`