Added ParallelMultiSearcher. · apache/lucene-solr@b1541ce (original) (raw)

1

package org.apache.lucene.search;

2

+

3

/* ====================================================================

4

The Apache Software License, Version 1.1

5

6

7

reserved.

8

9

Redistribution and use in source and binary forms, with or without

10

modification, are permitted provided that the following conditions

11

are met:

12

13

1. Redistributions of source code must retain the above copyright

14

notice, this list of conditions and the following disclaimer.

15

16

2. Redistributions in binary form must reproduce the above copyright

17

notice, this list of conditions and the following disclaimer in

18

the documentation and/or other materials provided with the

19

distribution.

20

21

3. The end-user documentation included with the redistribution,

22

if any, must include the following acknowledgment:

23

"This product includes software developed by the

24

Apache Software Foundation (http://www.apache.org/)."

25

Alternately, this acknowledgment may appear in the software itself,

26

if and wherever such third-party acknowledgments normally appear.

27

28

4. The names "Apache" and "Apache Software Foundation" and

29

"Apache Lucene" must not be used to endorse or promote products

30

derived from this software without prior written permission. For

31

written permission, please contact apache@apache.org.

32

33

5. Products derived from this software may not be called "Apache",

34

"Apache Lucene", nor may "Apache" appear in their name, without

35

prior written permission of the Apache Software Foundation.

36

37


 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED

38

WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

39

OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

40

DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR

41

ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

42

SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

43

LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF

44

USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

45

ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

46

OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT

47

OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

48

SUCH DAMAGE.

49

====================================================================

50

51

This software consists of voluntary contributions made by many

52

individuals on behalf of the Apache Software Foundation. For more

53

information on the Apache Software Foundation, please see

54

http://www.apache.org/.

55

56

+

57

import java.io.IOException;

58

+

59

import org.apache.lucene.index.Term;

60

+

61

/** Implements parallel search over a set of Searchables.

62

63

Applications usually need only call the inherited {@link #search(Query)}

64

or {@link #search(Query,Filter)} methods.

65

66

public class ParallelMultiSearcher extends MultiSearcher {

67

+

68

private Searchable[] searchables;

69

private int[] starts;

70

+

71

/** Creates a searcher which searches searchables. */

72

public ParallelMultiSearcher(Searchable[] searchables) throws IOException {

73

super(searchables);

74

this.searchables=searchables;

75

this.starts=getStarts();

76

}

77

+

78

/**

79

TODO: parallelize this one too

80

81

public int docFreq(Term term) throws IOException {

82

int docFreq = 0;

83

for (int i = 0; i < searchables.length; i++)

84

docFreq += searchables[i].docFreq(term);

85

return docFreq;

86

}

87

+

88

/**

89

A search implementation which spans a new thread for each

90

Searchable, waits for each search to complete and merge

91

the results back together.

92

93

public TopDocs search(Query query, Filter filter, int nDocs)

94

throws IOException {

95

HitQueue hq = new HitQueue(nDocs);

96

int totalHits = 0;

97

MultiSearcherThread[] msta =

98

new MultiSearcherThread[searchables.length];

99

for (int i = 0; i < searchables.length; i++) { // search each searcher

100

// Assume not too many searchables and cost of creating a thread is by far inferior to a search

101

msta[i] =

102

new MultiSearcherThread(

103

searchables[i],

104

query,

105

filter,

106

nDocs,

107

hq,

108

109

starts,

110

"MultiSearcher thread #" + (i + 1));

111

msta[i].start();

112

}

113

+

114

for (int i = 0; i < searchables.length; i++) {

115

try {

116

msta[i].join();

117

} catch (InterruptedException ie) {

118

; // TODO: what should we do with this???

119

}

120

IOException ioe = msta[i].getIOException();

121

if (ioe == null) {

122

totalHits += msta[i].hits();

123

} else {

124

// if one search produced an IOException, rethrow it

125

throw ioe;

126

}

127

}

128

+

129

ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];

130

for (int i = hq.size() - 1; i >= 0; i--) // put docs in array

131

scoreDocs[i] = (ScoreDoc) hq.pop();

132

+

133

return new TopDocs(totalHits, scoreDocs);

134

}

135

+

136

/** Lower-level search API.

137

138

{@link HitCollector#collect(int,float)} is called for every non-zero

139

scoring document.

140

141

Applications should only use this if they need all of the

142

matching documents. The high-level search API ({@link

143

Searcher#search(Query)}) is usually more efficient, as it skips

144

non-high-scoring hits.

145

146

@param query to match documents

147

@param filter if non-null, a bitset used to eliminate some documents

148

@param results to receive hits

149

150

TODO: parallelize this one too

151

152

public void search(Query query, Filter filter, final HitCollector results)

153

throws IOException {

154

for (int i = 0; i < searchables.length; i++) {

155

+

156

final int start = starts[i];

157

+

158

searchables[i].search(query, filter, new HitCollector() {

159

public void collect(int doc, float score) {

160

results.collect(doc + start, score);

161

}

162

});

163

+

164

}

165

}

166

+

167

168

TODO: this one could be parallelized too

169

@see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)

170

171

public Query rewrite(Query original) throws IOException {

172

Query[] queries = new Query[searchables.length];

173

for (int i = 0; i < searchables.length; i++) {

174

queries[i] = searchables[i].rewrite(original);

175

}

176

return original.combine(queries);

177

}

178

+

179

}

180

+

181

/**

182

A thread subclass for searching a single searchable

183

184

class MultiSearcherThread extends Thread {

185

+

186

private Searchable searchable;

187

private Query query;

188

private Filter filter;

189

private int nDocs;

190

private int hits;

191

private TopDocs docs;

192

private int i;

193

private HitQueue hq;

194

private int[] starts;

195

private IOException ioe;

196

+

197

public MultiSearcherThread(

198

Searchable searchable,

199

Query query,

200

Filter filter,

201

int nDocs,

202

HitQueue hq,

203

int i,

204

int[] starts,

205

String name) {

206

super(name);

207

this.searchable = searchable;

208

this.query = query;

209

this.filter = filter;

210

this.nDocs = nDocs;

211

this.hq = hq;

212

this.i = i;

213

this.starts = starts;

214

}

215

+

216

public void run() {

217

try {

218

docs = searchable.search(query, filter, nDocs);

219

}

220

// Store the IOException for later use by the caller of this thread

221

catch (IOException ioe) {

222

this.ioe = ioe;

223

}

224

if (ioe == null) {

225

ScoreDoc[] scoreDocs = docs.scoreDocs;

226

for (int j = 0;

227

j < scoreDocs.length;

228

j++) { // merge scoreDocs into hq

229

ScoreDoc scoreDoc = scoreDocs[j];

230

scoreDoc.doc += starts[i]; // convert doc

231

//it would be so nice if we had a thread-safe insert

232

synchronized (hq) {

233

if (!hq.insert(scoreDoc))

234

break;

235

} // no more scores > minScore

236

}

237

}

238

}

239

+

240

public int hits() {

241

return docs.totalHits;

242

}

243

+

244

public IOException getIOException() {

245

return ioe;

246

}

247

+

248

}