Fennel: /home/pub/open/dev/fennel/test/BTreeTest.cpp Source File (original) (raw)

00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 00024 #include "fennel/common/CommonPreamble.h" 00025 #include "fennel/test/SegStorageTestBase.h" 00026 #include "fennel/btree/BTreeBuilder.h" 00027 #include "fennel/btree/BTreeVerifier.h" 00028 #include "fennel/btree/BTreeReader.h" 00029 #include "fennel/btree/BTreeWriter.h" 00030 #include "fennel/segment/SegInputStream.h" 00031 #include "fennel/segment/SegOutputStream.h" 00032 #include "fennel/tuple/StandardTypeDescriptor.h" 00033 #include "fennel/tuple/TupleAccessor.h" 00034 00035 #include <boost/test/test_tools.hpp> 00036 00037 using namespace fennel; 00038 00039 #include 00040 00041 class BTreeTest : virtual public SegStorageTestBase 00042 { 00043 enum { 00044 nRandomSeed = 1000000, 00045 iValueMax = 1000000000 00046 }; 00047 00048 enum InsertType { 00049 MONOTONIC, 00050 SEQUENTIAL, 00051 RANDOM 00052 }; 00053 00054 struct Record 00055 { 00056 int32_t key; 00057 int32_t secondKey; 00058 int32_t value; 00059 }; 00060 00061 BTreeDescriptor descriptor; 00062 00063 TupleAccessor tupleAccessor; 00064 TupleData keyData; 00065 TupleData tupleData; 00066 Record record; 00067 boost::scoped_array recordBuf; 00068 00069 int32_t readKey(); 00070 int32_t readSecondKey(); 00071 int32_t readValue(); 00072 int32_t readMultiKeyValue(); 00073 void verifyTree(uint nRecordsExpected,uint nLevelsExpected); 00074 00075 void testBulkLoadOneLevelNewRoot() 00076 { 00077 testBulkLoad(200,1,true); 00078 } 00079 00080 void testBulkLoadOneLevelReuseRoot() 00081 { 00082 testBulkLoad(200,1,false); 00083 } 00084 00085 void testBulkLoadTwoLevelsNewRoot() 00086 { 00087 testBulkLoad(20000,2,true); 00088 } 00089 00090 void testBulkLoadTwoLevelsReuseRoot() 00091 { 00092 testBulkLoad(20000,2,false); 00093 } 00094 00095 void testBulkLoadThreeLevels() 00096 { 00097 testBulkLoad(200000,3,true); 00098 } 00099 00100 void testBulkLoad(uint nRecords,uint nLevelsExpected,bool newRoot); 00101 void testScan( 00102 SharedByteInputStream, 00103 uint nRecords, 00104 bool alternating, 00105 bool deletion); 00106 void testSearch(SharedByteInputStream,uint nRecords,bool leastUpper); 00107 void testSearchLast(); 00108 00109 void testSequentialInserts() 00110 { 00111 testInserts(SEQUENTIAL); 00112 } 00113 00114 void testRandomInserts() 00115 { 00116 testInserts(RANDOM); 00117 } 00118 00119 void testMonotonicInserts() 00120 { 00121 testInserts(MONOTONIC); 00122 } 00123 00124 void testInserts(InsertType insertType); 00125 00126 void marshalRecord(); 00127 void marshalMultiKeyRecord(); 00128 void unmarshalRecord(SharedByteInputStream pInputStream); 00129 00130 void testMultiKeySearches(uint nKey1, uint nKey2); 00131 00132 void testSmallMultiKeySearches() 00133 { 00134
00135 testMultiKeySearches(200, 4); 00136 } 00137 00138 void testBigMultiKeySearches() 00139 { 00140
00141
00142 testMultiKeySearches(4, 20000); 00143 } 00144 00145 public: 00146 explicit BTreeTest() 00147 { 00148 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadOneLevelNewRoot); 00149 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadOneLevelReuseRoot); 00150 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadTwoLevelsNewRoot); 00151 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadTwoLevelsReuseRoot); 00152 FENNEL_UNIT_TEST_CASE(BTreeTest,testBulkLoadThreeLevels); 00153 FENNEL_UNIT_TEST_CASE(BTreeTest,testSequentialInserts); 00154 FENNEL_UNIT_TEST_CASE(BTreeTest,testRandomInserts); 00155 FENNEL_UNIT_TEST_CASE(BTreeTest,testMonotonicInserts); 00156 00157
00158
00159
00160 FENNEL_UNIT_TEST_CASE(BTreeTest,testSmallMultiKeySearches); 00161 FENNEL_UNIT_TEST_CASE(BTreeTest,testBigMultiKeySearches); 00162 00163 StandardTypeDescriptorFactory stdTypeFactory; 00164 TupleAttributeDescriptor attrDesc( 00165 stdTypeFactory.newDataType(STANDARD_TYPE_INT_32)); 00166 descriptor.tupleDescriptor.push_back(attrDesc); 00167 descriptor.tupleDescriptor.push_back(attrDesc); 00168 descriptor.keyProjection.push_back(0); 00169 tupleAccessor.compute(descriptor.tupleDescriptor); 00170 recordBuf.reset(new FixedBuffer[tupleAccessor.getMaxByteCount()]); 00171 tupleData.compute(descriptor.tupleDescriptor); 00172 } 00173 00174 virtual void testCaseSetUp(); 00175 virtual void testCaseTearDown(); 00176 }; 00177 00178 void BTreeTest::testCaseSetUp() 00179 { 00180 openStorage(DeviceMode::createNew); 00181 openRandomSegment(); 00182 00183 descriptor.segmentAccessor.pSegment = pRandomSegment; 00184 descriptor.segmentAccessor.pCacheAccessor = pCache; 00185 } 00186 00187 void BTreeTest::testCaseTearDown() 00188 { 00189 descriptor.segmentAccessor.reset(); 00190 SegStorageTestBase::testCaseTearDown(); 00191 } 00192 00193 void BTreeTest::marshalRecord() 00194 { 00195 tupleData[0].pData = reinterpret_cast(&record.key); 00196 tupleData[1].pData = reinterpret_cast(&record.value); 00197 tupleAccessor.marshal(tupleData, recordBuf.get()); 00198 } 00199 00200 void BTreeTest::marshalMultiKeyRecord() 00201 { 00202 tupleData[0].pData = reinterpret_cast(&record.key); 00203 tupleData[1].pData = reinterpret_cast(&record.secondKey); 00204 tupleData[2].pData = reinterpret_cast(&record.value); 00205 tupleAccessor.marshal(tupleData, recordBuf.get()); 00206 } 00207 00208 void BTreeTest::unmarshalRecord(SharedByteInputStream pInputStream) 00209 { 00210 PConstBuffer pBuf = pInputStream->getReadPointer(1); 00211 tupleAccessor.setCurrentTupleBuf(pBuf); 00212 uint cbTuple = tupleAccessor.getCurrentByteCount(); 00213 tupleAccessor.unmarshal(tupleData); 00214 record.key = readKey(); 00215 record.value = readValue(); 00216 pInputStream->consumeReadPointer(cbTuple); 00217 } 00218 00219 void BTreeTest::testBulkLoad(uint nRecords,uint nLevelsExpected,bool newRoot) 00220 { 00221 BlockNum nPagesAllocatedInitially = 00222 pRandomSegment->getAllocatedSizeInPages(); 00223 00224 descriptor.rootPageId = NULL_PAGE_ID; 00225 BTreeBuilder builder(descriptor,pRandomSegment); 00226 00227 keyData.compute(builder.getKeyDescriptor()); 00228 keyData[0].pData = reinterpret_cast(&record.key); 00229 00230
00231
00232
00233
00234
00235
00236
00237
00238
00239 if (!newRoot) { 00240 builder.createEmptyRoot(); 00241 descriptor.rootPageId = builder.getRootPageId(); 00242 } 00243 00244
00245 SegmentAccessor segmentAccessor(pRandomSegment,pCache); 00246 SharedSegOutputStream pOutputStream = 00247 SegOutputStream::newSegOutputStream(segmentAccessor); 00248 std::subtractive_rng randomNumberGenerator(nRandomSeed); 00249 record.key = 0; 00250 for (uint i = 0; i < nRecords; i++) { 00251
00252 record.key += (randomNumberGenerator(10)) + 2; 00253 record.value = randomNumberGenerator(iValueMax); 00254 00255 marshalRecord(); 00256 uint cbTuple = tupleAccessor.getCurrentByteCount(); 00257 PBuffer pBuffer = pOutputStream->getWritePointer(cbTuple); 00258 memcpy(pBuffer,recordBuf.get(),cbTuple); 00259 pOutputStream->consumeWritePointer(cbTuple); 00260 } 00261 PageId pageId = pOutputStream->getFirstPageId(); 00262 pOutputStream.reset(); 00263 SharedSegInputStream pInputStream = 00264 SegInputStream::newSegInputStream(segmentAccessor,pageId); 00265 SegStreamPosition startPos; 00266 pInputStream->getSegPos(startPos); 00267 00268
00269 builder.build(*pInputStream,nRecords,1.0); 00270 descriptor.rootPageId = builder.getRootPageId(); 00271 00272
00273 verifyTree(nRecords,nLevelsExpected); 00274 00275
00276 pInputStream->seekSegPos(startPos); 00277 testSearch(pInputStream,nRecords,true); 00278 00279
00280
00281 pInputStream->seekSegPos(startPos); 00282 testSearch(pInputStream,nRecords,false); 00283 00284
00285 pInputStream->seekSegPos(startPos); 00286 testScan(pInputStream,nRecords,false,false); 00287 00288
00289 pInputStream->seekSegPos(startPos); 00290 testScan(pInputStream,nRecords,true,true); 00291 00292
00293 verifyTree(nRecords / 2,nLevelsExpected); 00294 00295
00296 pInputStream->seekSegPos(startPos); 00297 testScan(pInputStream,nRecords,true,false); 00298 00299
00300 pInputStream->seekSegPos(startPos); 00301 pInputStream->setDeallocate(true); 00302 pInputStream.reset(); 00303 00304
00305 builder.truncate(true); 00306 00307
00308 BOOST_CHECK_EQUAL( 00309 pRandomSegment->getAllocatedSizeInPages(), 00310 nPagesAllocatedInitially); 00311 } 00312 00313 void BTreeTest::verifyTree(uint nRecordsExpected,uint nLevelsExpected) 00314 { 00315 BTreeVerifier verifier(descriptor); 00316 verifier.verify(); 00317 00318 BTreeStatistics const &stats = verifier.getStatistics(); 00319 BOOST_CHECK_EQUAL(stats.nLevels,nLevelsExpected); 00320 BOOST_CHECK_EQUAL(stats.nTuples,nRecordsExpected); 00321 } 00322 00323 void BTreeTest::testSearch( 00324 SharedByteInputStream pInputStream,uint nRecords,bool leastUpper) 00325 { 00326 BTreeReader reader(descriptor); 00327 for (uint i = 0; i < nRecords; ++i) { 00328 unmarshalRecord(pInputStream); 00329 if (!reader.searchForKey(keyData,DUP_SEEK_ANY,leastUpper)) { 00330 BOOST_FAIL("LeastUpper:" << leastUpper << 00331 ". Could not find key #" << i << ": " << record.key); 00332 } 00333 reader.getTupleAccessorForRead().unmarshal(tupleData); 00334 BOOST_CHECK_EQUAL(record.key,readKey()); 00335 BOOST_CHECK_EQUAL(record.value,readValue()); 00336 if (!(i % 10000)) { 00337 BOOST_MESSAGE( 00338 "found value = " << readValue() 00339 << " key = " << readKey()); 00340 } 00341 record.key++; 00342 if (reader.searchForKey(keyData,DUP_SEEK_ANY)) { 00343 BOOST_FAIL("Found key " << record.key << " (shouldn't exist!)"); 00344 } 00345 } 00346 } 00347 00348 void BTreeTest::testScan( 00349 SharedByteInputStream pInputStream,uint nRecords, 00350 bool alternating,bool deletion) 00351 { 00352 BTreeReader realReader(descriptor); 00353 00354 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment( 00355 pCache, 00356 1); 00357 00358 BTreeWriter writer(descriptor,scratchAccessor); 00359 00360 bool found; 00361 int32_t lastKey = -1; 00362 BTreeReader &reader = deletion ? writer : realReader; 00363 found = reader.searchFirst(); 00364 if (!found) { 00365 BOOST_FAIL("searchFirst found nothing"); 00366 } 00367 if (alternating && !deletion) { 00368 nRecords /= 2; 00369 } 00370 for (uint i = 0; i < nRecords; ++i) { 00371 unmarshalRecord(pInputStream); 00372 if (alternating && !deletion) { 00373 unmarshalRecord(pInputStream); 00374 } 00375 if (!found) { 00376 BOOST_FAIL("Could not searchNext for key #" 00377 << i << ": " << record.key); 00378 } 00379 reader.getTupleAccessorForRead().unmarshal(tupleData); 00380 lastKey = readKey(); 00381 BOOST_CHECK_EQUAL(record.key,lastKey); 00382 BOOST_CHECK_EQUAL(record.value,readValue()); 00383 if (!(i % 10000)) { 00384 BOOST_MESSAGE( 00385 "scanned value = " << readValue() 00386 << " key = " << lastKey); 00387 } 00388 if (deletion) { 00389 if (!alternating || !(i & 1)) { 00390 writer.deleteCurrent(); 00391 } 00392 } 00393 found = reader.searchNext(); 00394 } 00395 00396 reader.endSearch(); 00397 00398 if (!deletion) { 00399 found = reader.searchLast(); 00400 if (!found) { 00401 BOOST_FAIL("searchLast found nothing"); 00402 } 00403 reader.getTupleAccessorForRead().unmarshal(tupleData); 00404 BOOST_CHECK_EQUAL(lastKey,readKey()); 00405 reader.endSearch(); 00406 } 00407 } 00408 00409 void BTreeTest::testSearchLast() 00410 { 00411 BTreeReader reader(descriptor); 00412 bool found = reader.searchLast(); 00413 if (!found) { 00414 BOOST_FAIL("searchLast found nothing"); 00415 } 00416 } 00417 00418 int32_t BTreeTest::readKey() 00419 { 00420 return *reinterpret_cast<int32_t const *>( 00421 tupleData[0].pData); 00422 } 00423 00424 int32_t BTreeTest::readSecondKey() 00425 { 00426 return *reinterpret_cast<int32_t const *>( 00427 tupleData[1].pData); 00428 } 00429 00430 int32_t BTreeTest::readValue() 00431 { 00432 return *reinterpret_cast<int32_t const *>( 00433 tupleData[1].pData); 00434 } 00435 00436 int32_t BTreeTest::readMultiKeyValue() 00437 { 00438 return *reinterpret_cast<int32_t const *>( 00439 tupleData[2].pData); 00440 } 00441 00442 void BTreeTest::testInserts(InsertType insertType) 00443 { 00444 uint nRecords = 200000; 00445 descriptor.rootPageId = NULL_PAGE_ID; 00446 BTreeBuilder builder(descriptor,pRandomSegment); 00447 00448 keyData.compute(builder.getKeyDescriptor()); 00449 keyData[0].pData = reinterpret_cast(&record.key); 00450 00451 tupleData.compute(descriptor.tupleDescriptor); 00452 00453 builder.createEmptyRoot(); 00454 descriptor.rootPageId = builder.getRootPageId(); 00455 00456 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment( 00457 pCache, 00458 1); 00459 00460 bool monotonic = (insertType == MONOTONIC); 00461 BTreeWriter writer(descriptor,scratchAccessor,monotonic); 00462 00463 std::vector keys; 00464 for (uint i = 0; i < nRecords; i++) { 00465 keys.push_back(i); 00466 } 00467 if (insertType == RANDOM) { 00468 std::random_shuffle(keys.begin(), keys.end()); 00469 } 00470 00471
00472
00473 for (uint i = 0; i < nRecords; i++) { 00474 record.key = keys[i]; 00475 record.value = -keys[i]; 00476 marshalRecord(); 00477 writer.insertTupleFromBuffer( 00478 recordBuf.get(), DUP_FAIL); 00479 } 00480 00481 BTreeReader reader(descriptor); 00482 for (uint i = 0; i < nRecords; ++i) { 00483 record.key = i; 00484 record.value = -i; 00485 if (!reader.searchForKey(keyData,DUP_SEEK_ANY)) { 00486 BOOST_FAIL("Could not find key #" << i << ": " << record.key); 00487 } 00488 reader.getTupleAccessorForRead().unmarshal(tupleData); 00489 BOOST_CHECK_EQUAL(record.key,readKey()); 00490 BOOST_CHECK_EQUAL(record.value,readValue()); 00491 } 00492 } 00493 00494 void BTreeTest::testMultiKeySearches(uint nKey1, uint nKey2) 00495 { 00496
00497 StandardTypeDescriptorFactory stdTypeFactory; 00498 TupleAttributeDescriptor attrDesc( 00499 stdTypeFactory.newDataType(STANDARD_TYPE_INT_32)); 00500 descriptor.tupleDescriptor.clear(); 00501 descriptor.tupleDescriptor.push_back(attrDesc); 00502 descriptor.tupleDescriptor.push_back(attrDesc); 00503 descriptor.keyProjection.clear(); 00504 descriptor.keyProjection.push_back(0); 00505 00506
00507 descriptor.tupleDescriptor.push_back(attrDesc); 00508 descriptor.keyProjection.push_back(1); 00509 00510 tupleAccessor.compute(descriptor.tupleDescriptor); 00511 recordBuf.reset(new FixedBuffer[tupleAccessor.getMaxByteCount()]); 00512 00513
00514
00515 descriptor.rootPageId = NULL_PAGE_ID; 00516 BTreeBuilder builder(descriptor,pRandomSegment); 00517 00518 tupleData.compute(descriptor.tupleDescriptor); 00519 00520 builder.createEmptyRoot(); 00521 descriptor.rootPageId = builder.getRootPageId(); 00522 00523 SegmentAccessor scratchAccessor = pSegmentFactory->newScratchSegment( 00524 pCache, 00525 1); 00526 00527 BTreeWriter writer(descriptor,scratchAccessor,false); 00528 00529 for (uint i = 0; i < nKey1; i++) { 00530 for (uint j = 0; j < nKey2; j++) { 00531 record.key = i; 00532 record.secondKey = j; 00533 record.value = i + j; 00534 marshalMultiKeyRecord(); 00535 writer.insertTupleFromBuffer(recordBuf.get(), DUP_FAIL); 00536 } 00537 } 00538 00539
00540 TupleDescriptor searchKeyDesc; 00541 searchKeyDesc.push_back(attrDesc); 00542 keyData.compute(searchKeyDesc); 00543 keyData[0].pData = reinterpret_cast(&record.key); 00544 00545
00546 BTreeReader reader(descriptor); 00547 for (uint i = 0; i < nKey1; ++i) { 00548 record.key = i; 00549 00550 if (!reader.searchForKey(keyData,DUP_SEEK_BEGIN)) { 00551 BOOST_FAIL("Could not find begin key #" << i); 00552 } 00553 reader.getTupleAccessorForRead().unmarshal(tupleData); 00554 BOOST_CHECK_EQUAL(i,readKey()); 00555 BOOST_CHECK_EQUAL(0,readSecondKey()); 00556 BOOST_CHECK_EQUAL(i,readMultiKeyValue()); 00557 00558
00559
00560 reader.searchForKey(keyData,DUP_SEEK_END); 00561 00562 if (i == nKey1 - 1) { 00563 if (!reader.isSingular()) { 00564 BOOST_FAIL( 00565 "Should have reached end of tree for end key #" << i); 00566 } 00567 } else { 00568 reader.getTupleAccessorForRead().unmarshal(tupleData); 00569 BOOST_CHECK_EQUAL(i + 1,readKey()); 00570 BOOST_CHECK_EQUAL(0,readSecondKey()); 00571 BOOST_CHECK_EQUAL(i + 1,readMultiKeyValue()); 00572 } 00573 } 00574 reader.endSearch(); 00575 00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586 00587 TupleData multiKeyData; 00588 multiKeyData.compute(builder.getKeyDescriptor()); 00589 multiKeyData[0].pData = reinterpret_cast(&record.key); 00590 multiKeyData[1].pData = reinterpret_cast(&record.secondKey); 00591 for (uint i = 0; i < nKey1; i++) { 00592 for (uint j = 0; j < nKey2; j++) { 00593 record.key = i; 00594 record.secondKey = j; 00595 if (!writer.searchForKey(multiKeyData,DUP_SEEK_ANY)) { 00596 BOOST_FAIL("Could not find key #" << i); 00597 } 00598 writer.deleteCurrent(); 00599 00600 record.secondKey = j + nKey2; 00601 record.value = i + j + nKey2; 00602 marshalMultiKeyRecord(); 00603 writer.insertTupleFromBuffer(recordBuf.get(), DUP_FAIL); 00604 } 00605 } 00606 writer.endSearch(); 00607 for (uint i = 0; i < nKey1; i++) { 00608 record.key = i; 00609 if (!reader.searchForKey(keyData,DUP_SEEK_BEGIN)) { 00610 BOOST_FAIL("Could not find begin key #" << i); 00611 } 00612 reader.getTupleAccessorForRead().unmarshal(tupleData); 00613 BOOST_CHECK_EQUAL(i,readKey()); 00614 BOOST_CHECK_EQUAL(nKey2,readSecondKey()); 00615 BOOST_CHECK_EQUAL(i + nKey2,readMultiKeyValue()); 00616 00617 reader.searchForKey(keyData,DUP_SEEK_END); 00618 00619 if (i == nKey1 - 1) { 00620 if (!reader.isSingular()) { 00621 BOOST_FAIL( 00622 "Should have reached end of tree for end key #" << i); 00623 } 00624 } else { 00625 reader.getTupleAccessorForRead().unmarshal(tupleData); 00626 BOOST_CHECK_EQUAL(i + 1,readKey()); 00627 BOOST_CHECK_EQUAL(nKey2,readSecondKey()); 00628 BOOST_CHECK_EQUAL(i + 1 + nKey2,readMultiKeyValue()); 00629 } 00630 00631 record.secondKey = 0; 00632 reader.searchForKey(multiKeyData,DUP_SEEK_END); 00633 00634 reader.getTupleAccessorForRead().unmarshal(tupleData); 00635 BOOST_CHECK_EQUAL(i,readKey()); 00636 BOOST_CHECK_EQUAL(nKey2,readSecondKey()); 00637 BOOST_CHECK_EQUAL(i + nKey2,readMultiKeyValue()); 00638 } 00639 } 00640 00641 FENNEL_UNIT_TEST_SUITE(BTreeTest); 00642 00643