Fennel: /home/pub/open/dev/fennel/lucidera/colstore/LcsHash.cpp Source File

#include "fennel/common/CommonPreamble.h"
#include "fennel/lucidera/colstore/LcsHash.h"

FENNEL_BEGIN_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsHash.cpp#13 $");

/*
 * Byte-substitution table used by computeKey() to scramble each input
 * byte (Pearson-style hashing).
 */
static uint8_t MagicTable[256] =
{
    1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84,
    249, 230, 44, 163, 14, 197, 213, 181, 161, 85, 218, 80,
    64, 239, 24, 226, 236, 142, 38, 200, 110, 177, 104, 103,
    141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
    25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 226,
    236, 142, 38, 235, 97, 234, 57, 22, 60, 250, 82, 175,
    208, 5, 127, 199, 111, 62, 135, 248, 174, 169, 211, 58,
    66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
    132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13,
    153, 246, 216, 219, 119, 68, 223, 78, 83, 88, 201, 99,
    122, 11, 92, 32, 136, 114, 52, 10, 138, 30, 48, 183,
    156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
    170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220,
    90, 53, 23, 131, 125, 173, 15, 238, 79, 95, 89, 16,
    105, 137, 225, 224, 217, 160, 37, 123, 118, 73, 2, 157,
    46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
    27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247,
    19, 205, 39, 203, 233, 40, 186, 147, 198, 192, 155, 33,
    164, 191, 98, 204, 165, 180, 117, 76, 140, 36, 210, 172,
    41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
    51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43,
    71, 109, 184, 209
};

LcsHash::LcsHash()
{
    columnId = 0;
    valCnt = 0;
    maxValueSize = 0;
    magicTable = MagicTable;
    numMatches = 0;
    numChecks = 0;
}

void LcsHash::init(
    PBuffer hashBlockInit,
    SharedLcsClusterNodeWriter clusterBlockWriterInit,
    TupleDescriptor const &colTupleDescInit,
    uint columnIdInit,
    uint blockSizeInit)
{
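    /*
     * Zeroes out the hash block, then lays the hash table out inside it.
     */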
    memset(hashBlockInit, 0, blockSizeInit);
    hash.init(hashBlockInit, blockSizeInit);

    columnId = columnIdInit;
    valCnt = 0;
    maxValueSize = 0;
    clusterBlockWriter = clusterBlockWriterInit;

    assert(colTupleDescInit.size() == 1);
    colTupleDesc = colTupleDescInit;
    attrAccessor.compute(colTupleDesc[0]);

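    /*
     * Allocates the tuple instances used to compare a value being
     * inserted against values already stored in the cluster block.
     */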
    colTuple.computeAndAllocate(colTupleDesc);
    searchTuple.computeAndAllocate(colTupleDesc);

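    /*
     * Allocates a staging buffer large enough to hold any value of this
     * column in its stored format (length information followed by data).
     */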
    colTupleBuffer.reset(
        new FixedBuffer[attrAccessor.getMaxByteCount()]);

    compareInst = SharedLcsCompareColKeyUsingOffsetIndex(
        new LcsCompareColKeyUsingOffsetIndex(
            clusterBlockWriter, &hash, colTupleDesc, columnId, attrAccessor));
}

void LcsHash::insert(
    TupleDatum &colTupleDatum,
    LcsHashValOrd *valOrd,
    bool *undoInsert)
{
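    /*
     * Converts the datum to stored format in the staging buffer, then
     * delegates to the PBuffer version of insert().
     */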
    PBuffer dataWithLen = colTupleBuffer.get();
    attrAccessor.storeValue(colTupleDatum, dataWithLen);

    insert(dataWithLen, valOrd, undoInsert);
}

void LcsHash::insert(
    PBuffer dataWithLen,
    LcsHashValOrd *valOrd,
    bool *undoInsert)
{
    uint key;
    uint16_t newValueOffset;
    LcsHashValueNode *vPtr = 0;
    TupleStorageByteLength storageLength;

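    /*
     * In no-compress mode the hash table is not consulted, so every
     * value is given the dummy key 0.
     */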
    bool noCompress = clusterBlockWriter->noCompressMode(columnId);
    key = noCompress ? 0 : computeKey(dataWithLen);

    *undoInsert = false;

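    /*
     * The value is new to the hash table (or compression is off): store
     * it in the block and add a node for it.
     */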
    if (noCompress || !search(key, dataWithLen, valOrd, &vPtr)) {
        LcsHashValueNode *newNode;

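        /*
         * The insert must be undone if the hash table is out of nodes or
         * if addValue() reports (by returning true, as used here) that
         * the block has no room for the new value.
         */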
        *undoInsert =
            hash.isFull() ||
            clusterBlockWriter->addValue(
                columnId, dataWithLen, &newValueOffset);

        if (*undoInsert) {
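            /*
             * Nothing was added, so there is nothing to roll back.
             */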
            undo.set(NOTHING, key, maxValueSize, 0);
            return;
        }

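        /*
         * Sets up the new node: the offset of the stored value in the
         * block, the next value ordinal, and the flag marking it as part
         * of the current batch.
         */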
        newNode = hash.getNewValueNode();
        newNode->valueOffset = newValueOffset;
        *valOrd = valCnt++;
        valOrd->setValueInBatch();
        newNode->valueOrd = *valOrd;

        hash.insertNewValueNode(key, newNode);

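        /*
         * Records that a brand-new entry was added, so undoInsert() can
         * remove it, and tracks the largest stored value size seen.
         */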
        undo.set(NEWENTRY, key, maxValueSize, 0);

        storageLength = attrAccessor.getStoredByteCount(dataWithLen);

        if (storageLength > maxValueSize) {
            maxValueSize = storageLength;
        }
    } else {
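        /*
         * The value is already in the hash table. It still has to be
         * counted once per batch by the block writer.
         */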
        bool bFirstTimeInBatch = !valOrd->isValueInBatch();

        *undoInsert =
            clusterBlockWriter->addValue(columnId, bFirstTimeInBatch);

        if (*undoInsert) {
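            /*
             * Nothing was changed, so there is nothing to roll back.
             */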
            undo.set(NOTHING, key, maxValueSize, 0);
            return;
        }

        (vPtr->valueOrd).setValueInBatch();
        *valOrd = vPtr->valueOrd;

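        /*
         * If this is the value's first appearance in the current batch,
         * undoInsert() must clear the in-batch flag again.
         */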
        if (bFirstTimeInBatch) {
            undo.set(NEWBATCHVALUE, key, maxValueSize, vPtr);
        } else {
            undo.set(NOTHING, key, maxValueSize, 0);
        }
    }
}

void LcsHash::undoInsert(TupleDatum &colTupleDatum)
{
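    /*
     * Converts the datum to stored format in the staging buffer, then
     * delegates to the PBuffer version of undoInsert().
     */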
    PBuffer dataWithLen = colTupleBuffer.get();
    attrAccessor.storeValue(colTupleDatum, dataWithLen);

    undoInsert(dataWithLen);
}

void LcsHash::undoInsert(PBuffer dataWithLen)
{
    switch (undo.what) {
    case NOTHING:
        {
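            /*
             * The hash table was not touched; only the block writer's
             * bookkeeping needs to be undone.
             */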
            clusterBlockWriter->undoValue(columnId, NULL, false);
            break;
        }
    case NEWENTRY:
        {
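            /*
             * A whole new entry was added: drop the value node from the
             * hash, restore the previous maximum value size, and undo
             * the value stored in the block.
             */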
            valCnt--;
            hash.undoNewValueNode(undo.key);
            maxValueSize = undo.origMaxValueSize;
            clusterBlockWriter->undoValue(columnId, dataWithLen, true);
            break;
        }
    case NEWBATCHVALUE:
        {
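            /*
             * The value already existed but was newly marked for this
             * batch: undo the block writer's batch bookkeeping and clear
             * the in-batch flag.
             */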
            clusterBlockWriter->undoValue(columnId, NULL, true);
            undo.vPtr->valueOrd.clearValueInBatch();
            break;
        }
    }
    undo.reset();
}

bool LcsHash::search(
    uint key,
    PBuffer dataWithLen,
    LcsHashValOrd *valOrd,
    LcsHashValueNode **vNode = 0)
{
    LcsHashValueNode *valueNode;
    int compareRes;

    attrAccessor.loadValue(colTuple[0], dataWithLen);

    for (valueNode = hash.getFirstValueNode(key);
        valueNode != NULL;
        valueNode = hash.getNextValueNode(valueNode))
    {
        numChecks++;

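        /*
         * valueOffset == 0 marks an entry invalidated by
         * clearFixedEntries(); skip it.
         */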
        if (valueNode->valueOffset == 0) {
            continue;
        }

        attrAccessor.loadValue(
            searchTuple[0],
            clusterBlockWriter->getOffsetPtr(columnId, valueNode->valueOffset));

        compareRes = colTupleDesc.compareTuples(colTuple, searchTuple);

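        /*
         * Re-points searchTuple at its own buffer; loadValue() may have
         * left it referencing memory inside the cluster block.
         */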
        searchTuple.resetBuffer();

        if (compareRes == 0) {
            numMatches++;
            *valOrd = valueNode->valueOrd;
            if (vNode) {
                *vNode = valueNode;
            }
            colTuple.resetBuffer();
            return true;
        }
    }

    colTuple.resetBuffer();

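    /*
     * No node with a matching value was found.
     */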
    return false;
}

void LcsHash::prepareCompressedBatch(
    uint8_t *rowArray,
    uint numRows,
    uint16_t *numVals,
    uint16_t *offsetIndexVector)
{
    uint16_t i;
    uint16_t *rowWORDArray = (uint16_t *) rowArray;
    *numVals = 0;

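    /*
     * Collects the ordinals of all values that appear in this batch into
     * offsetIndexVector.
     */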
    for (i = 0; i < valCnt; i++) {
        if ((hash.valueNodes[i].valueOrd).isValueInBatch()) {
            hash.valueNodes[i].sortedOrd = *numVals;
            offsetIndexVector[(*numVals)++] = i;
        }
    }

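    /*
     * Sorts the ordinals by the key values they refer to, using the
     * comparison functor that loads the stored values.
     */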
    std::sort(
        offsetIndexVector,
        offsetIndexVector + (*numVals),
        LcsCompare(compareInst));

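    /*
     * Records each value's rank in the sorted order.
     */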
    for (i = 0; i < *numVals; i++) {
        hash.valueNodes[offsetIndexVector[i]].sortedOrd = i;
    }

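    /*
     * Replaces the ordinals in offsetIndexVector with the corresponding
     * value offsets, which is what the compressed batch stores.
     */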
    for (i = 0; i < *numVals; i++) {
        offsetIndexVector[i] =
            hash.valueNodes[offsetIndexVector[i]].valueOffset;
    }

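    /*
     * Translates each row's value ordinal into the value's rank in the
     * sorted order.
     */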
    for (i = 0; i < numRows; i++) {
        rowWORDArray[i] = hash.valueNodes[rowWORDArray[i]].sortedOrd;
    }
}

void LcsHash::prepareFixedOrVariableBatch(
    uint8_t *rowArray,
    uint numRows)
{
    uint i;
    uint16_t *rowWORDArray = (uint16_t *) rowArray;
    LcsHashValueNode *pValueNodes;

    pValueNodes = hash.valueNodes;

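    /*
     * Fixed and variable batches reference values directly by their
     * offsets in the block rather than by sorted rank.
     */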
    for (i = 0; i < numRows; i++) {
        rowWORDArray[i] = pValueNodes[rowWORDArray[i]].valueOffset;
    }
}

void LcsHash::clearFixedEntries()
{
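    /*
     * Clears the offsets of values used by the current batch, but only
     * in no-compress mode; compressed batches still need their entries.
     */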
    if (clusterBlockWriter->noCompressMode(columnId)) {
        for (uint i = 0; i < valCnt; i++) {
            if ((hash.valueNodes[i].valueOrd).isValueInBatch()) {
                hash.valueNodes[i].valueOffset = 0;
            }
        }
    }
}

void LcsHash::restore(uint numVals, uint16_t lastValOff)
{
    uint i;
    uint key;
    PBuffer dataWithLen;
    LcsHashValueNode *newNode;
    LcsHashValOrd dummy;
    LcsHashValOrd valOrd;
    TupleStorageByteLength storageLength;

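    /*
     * Rebuilds hash entries from values already stored in the block,
     * following the offset chain that starts at lastValOff, until all
     * numVals values are restored or the hash table fills up.
     */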
    bool noCompress = clusterBlockWriter->noCompressMode(columnId);

    for (i = 0; i < numVals && !(hash.isFull()); i++) {
        dataWithLen = clusterBlockWriter->getOffsetPtr(columnId, lastValOff);
        key = noCompress ? 0 : computeKey(dataWithLen);

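        /*
         * Only adds a node if the value is not already in the hash table
         * (or if compression is off and the hash is not consulted).
         */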
        if (noCompress || !search(key, dataWithLen, &dummy)) {
            newNode = hash.getNewValueNode();

            valOrd = valCnt++;
            newNode->valueOrd = valOrd;
            newNode->valueOffset = (uint16_t) lastValOff;

            hash.insertNewValueNode(key, newNode);

            storageLength = attrAccessor.getStoredByteCount(dataWithLen);
            if (storageLength > maxValueSize) {
                maxValueSize = storageLength;
            }
        }

        lastValOff = clusterBlockWriter->getNextVal(
            columnId,
            (uint16_t) lastValOff);
    }
}

void LcsHash::startNewBatch(uint leftOvers)
{
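    /*
     * In no-compress mode, or when the hash cannot also hold the
     * leftover rows carried into the next batch, starts over with an
     * empty hash; otherwise keeps the value nodes and resets only
     * per-batch state.
     */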
    if (clusterBlockWriter->noCompressMode(columnId) ||
        hash.isFull(leftOvers))
    {
        hash.resetHash();
        valCnt = 0;
        maxValueSize = 0;
    } else {
        hash.resetBatch();
    }
}

/*
 * Computes a hash key from a value in stored format, maintaining two
 * parallel key bytes that are chained through MagicTable.
 */
uint LcsHash::computeKey(PBuffer dataWithLen)
{
    uint8_t keyVal[2] = {0, 0}, oldKeyVal[2] = {0, 17};
    uint i, colSize = attrAccessor.getStoredByteCount(dataWithLen);

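    /*
     * Feeds each byte (including the length prefix) through the magic
     * table, chaining the previous key bytes, then folds the two key
     * bytes into one 16-bit value reduced modulo the table size.
     */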
    for (i = 0;
        i < colSize;
        oldKeyVal[0] = keyVal[0], oldKeyVal[1] = keyVal[1], i++, dataWithLen++)
    {
        keyVal[0] = magicTable[oldKeyVal[0] ^ *dataWithLen];
        keyVal[1] = magicTable[oldKeyVal[1] ^ *dataWithLen];
    }

    return ((keyVal[1] << 8) + keyVal[0]) % hash.numHashEntries();
}

/*
 * LcsHashTable lays out the hash entry array and the value nodes within
 * a single block.
 */
void LcsHashTable::init(PBuffer hashBlockInit, uint hashBlockSizeInit)
{
    hashBlock = hashBlockInit;
    hashBlockSize = hashBlockSizeInit;
    memset(hashBlock, 0, hashBlockSize);

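    /*
     * Divides the block between the entry array and the value nodes:
     * hashTableSize is the number of (entry, value node) pairs that fit.
     */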
    hashTableSize = hashBlockSize /
        (sizeof(uint16_t) + sizeof(LcsHashValueNode));

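    /*
     * The entry array lives at the front of the block.
     */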
    entry = (uint16_t *) hashBlock;

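    /*
     * The value nodes follow immediately after the entry array.
     */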
    valueNodes = reinterpret_cast<LcsHashValueNode *>(
        hashBlock + sizeof(uint16_t) * hashTableSize);

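    /*
     * No value nodes have been handed out yet.
     */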
    nextValueNode = 0;
}

LcsCompareColKeyUsingOffsetIndex::LcsCompareColKeyUsingOffsetIndex(
    SharedLcsClusterNodeWriter clusterBlockWriterInit,
    LcsHashTable *hashTableInit,
    TupleDescriptor const &colTupleDescInit,
    uint columnIdInit,
    UnalignedAttributeAccessor const &attrAccessorInit)
{
    hashTable = hashTableInit;
    clusterBlockWriter = clusterBlockWriterInit;
    columnId = columnIdInit;
    colTupleDesc = colTupleDescInit;
    colTuple1.computeAndAllocate(colTupleDesc);
    colTuple2.computeAndAllocate(colTupleDesc);
    attrAccessor = attrAccessorInit;

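    /*
     * This comparator only supports single-column keys.
     */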
    assert((colTuple1.size() == 1) && (colTuple2.size() == 1));
}

bool LcsCompareColKeyUsingOffsetIndex::lessThan(
    const uint16_t colKeyOffsetIndex1,
    const uint16_t colKeyOffsetIndex2)
{
    bool isLessThan = false;

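    /*
     * Loads the two values referenced by the offset indices so they can
     * be compared as tuples.
     */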
    attrAccessor.loadValue(
        colTuple1[0],
        clusterBlockWriter->getOffsetPtr(
            columnId, hashTable->valueNodes[colKeyOffsetIndex1].valueOffset));

    attrAccessor.loadValue(
        colTuple2[0],
        clusterBlockWriter->getOffsetPtr(
            columnId, hashTable->valueNodes[colKeyOffsetIndex2].valueOffset));

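    /*
     * compareTuples() returns a negative, zero, or positive ordering
     * value; less-than corresponds to a negative result.
     */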
    isLessThan = colTupleDesc.compareTuples(colTuple1, colTuple2) < 0;

    colTuple1.resetBuffer();
    colTuple2.resetBuffer();

    return isLessThan;
}

FENNEL_END_CPPFILE("$Id: //open/dev/fennel/lucidera/colstore/LcsHash.cpp#13 $");