Fennel: /home/pub/open/dev/fennel/lucidera/test/SamplingExecStreamGenerator.h Source File (original) (raw)

00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 #ifndef Fennel_SamplingExecStreamGenerator_Included 00023 #define Fennel_SamplingExecStreamGenerator_Included 00024 00025 #include "fennel/test/ExecStreamGenerator.h" 00026 #include "fennel/lucidera/colstore/LcsRowScanExecStream.h" 00027 #include <math.h> 00028 00029 FENNEL_BEGIN_NAMESPACE 00030 00037 class BernoulliSamplingExecStreamGenerator 00038 : public MockProducerExecStreamGenerator 00039 { 00040 protected: 00041 boost::shared_ptr generator; 00042 00043 boost::scoped_ptr rng; 00044 00045 uint nColumns; 00046 uint iChildRow; 00047 uint iLastRow; 00048 public: 00049 explicit BernoulliSamplingExecStreamGenerator( 00050 boost::shared_ptr const &generatorInit, 00051 float prob, uint seed, uint nColumnsInit) 00052 : generator(generatorInit), 00053 rng(new BernoulliRng(prob)), 00054 nColumns(nColumnsInit), 00055 iChildRow((uint) -1), 00056 iLastRow((uint) -1) 00057 { 00058 rng->reseed(seed); 00059 } 00060 00061 virtual int64_t generateValue(uint iRow, uint iCol) 00062 { 00063 if (iRow != iLastRow) { 00064 assert(iCol == 0); 00065 00066 iChildRow++; 00067 while (rng->nextValue()) { 00068 for (int i = 0; i < nColumns; i++) { 00069 generator->generateValue(iChildRow, i); 00070 } 00071 iChildRow++; 00072 } 00073 iLastRow = iRow; 00074 } 00075 00076 return generator->generateValue(iChildRow, iCol); 00077 } 00078 }; 00079 00080 class SystemSamplingExecStreamGenerator 00081 : public MockProducerExecStreamGenerator 00082 { 00083 protected: 00084 boost::shared_ptr generator; 00085 00086 uint nColumns; 00087 uint iChildRow; 00088 uint iLastRow; 00089 00090 uint clumpSize; 00091 uint clumpDistance; 00092 uint clumpPos; 00093 00094 public: 00095 explicit SystemSamplingExecStreamGenerator( 00096 boost::shared_ptr const &generatorInit, 00097 float rate, uint nRows, uint nColumnsInit, uint nClumps) 00098 : generator(generatorInit), 00099 nColumns(nColumnsInit), 00100 iChildRow((uint) -1), 00101 iLastRow((uint) -1), 00102 clumpPos((uint) -1) 00103 { 00104 uint sampleSize = (uint)round((double)nRows * (double)rate); 00105 clumpSize = (uint)round((double)sampleSize / (double)nClumps); 00106 clumpDistance = 00107 (uint)round((double)(nRows - sampleSize) / (double)(nClumps - 1)); 00108 00109 uint rowsRequired = 00110 (clumpSize + clumpDistance) * (nClumps - 1) + clumpSize; 00111 if (rowsRequired > nRows && clumpDistance > 0) { 00112 clumpDistance--; 00113 } 00114 00115 00116 00117 00118 } 00119 00120 virtual int64_t generateValue(uint iRow, uint iCol) 00121 { 00122 if (iRow != iLastRow) { 00123 assert(iCol == 0); 00124 00125 iChildRow++; 00126 clumpPos++; 00127 00128 if (clumpPos >= clumpSize) { 00129
00130 for (uint i = 0; i < clumpDistance; i++) { 00131 00132 for (int j = 0; j < nColumns; j++) { 00133 generator->generateValue(iChildRow, j); 00134 } 00135 iChildRow++; 00136 } 00137 clumpPos = 0; 00138 } 00139 iLastRow = iRow; 00140 00141 00142 } 00143 00144 return generator->generateValue(iChildRow, iCol); 00145 } 00146 }; 00147 00148 FENNEL_END_NAMESPACE 00149 00150 #endif 00151 00152