Fennel: /home/pub/open/dev/fennel/flatfile/FlatFileParser.h Source File (original) (raw)
00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 #ifndef Fennel_FlatFileParser_Included 00024 #define Fennel_FlatFileParser_Included 00025 00026 00027 #include 00028 00029 FENNEL_BEGIN_NAMESPACE 00030 00031 class FlatFileParser; 00032 typedef boost::shared_ptr SharedFlatFileParser; 00033 00037 class FENNEL_FLATFILE_EXPORT FlatFileColumnParseResult 00038 { 00039 public: 00041 enum DelimiterType { 00043 NO_DELIM = 0, 00045 FIELD_DELIM, 00047 ROW_DELIM, 00049 MAX_LENGTH 00050 }; 00051 00055 DelimiterType type; 00056 00060 uint size; 00061 00065 char *next; 00066 00071 void setResult(DelimiterType type, char *buffer, uint size); 00072 }; 00073 00077 class FENNEL_FLATFILE_EXPORT FlatFileRowParseResult 00078 { 00079 public: 00081 enum RowStatus { 00085 NO_STATUS = 0, 00090 INCOMPLETE_COLUMN, 00094 ROW_TOO_LARGE, 00099 NO_COLUMN_DELIM, 00103 TOO_FEW_COLUMNS, 00107 TOO_MANY_COLUMNS 00108 }; 00109 00110 explicit FlatFileRowParseResult(); 00111 void reset(); 00112 00116 RowStatus status; 00117 00121 VectorOfUint offsets; 00122 00126 VectorOfUint sizes; 00127 00131 VectorOfUint strippedSizes; 00132 00136 char *current; 00137 00142 char *next; 00143 00147 uint nRowDelimsRead; 00148 00152 uint getReadCount() 00153 { 00154 return offsets.size(); 00155 } 00156 00163 char *getColumn(uint iColumn) 00164 { 00165 if (sizes[iColumn] == 0) { 00166 return NULL; 00167 } 00168 return current + offsets[iColumn]; 00169 } 00170 00174 uint getRawColumnSize(uint iColumn) 00175 { 00176 return sizes[iColumn]; 00177 } 00178 00182 uint getColumnSize(uint iColumn) 00183 { 00184 return strippedSizes[iColumn]; 00185 } 00186 00190 void clear() 00191 { 00192 offsets.clear(); 00193 sizes.clear(); 00194 } 00195 00199 void resize(uint nColumns) 00200 { 00201 offsets.resize(nColumns); 00202 sizes.resize(nColumns); 00203 } 00204 00208 void setColumn(uint iColumn, uint offset, uint size) 00209 { 00210 offsets[iColumn] = offset; 00211 sizes[iColumn] = size; 00212 } 00213 00217 void setNull(uint iColumn) 00218 { 00219 setColumn(iColumn, 0, 0); 00220 } 00221 00225 void addColumn(uint offset, uint size) 00226 { 00227 offsets.push_back(offset); 00228 sizes.push_back(size); 00229 } 00230 }; 00231 00235 class FENNEL_FLATFILE_EXPORT FlatFileColumnDescriptor 00236 { 00237 public: 00238 uint maxLength; 00239 00240 #ifdef MSVC 00241 explicit FlatFileColumnDescriptor() 00242 { 00243 maxLength = 0; 00244 } 00245 #endif 00246 00247 explicit FlatFileColumnDescriptor(uint maxLengthInit) 00248 { 00249 maxLength = maxLengthInit; 00250 } 00251 }; 00252 00261 class FENNEL_FLATFILE_EXPORT FlatFileRowDescriptor 00262 : public std::vector 00263 { 00264 bool bounded; 00265 bool lenient; 00266 00267 VectorOfUint columnMap; 00268 00269 public: 00273 static const int MAX_COLUMNS = 1024; 00274 00279 static const int MAX_COLUMN_LENGTH = 65535; 00280 00284 FlatFileRowDescriptor(); 00285 00290 void setUnbounded(); 00291 00296 bool isBounded() const; 00297 00304 void setMap(VectorOfUint map) 00305 { 00306 columnMap = map; 00307 } 00308 00312 bool isMapped() const 00313 { 00314 return columnMap.size() > 0; 00315 } 00316 00321 int getMap(uint iSource) const 00322 { 00323 if (iSource >= columnMap.size()) { 00324 return -1; 00325 } 00326 return columnMap[iSource]; 00327 } 00328 00329 void setLenient(bool lenientIn) 00330 { 00331 lenient = lenientIn; 00332 } 00333 00334 bool isLenient() const 00335 { 00336 return lenient; 00337 } 00338 00345 uint getMaxColumns() const 00346 { 00347 if (!bounded) { 00348 return MAX_COLUMNS; 00349 } else if (isMapped()) { 00350 return columnMap.size(); 00351 } else { 00352 return size(); 00353 } 00354 } 00355 00361 uint getMaxLength(uint i) const 00362 { 00363 uint realIndex = 0; 00364 if (!bounded) { 00365 return MAX_COLUMN_LENGTH; 00366 } else if (isMapped()) { 00367 realIndex = getMap(i); 00368 } else { 00369 realIndex = i; 00370 } 00371 if (realIndex < 0 || realIndex >= size()) { 00372 return MAX_COLUMN_LENGTH; 00373 } else { 00374 return (*this)[realIndex].maxLength; 00375 } 00376 } 00377 }; 00378 00389 class FENNEL_FLATFILE_EXPORT FlatFileParser 00390 { 00391 char fieldDelim; 00392 char rowDelim; 00393 char quote; 00394 char escape; 00395 bool doTrim; 00396 00400 bool fixed; 00401 00417 const char *scanRowEnd( 00418 const char *buffer, 00419 int size, 00420 bool rowDelim, 00421 FlatFileRowParseResult &result); 00422 00435 const char *scanRowDelim( 00436 const char *buffer, 00437 int size, 00438 bool search); 00439 00445 bool isRowDelim(char c); 00446 00447 public: 00463 FlatFileParser( 00464 const char fieldDelim, 00465 const char rowDelim, 00466 const char quote, 00467 const char escape, 00468 bool doTrim = false); 00469 00498 void scanRow( 00499 const char *buffer, 00500 int size, 00501 const FlatFileRowDescriptor &columns, 00502 FlatFileRowParseResult &result); 00503 00518 void scanColumn( 00519 const char *buffer, 00520 uint size, 00521 uint maxLength, 00522 FlatFileColumnParseResult &result); 00523 00524 00531 void scanFixedColumn( 00532 const char *buffer, 00533 uint size, 00534 uint maxLength, 00535 FlatFileColumnParseResult &result); 00536 00545 void stripQuoting( 00546 FlatFileRowParseResult &rowResult, 00547 bool trim); 00548 00571 uint stripQuoting(char *buffer, uint size, bool untrimmed); 00572 00582 uint trim(char *buffer, uint size); 00583 }; 00584 00585 FENNEL_END_NAMESPACE 00586 00587 #endif 00588 00589