SQLite: fossildelta.c (original) (raw)

`/* ** 2019-02-19 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. **


** ** This SQLite extension implements the delta functions used by the RBU ** extension. Three scalar functions and one table-valued function are ** implemented here: ** ** delta_apply(X,D) -- apply delta D to file X and return the result ** delta_create(X,Y) -- compute and return a delta that carries X into Y ** delta_output_size(D) -- blob size in bytes output from applying delta D ** delta_parse(D) -- returns rows describing delta D ** ** The delta format is the Fossil delta format, described in a comment ** on the delete_create() function implementation below, and also at ** ** https://fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki ** ** This delta format is used by the RBU extension, which is the main ** reason that these routines are included in the extension library. ** RBU does not use this extension directly. Rather, this extension is ** provided as a convenience to developers who want to analyze RBU files ** that contain deltas. */ #include <string.h> #include <assert.h> #include <stdlib.h> #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1

#ifndef SQLITE_AMALGAMATION /* ** The "u32" type must be an unsigned 32-bit integer. Adjust this */ typedef unsigned int u32;

/* ** Must be a 16-bit value */ typedef short int s16; typedef unsigned short int u16;

#endif /* SQLITE_AMALGAMATION */

/* ** The width of a hash window in bytes. The algorithm only works if this ** is a power of 2. */ #define NHASH 16

/* ** The current state of the rolling hash. ** ** z[] holds the values that have been hashed. z[] is a circular buffer. ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of ** the window. ** ** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted ** sum. Hash.b is z[i]NHASH + z[i+1](NHASH-1) + ... + z[i+NHASH-1]*1. ** (Each index for z[] should be module NHASH, of course. The %NHASH operator ** is omitted in the prior expression for brevity.) / typedef struct hash hash; struct hash { u16 a, b; / Hash values / u16 i; / Start of the hash window / char z[NHASH]; / The values that have been hashed */ };

/* ** Initialize the rolling hash using the first NHASH characters of z[] */ static void hash_init(hash *pHash, const char *z){ u16 a, b, i; a = b = z[0]; for(i=1; i<NHASH; i++){ a += z[i]; b += a; } memcpy(pHash->z, z, NHASH); pHash->a = a & 0xffff; pHash->b = b & 0xffff; pHash->i = 0; }

/* ** Advance the rolling hash by a single character "c" */ static void hash_next(hash pHash, int c){ u16 old = pHash->z[pHash->i]; pHash->z[pHash->i] = c; pHash->i = (pHash->i+1)&(NHASH-1); pHash->a = pHash->a - old + c; pHash->b = pHash->b - NHASHold + pHash->a; }

/* ** Return a 32-bit hash value */ static u32 hash_32bit(hash *pHash){ return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16); }

/* ** Compute a hash on NHASH bytes. ** ** This routine is intended to be equivalent to: ** hash h; ** hash_init(&h, zInput); ** return hash_32bit(&h); */ static u32 hash_once(const char *z){ u16 a, b, i; a = b = z[0]; for(i=1; i<NHASH; i++){ a += z[i]; b += a; } return a | (((u32)b)<<16); }

/* ** Write an base-64 integer into the given buffer. */ static void putInt(unsigned int v, char *pz){ static const char zDigits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"; / 123456789 123456789 123456789 123456789 123456789 123456789 123 */ int i, j; char zBuf[20]; if( v==0 ){ *(*pz)++ = '0'; return; } for(i=0; v>0; i++, v>>=6){ zBuf[i] = zDigits[v&0x3f]; } for(j=i-1; j>=0; j--){ *(*pz)++ = zBuf[j]; } }

/* ** Read bytes from *pz and convert them into a positive integer. When ** finished, leave *pz pointing to the first character past the end of ** the integer. The *pLen parameter holds the length of the string ** in *pz and is decremented once for each character in the integer. */ static unsigned int deltaGetInt(const char **pz, int *pLen){ static const signed char zValue[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36, -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1, }; unsigned int v = 0; int c; unsigned char z = (unsigned char)*pz; unsigned char zStart = z; while( (c = zValue[0x7f&(z++)])>=0 ){ v = (v<<6) + c; } z--; *pLen -= z - zStart; pz = (char)z; return v; }

/* ** Return the number digits in the base-64 representation of a positive integer */ static int digit_count(int v){ unsigned int i, x; for(i=1, x=64; v>=x; i++, x <<= 6){} return i; }

#ifdef GNUC

define GCC_VERSION (GNUC*1000000+GNUC_MINOR*1000+GNUC_PATCHLEVEL)

#else

define GCC_VERSION 0

#endif

/* ** Compute a 32-bit big-endian checksum on the N-byte buffer. If the ** buffer is not a multiple of 4 bytes length, compute the sum that would ** have occurred if the buffer was padded with zeros to the next multiple ** of four bytes. / static unsigned int checksum(const char zIn, size_t N){ static const int byteOrderTest = 1; const unsigned char z = (const unsigned char )zIn; const unsigned char zEnd = (const unsigned char)&zIn[N&~3]; unsigned sum = 0; assert( (z - (const unsigned char)0)%4==0 ); / Four-byte alignment / if( 0==(char)&byteOrderTest ){ / This is a big-endian machine */ while( z<zEnd ){ sum += *(unsigned*)z; z += 4; } }else{ /* A little-endian machine */ #if GCC_VERSION>=4003000 while( z<zEnd ){ sum += __builtin_bswap32(*(unsigned*)z); z += 4; } #elif defined(_MSC_VER) && _MSC_VER>=1300 while( z<zEnd ){ sum += _byteswap_ulong(*(unsigned*)z); z += 4; } #else unsigned sum0 = 0; unsigned sum1 = 0; unsigned sum2 = 0; while(N >= 16){ sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]); sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]); sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]); sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]); z += 16; N -= 16; } while(N >= 4){ sum0 += z[0]; sum1 += z[1]; sum2 += z[2]; sum += z[3]; z += 4; N -= 4; } sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24); #endif } switch(N&3){ case 3: sum += (z[2] << 8); case 2: sum += (z[1] << 16); case 1: sum += (z[0] << 24); default: ; } return sum; }

/* ** Create a new delta. ** ** The delta is written into a preallocated buffer, zDelta, which ** should be at least 60 bytes longer than the target file, zOut. ** The delta string will be NUL-terminated, but it might also contain ** embedded NUL characters if either the zSrc or zOut files are ** binary. This function returns the length of the delta string ** in bytes, excluding the final NUL terminator character. ** ** Output Format: ** ** The delta begins with a base64 number followed by a newline. This ** number is the number of bytes in the TARGET file. Thus, given a ** delta file z, a program can compute the size of the output file ** simply by reading the first line and decoding the base-64 number ** found there. The delta_output_size() routine does exactly this. ** ** After the initial size number, the delta consists of a series of ** literal text segments and commands to copy from the SOURCE file. ** A copy command looks like this: ** ** NNN@MMM, ** ** where NNN is the number of bytes to be copied and MMM is the offset ** into the source file of the first byte (both base-64). If NNN is 0 ** it means copy the rest of the input file. Literal text is like this: ** ** NNN:TTTTT ** ** where NNN is the number of bytes of text (base-64) and TTTTT is the text. ** ** The last term is of the form ** ** NNN; ** ** In this case, NNN is a 32-bit bigendian checksum of the output file ** that can be used to verify that the delta applied correctly. All ** numbers are in base-64. ** ** Pure text files generate a pure text delta. Binary files generate a ** delta that may contain some binary data. ** ** Algorithm: ** ** The encoder first builds a hash table to help it find matching ** patterns in the source file. 16-byte chunks of the source file ** sampled at evenly spaced intervals are used to populate the hash ** table. ** ** Next we begin scanning the target file using a sliding 16-byte ** window. The hash of the 16-byte window in the target is used to ** search for a matching section in the source file. When a match ** is found, a copy command is added to the delta. An effort is ** made to extend the matching section to regions that come before ** and after the 16-byte hash window. A copy command is only issued ** if the result would use less space that just quoting the text ** literally. Literal text is added to the delta for sections that ** do not match or which can not be encoded efficiently using copy ** commands. */ static int delta_create( const char zSrc, / The source or pattern file / unsigned int lenSrc, / Length of the source file */ const char zOut, / The target file / unsigned int lenOut, / Length of the target file */ char zDelta / Write the delta into this buffer */ ){ int i, base; char zOrigDelta = zDelta; hash h; int nHash; / Number of hash table entries */ int landmark; / Primary hash table */ int collide; / Collision chain / int lastRead = -1; / Last byte of zSrc read by a COPY command */

/* Add the target file size to the beginning of the delta */ putInt(lenOut, &zDelta); *(zDelta++) = '\n';

/* If the source file is very small, it means that we have no ** chance of ever doing a copy command. Just output a single ** literal segment for the entire target and exit. */ if( lenSrc<=NHASH ){ putInt(lenOut, &zDelta); *(zDelta++) = ':'; memcpy(zDelta, zOut, lenOut); zDelta += lenOut; putInt(checksum(zOut, lenOut), &zDelta); *(zDelta++) = ';'; return zDelta - zOrigDelta; }

/* Compute the hash table used to locate matching sections in the ** source file. / nHash = lenSrc/NHASH; collide = sqlite3_malloc64( (sqlite3_int64)nHash2sizeof(int) ); memset(collide, -1, nHash2*sizeof(int)); landmark = &collide[nHash]; for(i=0; i<lenSrc-NHASH; i+=NHASH){ int hv = hash_once(&zSrc[i]) % nHash; collide[i/NHASH] = landmark[hv]; landmark[hv] = i/NHASH; }

/* Begin scanning the target file and generating copy commands and ** literal sections of the delta. / base = 0; / We have already generated everything before zOut[base] / while( base+NHASH<lenOut ){ int iSrc, iBlock; unsigned int bestCnt, bestOfst=0, bestLitsz=0; hash_init(&h, &zOut[base]); i = 0; / Trying to match a landmark against zOut[base+i] */ bestCnt = 0; while( 1 ){ int hv; int limit = 250;

  hv = hash_32bit(&h) % nHash;
  iBlock = landmark[hv];
  while( iBlock>=0 && (limit--)>0 ){
    /*
    ** The hash window has identified a potential match against
    ** landmark block iBlock.  But we need to investigate further.
    **
    ** Look for a region in zOut that matches zSrc. Anchor the search
    ** at zSrc[iSrc] and zOut[base+i].  Do not include anything prior to
    ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
    **
    ** Set cnt equal to the length of the match and set ofst so that
    ** zSrc[ofst] is the first element of the match.  litsz is the number
    ** of characters between zOut[base] and the beginning of the match.
    ** sz will be the overhead (in bytes) needed to encode the copy
    ** command.  Only generate copy command if the overhead of the
    ** copy command is less than the amount of literal text to be copied.
    */
    int cnt, ofst, litsz;
    int j, k, x, y;
    int sz;
    int limitX;

    /* Beginning at iSrc, match forwards as far as we can.  j counts
    ** the number of characters that match */
    iSrc = iBlock*NHASH;
    y = base+i;
    limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
    for(x=iSrc; x<limitX; x++, y++){
      if( zSrc[x]!=zOut[y] ) break;
    }
    j = x - iSrc - 1;

    /* Beginning at iSrc-1, match backwards as far as we can.  k counts
    ** the number of characters that match */
    for(k=1; k<iSrc && k<=i; k++){
      if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
    }
    k--;

    /* Compute the offset and size of the matching region */
    ofst = iSrc-k;
    cnt = j+k+1;
    litsz = i-k;  /* Number of bytes of literal text before the copy */
    /* sz will hold the number of bytes needed to encode the "insert"
    ** command and the copy command, not counting the "insert" text */
    sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
    if( cnt>=sz && cnt>bestCnt ){
      /* Remember this match only if it is the best so far and it
      ** does not increase the file size */
      bestCnt = cnt;
      bestOfst = iSrc-k;
      bestLitsz = litsz;
    }

    /* Check the next matching block */
    iBlock = collide[iBlock];
  }

  /* We have a copy command that does not cause the delta to be larger
  ** than a literal insert.  So add the copy command to the delta.
  */
  if( bestCnt>0 ){
    if( bestLitsz>0 ){
      /* Add an insert command before the copy */
      putInt(bestLitsz,&zDelta);
      *(zDelta++) = ':';
      memcpy(zDelta, &zOut[base], bestLitsz);
      zDelta += bestLitsz;
      base += bestLitsz;
    }
    base += bestCnt;
    putInt(bestCnt, &zDelta);
    *(zDelta++) = '@';
    putInt(bestOfst, &zDelta);
    *(zDelta++) = ',';
    if( bestOfst + bestCnt -1 > lastRead ){
      lastRead = bestOfst + bestCnt - 1;
    }
    bestCnt = 0;
    break;
  }

  /* If we reach this point, it means no match is found so far */
  if( base+i+NHASH>=lenOut ){
    /* We have reached the end of the file and have not found any
    ** matches.  Do an "insert" for everything that does not match */
    putInt(lenOut-base, &zDelta);
    *(zDelta++) = ':';
    memcpy(zDelta, &zOut[base], lenOut-base);
    zDelta += lenOut-base;
    base = lenOut;
    break;
  }

  /* Advance the hash by one character.  Keep looking for a match */
  hash_next(&h, zOut[base+i+NHASH]);
  i++;
}

} /* Output a final "insert" record to get all the text at the end of ** the file that does not match anything in the source file. */ if( base<lenOut ){ putInt(lenOut-base, &zDelta); (zDelta++) = ':'; memcpy(zDelta, &zOut[base], lenOut-base); zDelta += lenOut-base; } / Output the final checksum record. */ putInt(checksum(zOut, lenOut), &zDelta); *(zDelta++) = ';'; sqlite3_free(collide); return zDelta - zOrigDelta; }

/* ** Return the size (in bytes) of the output from applying ** a delta. ** ** This routine is provided so that an procedure that is able ** to call delta_apply() can learn how much space is required ** for the output and hence allocate nor more space that is really ** needed. */ static int delta_output_size(const char *zDelta, int lenDelta){ int size; size = deltaGetInt(&zDelta, &lenDelta); if( zDelta!='\n' ){ / ERROR: size integer not terminated by "\n" */ return -1; } return size; }

/* ** Apply a delta. ** ** The output buffer should be big enough to hold the whole output ** file and a NUL terminator at the end. The delta_output_size() ** routine will determine this size for you. ** ** The delta string should be null-terminated. But the delta string ** may contain embedded NUL characters (if the input and output are ** binary files) so we also have to pass in the length of the delta in ** the lenDelta parameter. ** ** This function returns the size of the output file in bytes (excluding ** the final NUL terminator character). Except, if the delta string is ** malformed or intended for use with a source file other than zSrc, ** then this routine returns -1. ** ** Refer to the delta_create() documentation above for a description ** of the delta file format. */ static int delta_apply( const char zSrc, / The source or pattern file / int lenSrc, / Length of the source file */ const char zDelta, / Delta to apply to the pattern / int lenDelta, / Length of the delta */ char zOut / Write the output into this preallocated buffer */ ){ unsigned int limit; unsigned int total = 0; #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST char *zOrigOut = zOut; #endif

limit = deltaGetInt(&zDelta, &lenDelta); if( zDelta!='\n' ){ / ERROR: size integer not terminated by "\n" */ return -1; } zDelta++; lenDelta--; while( zDelta && lenDelta>0 ){ unsigned int cnt, ofst; cnt = deltaGetInt(&zDelta, &lenDelta); switch( zDelta[0] ){ case '@': { zDelta++; lenDelta--; ofst = deltaGetInt(&zDelta, &lenDelta); if( lenDelta>0 && zDelta[0]!=',' ){ / ERROR: copy command not terminated by ',' / return -1; } zDelta++; lenDelta--; total += cnt; if( total>limit ){ / ERROR: copy exceeds output file size / return -1; } if( ofst+cnt > lenSrc ){ / ERROR: copy extends past end of input / return -1; } memcpy(zOut, &zSrc[ofst], cnt); zOut += cnt; break; } case ':': { zDelta++; lenDelta--; total += cnt; if( total>limit ){ / ERROR: insert command gives an output larger than predicted / return -1; } if( cnt>lenDelta ){ / ERROR: insert count exceeds size of delta / return -1; } memcpy(zOut, zDelta, cnt); zOut += cnt; zDelta += cnt; lenDelta -= cnt; break; } case ';': { zDelta++; lenDelta--; zOut[0] = 0; #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST if( cnt!=checksum(zOrigOut, total) ){ / ERROR: bad checksum / return -1; } #endif if( total!=limit ){ / ERROR: generated size does not match predicted size / return -1; } return total; } default: { / ERROR: unknown delta operator / return -1; } } } / ERROR: unterminated delta */ return -1; }

/* ** SQL functions: delta_create(X,Y) ** ** Return a delta for carrying X into Y. */ static void deltaCreateFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char aOrig; int nOrig; / old blob */ const char aNew; int nNew; / new blob */ char aOut; int nOut; / output delta */

assert( argc==2 ); if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return; nOrig = sqlite3_value_bytes(argv[0]); aOrig = (const char*)sqlite3_value_blob(argv[0]); nNew = sqlite3_value_bytes(argv[1]); aNew = (const char*)sqlite3_value_blob(argv[1]); aOut = sqlite3_malloc64(nNew+70); if( aOut==0 ){ sqlite3_result_error_nomem(context); }else{ nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut); if( nOut<0 ){ sqlite3_free(aOut); sqlite3_result_error(context, "cannot create fossil delta", -1); }else{ sqlite3_result_blob(context, aOut, nOut, sqlite3_free); } } }

/* ** SQL functions: delta_apply(X,D) ** ** Return the result of applying delta D to input X. */ static void deltaApplyFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char aOrig; int nOrig; / The X input */ const char aDelta; int nDelta; / The input delta (D) */ char aOut; int nOut, nOut2; / The output */

assert( argc==2 ); if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return; nOrig = sqlite3_value_bytes(argv[0]); aOrig = (const char*)sqlite3_value_blob(argv[0]); nDelta = sqlite3_value_bytes(argv[1]); aDelta = (const char*)sqlite3_value_blob(argv[1]);

/* Figure out the size of the output */ nOut = delta_output_size(aDelta, nDelta); if( nOut<0 ){ sqlite3_result_error(context, "corrupt fossil delta", -1); return; } aOut = sqlite3_malloc64((sqlite3_int64)nOut+1); if( aOut==0 ){ sqlite3_result_error_nomem(context); }else{ nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut); if( nOut2!=nOut ){ sqlite3_free(aOut); sqlite3_result_error(context, "corrupt fossil delta", -1); }else{ sqlite3_result_blob(context, aOut, nOut, sqlite3_free); } } }

/* ** SQL functions: delta_output_size(D) ** ** Return the size of the output that results from applying delta D. */ static void deltaOutputSizeFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char aDelta; int nDelta; / The input delta (D) / int nOut; / Size of output / assert( argc==1 ); if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; nDelta = sqlite3_value_bytes(argv[0]); aDelta = (const char)sqlite3_value_blob(argv[0]);

/* Figure out the size of the output */ nOut = delta_output_size(aDelta, nDelta); if( nOut<0 ){ sqlite3_result_error(context, "corrupt fossil delta", -1); return; }else{ sqlite3_result_int(context, nOut); } }

/***************************************************************************** ** Table-valued SQL function: delta_parse(DELTA) ** ** Schema: ** ** CREATE TABLE delta_parse( ** op TEXT, ** a1 INT, ** a2 ANY, ** delta HIDDEN BLOB ** ); ** ** Given an input DELTA, this function parses the delta and returns ** rows for each entry in the delta. The op column has one of the ** values SIZE, COPY, INSERT, CHECKSUM, ERROR. ** ** Assuming no errors, the first row has op='SIZE'. a1 is the size of ** the output in bytes and a2 is NULL. ** ** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT' ** rows. A COPY row means content is copied from the source into the ** output. Column a1 is the number of bytes to copy and a2 is the offset ** into source from which to begin copying. An INSERT row means to ** insert text into the output stream. Column a1 is the number of bytes ** to insert and column is a BLOB that contains the text to be inserted. ** ** The last row of a well-formed delta will have an op value of 'CHECKSUM'. ** The a1 column will be the value of the checksum and a2 will be NULL. ** ** If the input delta is not well-formed, then a row with an op value ** of 'ERROR' is returned. The a1 value of the ERROR row is the offset ** into the delta where the error was encountered and a2 is NULL. / typedef struct deltaparsevtab_vtab deltaparsevtab_vtab; typedef struct deltaparsevtab_cursor deltaparsevtab_cursor; struct deltaparsevtab_vtab { sqlite3_vtab base; / Base class - must be first / / No additional information needed / }; struct deltaparsevtab_cursor { sqlite3_vtab_cursor base; / Base class - must be first */ char aDelta; / The delta being parsed / int nDelta; / Number of bytes in the delta / int iCursor; / Current cursor location / int eOp; / Name of current operator / unsigned int a1, a2; / Arguments to current operator / int iNext; / Next cursor value */ };

/* Operator names: */ static const char *azOp[] = { "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF" }; #define DELTAPARSE_OP_SIZE 0 #define DELTAPARSE_OP_COPY 1 #define DELTAPARSE_OP_INSERT 2 #define DELTAPARSE_OP_CHECKSUM 3 #define DELTAPARSE_OP_ERROR 4 #define DELTAPARSE_OP_EOF 5

/* ** The deltaparsevtabConnect() method is invoked to create a new ** deltaparse virtual table. ** ** Think of this routine as the constructor for deltaparsevtab_vtab objects. ** ** All this routine needs to do is: ** ** (1) Allocate the deltaparsevtab_vtab object and initialize all fields. ** ** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the ** result set of queries against the virtual table will look like. */ static int deltaparsevtabConnect( sqlite3 *db, void *pAux, int argc, const char constargv, sqlite3_vtab **ppVtab, char **pzErr ){ deltaparsevtab_vtab *pNew; int rc;

rc = sqlite3_declare_vtab(db, "CREATE TABLE x(op,a1,a2,delta HIDDEN)" ); /* For convenience, define symbolic names for the index to each column. */ #define DELTAPARSEVTAB_OP 0 #define DELTAPARSEVTAB_A1 1 #define DELTAPARSEVTAB_A2 2 #define DELTAPARSEVTAB_DELTA 3 if( rc==SQLITE_OK ){ pNew = sqlite3_malloc64( sizeof(*pNew) ); ppVtab = (sqlite3_vtab)pNew; if( pNew==0 ) return SQLITE_NOMEM; memset(pNew, 0, sizeof(*pNew)); sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); } return rc; }

/* ** This method is the destructor for deltaparsevtab_vtab objects. */ static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){ deltaparsevtab_vtab p = (deltaparsevtab_vtab)pVtab; sqlite3_free(p); return SQLITE_OK; }

/* ** Constructor for a new deltaparsevtab_cursor object. */ static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ deltaparsevtab_cursor *pCur; pCur = sqlite3_malloc( sizeof(*pCur) ); if( pCur==0 ) return SQLITE_NOMEM; memset(pCur, 0, sizeof(*pCur)); *ppCursor = &pCur->base; return SQLITE_OK; }

/* ** Destructor for a deltaparsevtab_cursor. */ static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){ deltaparsevtab_cursor pCur = (deltaparsevtab_cursor)cur; sqlite3_free(pCur->aDelta); sqlite3_free(pCur); return SQLITE_OK; }

/* ** Advance a deltaparsevtab_cursor to its next row of output. */ static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){ deltaparsevtab_cursor pCur = (deltaparsevtab_cursor)cur; const char *z; int i = 0;

pCur->iCursor = pCur->iNext; z = pCur->aDelta + pCur->iCursor; pCur->a1 = deltaGetInt(&z, &i); switch( z[0] ){ case '@': { z++; pCur->a2 = deltaGetInt(&z, &i); pCur->eOp = DELTAPARSE_OP_COPY; pCur->iNext = (int)(&z[1] - pCur->aDelta); break; } case ':': { z++; pCur->a2 = (unsigned int)(z - pCur->aDelta); pCur->eOp = DELTAPARSE_OP_INSERT; pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta); break; } case ';': { pCur->eOp = DELTAPARSE_OP_CHECKSUM; pCur->iNext = pCur->nDelta; break; } default: { if( pCur->iNext==pCur->nDelta ){ pCur->eOp = DELTAPARSE_OP_EOF; }else{ pCur->eOp = DELTAPARSE_OP_ERROR; pCur->iNext = pCur->nDelta; } break; } } return SQLITE_OK; }

/* ** Return values of columns for the row at which the deltaparsevtab_cursor ** is currently pointing. */ static int deltaparsevtabColumn( sqlite3_vtab_cursor cur, / The cursor */ sqlite3_context ctx, / First argument to sqlite3_result_...() / int i / Which column to return */ ){ deltaparsevtab_cursor pCur = (deltaparsevtab_cursor)cur; switch( i ){ case DELTAPARSEVTAB_OP: { sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC); break; } case DELTAPARSEVTAB_A1: { sqlite3_result_int(ctx, pCur->a1); break; } case DELTAPARSEVTAB_A2: { if( pCur->eOp==DELTAPARSE_OP_COPY ){ sqlite3_result_int(ctx, pCur->a2); }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){ sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1, SQLITE_TRANSIENT); } break; } case DELTAPARSEVTAB_DELTA: { sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT); break; } } return SQLITE_OK; }

/* ** Return the rowid for the current row. In this implementation, the ** rowid is the same as the output value. */ static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ deltaparsevtab_cursor pCur = (deltaparsevtab_cursor)cur; *pRowid = pCur->iCursor; return SQLITE_OK; }

/* ** Return TRUE if the cursor has been moved off of the last ** row of output. */ static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){ deltaparsevtab_cursor pCur = (deltaparsevtab_cursor)cur; return pCur->eOp==DELTAPARSE_OP_EOF; }

/* ** This method is called to "rewind" the deltaparsevtab_cursor object back ** to the first row of output. This method is always called at least ** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or ** deltaparsevtabEof(). */ static int deltaparsevtabFilter( sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor; const char a; int i = 0; pCur->eOp = DELTAPARSE_OP_ERROR; if( idxNum!=1 ){ return SQLITE_OK; } pCur->nDelta = sqlite3_value_bytes(argv[0]); a = (const char)sqlite3_value_blob(argv[0]); if( pCur->nDelta==0 || a==0 ){ return SQLITE_OK; } pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 ); if( pCur->aDelta==0 ){ pCur->nDelta = 0; return SQLITE_NOMEM; } memcpy(pCur->aDelta, a, pCur->nDelta); pCur->aDelta[pCur->nDelta] = 0; a = pCur->aDelta; pCur->eOp = DELTAPARSE_OP_SIZE; pCur->a1 = deltaGetInt(&a, &i); if( a[0]!='\n' ){ pCur->eOp = DELTAPARSE_OP_ERROR; pCur->a1 = pCur->a2 = 0; pCur->iNext = pCur->nDelta; return SQLITE_OK; } a++; pCur->iNext = (unsigned int)(a - pCur->aDelta); return SQLITE_OK; }

/* ** SQLite will invoke this method one or more times while planning a query ** that uses the virtual table. This routine needs to create ** a query plan for each invocation and compute an estimated cost for that ** plan. */ static int deltaparsevtabBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ int i; for(i=0; inConstraint; i++){ if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue; if( pIdxInfo->aConstraint[i].usable==0 ) continue; if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; pIdxInfo->aConstraintUsage[i].argvIndex = 1; pIdxInfo->aConstraintUsage[i].omit = 1; pIdxInfo->estimatedCost = (double)1; pIdxInfo->estimatedRows = 10; pIdxInfo->idxNum = 1; return SQLITE_OK; } pIdxInfo->idxNum = 0; pIdxInfo->estimatedCost = (double)0x7fffffff; pIdxInfo->estimatedRows = 0x7fffffff; return SQLITE_CONSTRAINT; }

/* ** This following structure defines all the methods for the ** virtual table. / static sqlite3_module deltaparsevtabModule = { / iVersion / 0, / xCreate / 0, / xConnect / deltaparsevtabConnect, / xBestIndex / deltaparsevtabBestIndex, / xDisconnect / deltaparsevtabDisconnect, / xDestroy / 0, / xOpen / deltaparsevtabOpen, / xClose / deltaparsevtabClose, / xFilter / deltaparsevtabFilter, / xNext / deltaparsevtabNext, / xEof / deltaparsevtabEof, / xColumn / deltaparsevtabColumn, / xRowid / deltaparsevtabRowid, / xUpdate / 0, / xBegin / 0, / xSync / 0, / xCommit / 0, / xRollback / 0, / xFindMethod / 0, / xRename / 0, / xSavepoint / 0, / xRelease / 0, / xRollbackTo / 0, / xShadowName / 0, / xIntegrity */ 0 };

#ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_fossildelta_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines pApi ){ static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS; int rc = SQLITE_OK; SQLITE_EXTENSION_INIT2(pApi); (void)pzErrMsg; / Unused parameter */ rc = sqlite3_create_function(db, "delta_create", 2, enc, 0, deltaCreateFunc, 0, 0); if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0, deltaApplyFunc, 0, 0); } if( rc==SQLITE_OK ){ rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0, deltaOutputSizeFunc, 0, 0); } if( rc==SQLITE_OK ){ rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0); } return rc; } `