Using and extending the code (original) (raw)

Compressing/Decompressing data (C++)

Here is how to compress/decompress a block to/from a file using RLT+TEXT as transform, Huffman as entropy codec, using a block size of 1 MB, 4 jobs and a checksum.

Just create a CompressedOutputStream/CompressedInputStream to write/read compressed data.

Notice that CompressedOutputStream is a std::ostream and CompressedInputStream is a std::istream, so most operations on streams work as usual.

#include <cstdio>
#include <fstream>
#include <iostream>
#include "types.hpp"
#include "InputStream.hpp"
#include "OutputStream.hpp"
#include "io/CompressedInputStream.hpp"
#include "io/CompressedOutputStream.hpp"

using namespace kanzi; using namespace std;

// Compresses 'length' bytes of 'block' into the file "compressed.knz" using
// the RLT+TEXT transform, the Huffman entropy codec, 1 MB blocks, a checksum
// and 4 jobs.
// Returns the value reported by CompressedOutputStream::getWritten() once the
// stream has been closed, or 0 if the output file could not be opened.
uint64 testCompress(byte block[], uint length)
{
    // Create an OutputStream. A local ofstream is used (instead of new/delete)
    // so that the file is released on every exit path.
    ofstream os("compressed.knz", ofstream::out | ofstream::binary);

    if (!os)
        return 0;

    // Create a CompressedOutputStream
    CompressedOutputStream cos(os, "HUFFMAN", "RLT+TEXT", 1024 * 1024, true, 4);

    // Compress block
    cos.write(reinterpret_cast<const char*>(block), length);

    // Close CompressedOutputStream
    cos.close();

    // Get number of bytes written
    return cos.getWritten();
}

// Decompresses up to 'length' bytes from the file "compressed.knz" into
// 'block', using 4 jobs.
// Returns the value reported by CompressedInputStream::getRead() once the
// stream has been closed, or 0 if the input file could not be opened.
uint64 testDecompress(byte block[], uint length)
{
    // Create an InputStream. A local ifstream is used (instead of new/delete)
    // so that the file is released on every exit path.
    ifstream is("compressed.knz", ifstream::in | ifstream::binary);

    if (!is)
        return 0;

    // Create a CompressedInputStream
    CompressedInputStream cis(is, 4);

    // Decompress block
    cis.read(reinterpret_cast<char*>(block), length);

    // Close CompressedInputStream
    cis.close();

    // Get number of bytes read
    return cis.getRead();
}

int main(int argc, const char** argv) { byte block[65536]; FILE* in = fopen("/tmp/enwik8", "rb"); const int sz = fread(block, 1, 65536, in);

if (sz > 0) {
   uint64 c = testCompress(block, sz);
   cout << "Block compressed from " << r << " bytes to " << c << " bytes" << endl; 
   testDecompress(block, sz);
}

return 0;

}

Compressing/Decompressing data (C)

Kanzi exposes a C API (see api/libapi.hpp) and can be built as a static (.a) or a dynamic library (.so/.dll).

Below is an example of a C program compressing and decompressing data using the C API.

/* E.g. gcc testAPI.c -o r:\testAPI.exe -lkanzi */
/* E.g. gcc testAPI.c -o testAPI -lkanzi */

#include "api/libapi.hpp" #include <stdlib.h>

/*
 * Decompresses /tmp/enwik8.knz into /tmp/enwik8.knz.bak with the C API.
 *
 * Returns 0 on success. Otherwise returns the first non-zero code reported by
 * the API, or -1 (not an API code) when a file cannot be opened or the output
 * buffer cannot be allocated.
 */
int testDecompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct dData dd = { blkSize, 4, 0 };
    struct dContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8.knz", "rb");

    if (in == NULL)
        return -1;

    if ((res = initDecompressor(&dd, in, &ctx)) != 0) {
        fclose(in);
        return res;
    }

    FILE* out = fopen("/tmp/enwik8.knz.bak", "wb");
    BYTE* dst = (BYTE*)malloc(blkSize);
    int r = 0, w = 0, inSize = 0, outSize = 0;

    if ((out == NULL) || (dst == NULL)) {
        res = -1;
    }
    else {
        while (1) {
            /* w is in/out: buffer capacity on entry, bytes produced on exit */
            w = blkSize;

            /* Keep the status in res: r must only hold the input byte count */
            if ((res = decompress(ctx, dst, &r, &w)) != 0)
                break;

            /* Nothing written (e.g. nothing left to decode): stop */
            if ((w = (int)fwrite(dst, 1, w, out)) == 0)
                break;

            inSize += r;
            outSize += w;
        }
    }

    /* Always release the context, but do not let it hide an earlier error */
    const int disposeRes = disposeDecompressor(ctx);

    if (res == 0)
        res = disposeRes;

    if (res == 0) {
        printf("Size before decompression: %i bytes(s)\n", inSize);
        printf("Size after decompression:  %i bytes(s)\n", outSize);
    }

    fclose(in);

    if (out != NULL)
        fclose(out);

    free(dst);
    return res;
}

/*
 * Compresses /tmp/enwik8 into /tmp/enwik8.knz with the C API, using the
 * "BWT+RANK+MTFT" transform and the "FPAQ" entropy codec.
 *
 * Returns 0 on success. Otherwise returns the first non-zero code reported by
 * the API, or -1 (not an API code) when a file cannot be opened or the input
 * buffer cannot be allocated.
 */
int testCompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct cData cd = { "BWT+RANK+MTFT", "FPAQ", blkSize, 4, 1, 0 };
    struct cContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8", "rb");

    if (in == NULL)
        return -1;

    /* The output file must exist before it is handed to initCompressor() */
    FILE* out = fopen("/tmp/enwik8.knz", "wb");

    if (out == NULL) {
        fclose(in);
        return -1;
    }

    if ((res = initCompressor(&cd, out, &ctx)) != 0) {
        fclose(in);
        fclose(out);
        return res;
    }

    int r = 0, w = 0, inSize = 0, outSize = 0;
    BYTE* src = (BYTE*)malloc(blkSize);

    if (src == NULL) {
        res = -1;
    }
    else {
        while ((r = (int)fread(src, 1, blkSize, in)) != 0) {
            if ((res = compress(ctx, src, &r, &w)) != 0)
                break;

            inSize += r;
            outSize += w;
        }
    }

    /* Always release the context, but do not let it hide an earlier error */
    const int disposeRes = disposeCompressor(ctx, &w);

    if (disposeRes == 0)
        outSize += w; /* bytes emitted while disposing of the compressor */

    if (res == 0)
        res = disposeRes;

    if (res == 0) {
        printf("Size before compression: %i bytes(s)\n", inSize);
        printf("Size after compression:  %i bytes(s)\n", outSize);
    }

    fclose(in);
    fclose(out);
    free(src);
    return res;
}

/*
 * Runs the compression test and then, only if it succeeded, the decompression
 * test. The exit status is the first non-zero result, or 0 when both pass.
 */
int main(int argc, const char** argv)
{
    int status = testCompress();

    if (status == 0)
        status = testDecompress();

    return status;
}

Implementing a new transform

Here is how to implement and add a new transform to kanzi.

For example:

#include "../Context.hpp" #include "../Transform.hpp"

class SuperDuperTransform : public Transform { public: SuperDuperTransform() {} SuperDuperTransform(Context&) {} ~SuperDuperTransform() {}

   bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
         // Ensure enough room in the destination buffer
         if (output._length - output._index < getMaxEncodedLength(length))
              return false;

         byte* src = &input._array[input._index];
         byte* dst = &output._array[output._index];

         for (int i = 0; i < length; i++)
             dst[i] = src[i] ^ byte(0xAA);

         input._index += length;
         output._index += length;
         return true; 
   }

   bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
         byte* src = &input._array[input._index];
         byte* dst = &output._array[output._index];

         for (int i = 0; i < length; i++)
             dst[i] = src[i] ^ byte(0xAA);

         input._index += length;
         output._index += length;
         return true; 
   }

   int getMaxEncodedLength(int inputLen) const { return inputLen; }

};

Always provide a constructor with a Context: the context contains all the application-wide information (such as block size, number of jobs, input & output names, etc.). Always inherit from Transform and respect the maximum number of jobs provided in the context. Implement the forward and inverse methods as well as getMaxEncodedLength(int). Do not write to stdout or stderr. Be aware that your code must be thread safe.

Add the type, say

// Numeric identifier of the new transform type.
// NOTE(review): presumably this value must not collide with the identifier of
// any existing transform type - confirm against the other *_TYPE constants.
static const uint64 SUPERDUPER_TYPE = 63;

Let us say you use the name "SUPERDUPER" for the transform. Update the following methods:

template <class T> uint64 TransformFactory<T>::getTypeToken(const char* tName) THROW

template <class T> Transform<T>* TransformFactory<T>::newToken(Context& ctx, uint64 functionType) THROW

template <class T> const char* TransformFactory<T>::getNameToken(uint64 functionType) THROW

In Kanzi::printHelp, add the SUPERDUPER transform to the list in the -t option section.