fixed bugs reported by m^3 · inikep/lizard@2815b94 (original) (raw)

4 files changed

lines changed

Original file line number	Diff line number	Diff line change
@@ -10,6 +10,12 @@ The improvement in compression ratio is caused mainly because of:
10	10
11	11	In my experiments there is no open-source bytewise compressor that gives a better ratio than lz5hc.
12	12
	13	+[LZ4]: https://github.com/Cyan4973/lz4
	14	+
	15	+
	16	+The codewords description
	17	+-------------------------
	18	+
13	19	LZ5 uses different output codewords and is not compatible with LZ4. LZ4 output codewords are 3 bytes long (24-bit) and look as follows:
14	20	- LLLL_MMMM OOOOOOOO OOOOOOOO - 16-bit offset, 4-bit match length, 4-bit literal length
15	21
@@ -25,7 +31,6 @@ So we can encode values 0-7 (3-bits) for matches (what means length of 3-10 for
25	31	that 10 bytes. So e.g. 30 is encoded as a flag 7 (match length=10) and a next byte 30-10=20. I tried many different variants (e.g. separate match lengths and literal lengths)
26	32	but these codewords were the best.
27	33
28		-[LZ4]: https://github.com/Cyan4973/lz4
29	34
30	35	Benchmarks
31	36	-------------------------

Original file line number	Diff line number	Diff line change
@@ -743,12 +743,12 @@ int LZ5_loadDict (LZ5_stream_t* LZ5_dict, const char* dictionary, int dictSize)
743	743	if ((dict->initCheck) \|
744	744	LZ5_resetStream(LZ5_dict);
745	745
746		-if (dictSize < (int)HASH_UNIT)
	746	+/* if (dictSize < (int)HASH_UNIT)
747	747	{
748	748	dict->dictionary = NULL;
749	749	dict->dictSize = 0;
750	750	return 0;
751		- }
	751	+ }*/
752	752
753	753	if ((dictEnd - p) > LZ5_DICT_SIZE) p = dictEnd - LZ5_DICT_SIZE;
754	754	dict->currentOffset += LZ5_DICT_SIZE;
@@ -863,6 +863,8 @@ int LZ5_compress_forceExtDict (LZ5_stream_t* LZ5_dict, const char* source, char*
863	863	int LZ5_saveDict (LZ5_stream_t* LZ5_dict, char* safeBuffer, int dictSize)
864	864	{
865	865	LZ5_stream_t_internal* dict = (LZ5_stream_t_internal*) LZ5_dict;
	866	+if (!dict->dictionary)
	867	+return 0;
866	868	const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
867	869
868	870	if ((U32)dictSize > LZ5_DICT_SIZE) dictSize = LZ5_DICT_SIZE; /* useless to define a dictionary > LZ5_DICT_SIZE */

Original file line number	Diff line number	Diff line change
@@ -1627,7 +1627,7 @@ int LZ5_saveDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, char* safeBuffer, int dictS
1627	1627	LZ5HC_Data_Structure* streamPtr = (LZ5HC_Data_Structure*)LZ5_streamHCPtr;
1628	1628	int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
1629	1629	if (dictSize > LZ5_DICT_SIZE) dictSize = LZ5_DICT_SIZE;
1630		-if (dictSize < 4) dictSize = 0;
	1630	+// if (dictSize < 4) dictSize = 0;
1631	1631	if (dictSize > prefixSize) dictSize = prefixSize;
1632	1632	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
1633	1633	{

Original file line number	Diff line number	Diff line change
@@ -576,7 +576,7 @@ static int local_LZ5_decompress_safe_partial(const char* in, char* out, int inSi
576	576	/* frame functions */
577	577	static int local_LZ5F_compressFrame(const char* in, char* out, int inSize)
578	578	{
579		-return (int)LZ5F_compressFrame(out, 2*inSize + 16, in, inSize, NULL);
	579	+return (int)LZ5F_compressFrame(out, LZ5F_compressFrameBound(inSize, NULL), in, inSize, NULL);
580	580	}
581	581
582	582	static LZ5F_decompressionContext_t g_dCtx;
@@ -731,7 +731,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
731	731	chunkP[0].origSize = (int)benchedSize; nbChunks=1;
732	732	break;
733	733	case 40: compressionFunction = local_LZ5_saveDict; compressorName = "LZ5_saveDict";
734		-LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize);
	734	+if (!LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize)) continue;
735	735	break;
736	736	case 41: compressionFunction = local_LZ5_saveDictHC; compressorName = "LZ5_saveDictHC";
737	737	if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0))