aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDylan <boss@tehbox.org>2026-04-02 11:58:24 +1300
committerDylan <boss@tehbox.org>2026-04-02 11:58:24 +1300
commite9a32a966d49ab798a4c6f24471669fc03adb395 (patch)
tree6373120682604c489816df391c45f17841a838b6 /src
parent93a78ac64327b53f53952b625c7ce8a11bcc8651 (diff)
downloadtehimage-e9a32a966d49ab798a4c6f24471669fc03adb395.tar.gz
tehimage-e9a32a966d49ab798a4c6f24471669fc03adb395.zip
feat: Added basic zlib compression
Currently not incredibly well implemented, but it can handle fixed codes
Diffstat (limited to 'src')
-rw-r--r--src/HashTable.cpp85
-rw-r--r--src/HashTable.h26
-rw-r--r--src/PNGImage.cpp2
-rw-r--r--src/PNGImage.h2
-rw-r--r--src/zlib.cpp258
-rw-r--r--src/zlib.h29
6 files changed, 338 insertions, 64 deletions
diff --git a/src/HashTable.cpp b/src/HashTable.cpp
new file mode 100644
index 0000000..685dad6
--- /dev/null
+++ b/src/HashTable.cpp
@@ -0,0 +1,85 @@
+#include "HashTable.h"
+#include <stdexcept>
+
+namespace TehImage
+{
+ HashTable::HashTable(size_t size)
+ :TABLE_SIZE(size),
+ table(TABLE_SIZE)
+ { }
+
+ unsigned int HashTable::hashFunction(char key[3])
+ {
+ unsigned int hash = 0;
+ std::memcpy(&hash, key, 3);
+ return hash % TABLE_SIZE;
+ }
+
+ void HashTable::insert(char key[3], unsigned int value)
+ {
+ unsigned int index = hashFunction(key);
+ unsigned int keyMem = 0;
+ std::memcpy(&keyMem, key, 3);
+
+ for(auto& pair : table[index])
+ {
+ if(pair.first == keyMem)
+ {
+ pair.second = value;
+ return;
+ }
+ }
+
+ table[index].push_back({keyMem, value});
+ }
+
+ void HashTable::remove(char key[3])
+ {
+
+ unsigned int index = hashFunction(key);
+ unsigned int keyMem = 0;
+ std::memcpy(&keyMem, key, 3);
+
+ auto& chain = table[index];
+ for (auto it = chain.begin(); it != chain.end(); ++it) {
+ if (it->first == keyMem) {
+ chain.erase(it);
+ return;
+ }
+ }
+ }
+
+ unsigned int HashTable::get(char key[3])
+ {
+ unsigned int index = hashFunction(key);
+ unsigned int keyMem = 0;
+ std::memcpy(&keyMem, key, 3);
+
+ for(auto& pair : table[index])
+ {
+ if(pair.first == keyMem)
+ {
+ return pair.second;
+ }
+ }
+
+ throw std::out_of_range("Key not present");
+ }
+
+ bool HashTable::contains(char key[3])
+ {
+ unsigned int index = hashFunction(key);
+ unsigned int keyMem = 0;
+ std::memcpy(&keyMem, key, 3);
+
+ for(auto& pair : table[index])
+ {
+ if(pair.first == keyMem)
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+}
diff --git a/src/HashTable.h b/src/HashTable.h
new file mode 100644
index 0000000..1ede7be
--- /dev/null
+++ b/src/HashTable.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <cstddef>
+#include <cstring>
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace TehImage
+{
+ class HashTable
+ {
+ private:
+ const size_t TABLE_SIZE;
+ std::vector<std::list<std::pair<unsigned int, unsigned int>>> table;
+
+ public:
+ HashTable(size_t size);
+
+ unsigned int hashFunction(char key[3]);
+ void insert(char key[3], unsigned int value);
+ void remove(char key[3]);
+ unsigned int get(char key[3]);
+ bool contains(char key[3]);
+ };
+}
diff --git a/src/PNGImage.cpp b/src/PNGImage.cpp
index 9ab0751..677c811 100644
--- a/src/PNGImage.cpp
+++ b/src/PNGImage.cpp
@@ -185,7 +185,7 @@ namespace TehImage
char outData[1024];
- ZLibInflator inflator;
+ ZLib inflator;
inflator.decodeData((uint8_t*)compressedData, chunkSize - 4, (uint8_t*)outData, 1024);
cout << "iCCP not supported" << endl;
diff --git a/src/PNGImage.h b/src/PNGImage.h
index a5d8f3a..fa78e0e 100644
--- a/src/PNGImage.h
+++ b/src/PNGImage.h
@@ -60,7 +60,7 @@ namespace TehImage
CHUNK_READER(IDAT);
CHUNK_READER(IEND);
- ZLibInflator zlib;
+ ZLib zlib;
uint8_t* idatData;
unsigned long idatDataSize;
diff --git a/src/zlib.cpp b/src/zlib.cpp
index 97f2ded..022b2fc 100644
--- a/src/zlib.cpp
+++ b/src/zlib.cpp
@@ -1,5 +1,7 @@
#include "zlib.h"
+#include "HashTable.h"
+
#include <bitset>
#include <cstdint>
#include <cstring>
@@ -13,8 +15,25 @@ using std::cout, std::endl;
namespace TehImage
{
+
+ const unsigned int lenStart[] = { /* Size base for length codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+ const unsigned int lenExtra[] = { /* Extra bits for length codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
- void ZLibInflator::calculateCodes(uint8_t* lengths, uint16_t* codesOut, int codeCount)
+
+ const unsigned int distStart[] = { /* Offset base for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+ const unsigned int distExtra[] = { /* Extra bits for distance codes 0..29 */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+ void ZLib::calculateCodes(uint8_t* lengths, uint16_t* codesOut, int codeCount)
{
const int biggestLen = 15;
@@ -43,14 +62,12 @@ namespace TehImage
}
}
- void ZLibInflator::buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount)
+ void ZLib::buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount)
{
buildHuffmanTree(lengths, codes, codeCount, &tree);
}
-
-
- void ZLibInflator::buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount, HuffmanTree* tree)
+ void ZLib::buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount, HuffmanTree* tree)
{
for(uint16_t i = 0; i < codeCount; i++)
{
@@ -87,7 +104,32 @@ namespace TehImage
}
}
- void ZLibInflator::buildStaticHuffmanTree()
+ void ZLib::getStaticHuffmanCodes(uint16_t* codesOut, uint8_t* codeLensOut, uint16_t* distCodesOut, uint8_t* distCodeLensOut)
+ {
+
+ const int codeCount = 288;
+ for(int i = 0; i < codeCount; i++)
+ {
+ if(i < 144)
+ codeLensOut[i] = 8;
+ else if(i < 256)
+ codeLensOut[i] = 9;
+ else if(i < 280)
+ codeLensOut[i] = 7;
+ else if(i < 288)
+ codeLensOut[i] = 8;
+ }
+ calculateCodes(codeLensOut, codesOut, codeCount);
+
+ const uint8_t nDistCodes = 32;
+
+ for(int i = 0; i < nDistCodes; i++)
+ distCodeLensOut[i] = 5;
+
+ calculateCodes(distCodeLensOut, distCodesOut, nDistCodes);
+ }
+
+ void ZLib::buildStaticHuffmanTree()
{
if(staticTree)
return;
@@ -96,37 +138,23 @@ namespace TehImage
staticTree = true;
haveTree = true;
}
- void ZLibInflator::buildStaticHuffmanTree(HuffmanTree* treeOut, HuffmanTree* distTreeOut)
+ void ZLib::buildStaticHuffmanTree(HuffmanTree* treeOut, HuffmanTree* distTreeOut)
{
const int codeCount = 288;
uint8_t lens[codeCount];
uint16_t codes[codeCount];
- for(int i = 0; i < codeCount; i++)
- {
- if(i < 144)
- lens[i] = 8;
- else if(i < 256)
- lens[i] = 9;
- else if(i < 280)
- lens[i] = 7;
- else if(i < 288)
- lens[i] = 8;
- }
- calculateCodes(lens, codes, codeCount);
- buildHuffmanTree(lens, codes, codeCount, treeOut);
- uint8_t nDistCodes = 32;
+ const uint8_t nDistCodes = 32;
uint8_t distCodeLens[nDistCodes];
uint16_t distCodes[nDistCodes];
- for(int i = 0; i < nDistCodes; i++)
- distCodeLens[i] = i;
+ getStaticHuffmanCodes(codes, lens, distCodes, distCodeLens);
- calculateCodes(distCodeLens, distCodes, nDistCodes);
+ buildHuffmanTree(lens, codes, codeCount, treeOut);
buildHuffmanTree(distCodeLens, distCodes, nDistCodes, distTreeOut);
}
- void ZLibInflator::buildDynamicHuffmanTree(StreamData* stream)
+ void ZLib::buildDynamicHuffmanTree(StreamData* stream)
{
if(haveTree)
{
@@ -136,7 +164,7 @@ namespace TehImage
buildDynamicHuffmanTree(stream, &tree, &distTree);
haveTree = true;
}
- void ZLibInflator::buildDynamicHuffmanTree(StreamData* stream, HuffmanTree* treeOut, HuffmanTree* distTreeOut)
+ void ZLib::buildDynamicHuffmanTree(StreamData* stream, HuffmanTree* treeOut, HuffmanTree* distTreeOut)
{
unsigned int nLitCodes = nextBits(stream, 5) + 257;
uint16_t litCodes[nLitCodes];
@@ -220,12 +248,12 @@ namespace TehImage
free();
}
- uint16_t ZLibInflator::getNextCode(StreamData* stream)
+ uint16_t ZLib::getNextCode(StreamData* stream)
{
return getNextCode(stream, &tree);
}
- uint16_t ZLibInflator::getNextCode(StreamData* stream, HuffmanTree* tree)
+ uint16_t ZLib::getNextCode(StreamData* stream, HuffmanTree* tree)
{
while(tree->val == 0xFFFF)
{
@@ -240,7 +268,7 @@ namespace TehImage
return tree->val;
}
- bool ZLibInflator::nextBit(StreamData* stream)
+ bool ZLib::nextBit(StreamData* stream)
{
long bit = stream->pos % 8;
long byte = stream->pos / 8;
@@ -254,38 +282,51 @@ namespace TehImage
return (stream->data[byte] & (0b1 << bit))?0b1:0b0;
}
- uint16_t ZLibInflator::nextBits(StreamData* stream, int bits)
+ uint16_t ZLib::nextBits(StreamData* stream, int count)
{
- if(bits > 16)
- cout << "Too many bits(" << bits << ")!!!" << endl;
+ if(count > 16)
+ cout << "Too many bits(" << count << ")!!!" << endl;
uint16_t out = 0;
- for(int i = 0; i < bits; i++)
+ for(int i = 0; i < count; i++)
{
out |= nextBit(stream) << i;
}
return out;
}
- int ZLibInflator::decodeData(uint8_t* data, unsigned long length, uint8_t* out, unsigned long outLength)
+ void ZLib::writeBit(StreamData* stream, bool bit)
{
- staticTree = false;
- const unsigned int lenStart[] = { /* Size base for length codes 257..285 */
- 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
- 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
- const unsigned int lenExtra[] = { /* Extra bits for length codes 257..285 */
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
- 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
+ long bitPos = stream->pos % 8;
+ long bytePos = stream->pos / 8;
+ if(bytePos >= stream->length)
+ {
+ cout << bytePos << " " << stream->length << endl;
+ throw std::out_of_range("Ran out of space");
+ }
+ stream->pos++;
+ if(bit)
+ stream->data[bytePos] |= (0b1 << bitPos);
+ }
-
- const unsigned int distStart[] = { /* Offset base for distance codes 0..29 */
- 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
- 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
- 8193, 12289, 16385, 24577};
- const unsigned int distExtra[] = { /* Extra bits for distance codes 0..29 */
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
- 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
- 12, 12, 13, 13};
+ void ZLib::writeBits(StreamData* stream, int count, uint16_t bits)
+ {
+ for(int i = 0; i < count; i++)
+ {
+ writeBit(stream, bits & (0b1 << i));
+ }
+ }
+
+ void ZLib::writeCode(StreamData* stream, uint16_t code, uint8_t codeLen)
+ {
+ for(int i = codeLen - 1; i >= 0; i--)
+ {
+ writeBit(stream, code & (0b1 << i));
+ }
+ }
+ int ZLib::decodeData(uint8_t* data, unsigned long length, uint8_t* out, unsigned long outLength)
+ {
+ staticTree = false;
StreamData stream = { data, length, 0};
long outPos = 0;
@@ -295,23 +336,22 @@ namespace TehImage
{
final = nextBit(&stream);
- int method = nextBits(&stream, 2);
+ CompressionType method = (CompressionType)nextBits(&stream, 2);
//cout << (final?"Final chunk!\n":"") << "Compression method: " << method << endl;
//cout << outPos << " " << outLength << endl;
- if(method == 1)
+ if(method == CompressionType::STATIC_CODES)
buildStaticHuffmanTree();
- else if(method == 2)
+ else if(method == CompressionType::DYNAMIC_CODES)
buildDynamicHuffmanTree(&stream);
- else if(method == 0)
+ else if(method == CompressionType::NONE)
{
while(stream.pos%8 != 0)
stream.pos++;
- uint16_t LEN = nextBits(&stream, 16);
- uint16_t NLEN = nextBits(&stream, 16);
- NLEN++;
+ int16_t LEN = nextBits(&stream, 16);
+ int16_t NLEN = nextBits(&stream, 16) + 1;
if(LEN + NLEN != 0)
throw std::invalid_argument("NLEN and LEN don't match");
for(int i = 0; i < LEN; i++)
@@ -358,4 +398,108 @@ namespace TehImage
return 0;
}
+ int ZLib::encodeData(uint8_t* data, unsigned long length, uint8_t* out, unsigned long outLength)
+ {
+
+ StreamData outStream = { out, outLength, 0};
+ unsigned long inPos = 0;
+
+ CompressionType compressionType = STATIC_CODES;
+
+ // BFINAL
+ writeBit(&outStream, 1);
+ // BTYPE
+ writeBits(&outStream, 2, compressionType);
+
+ if(compressionType == NONE)
+ {
+ // Padding for non-compressed
+ while(outStream.pos % 8 != 0)
+ writeBit(&outStream, 0);
+
+ // LEN
+ writeBits(&outStream, 16, (uint16_t) length);
+ // NLEN
+ writeBits(&outStream, 16, ((uint16_t) length) ^ 65535);
+
+ // Write the actual bytes
+ while(inPos < length)
+ {
+ writeBits(&outStream, 8, data[inPos++]);
+ }
+
+ // Return length of compressed data in bytes
+ return (outStream.pos / 8) + ((outStream.pos % 8 == 0)?0:1);
+ }
+ if(compressionType == STATIC_CODES)
+ {
+ uint16_t codes[288];
+ uint8_t codeLens[288];
+ uint16_t distCodes[32];
+ uint8_t distCodeLens[32];
+ getStaticHuffmanCodes(codes, codeLens, distCodes, distCodeLens);
+
+ // cout << inPos << " " << length << endl;
+ HashTable hashTable(255);
+ while(inPos < length)
+ {
+ if(inPos + 2 >= length)
+ {
+ writeCode(&outStream, codes[data[inPos]], codeLens[data[inPos]]);
+ inPos++;
+ continue;
+ }
+
+ char curr[3];
+ memcpy(curr, &data[inPos], 3);
+ if(hashTable.contains(curr))
+ {
+ unsigned long pos = hashTable.get(curr);
+ int length = 3;
+ while(data[pos + length] == data[inPos + length])
+ length++;
+
+
+ uint16_t lenCode, lenExtraBits;
+ for(uint16_t i = 0; i < 30; i++)
+ {
+ if(lenStart[i] <= length && lenStart[i] + (0b1 << lenExtra[i]) - 1 >= length)
+ {
+ lenCode = i;
+ lenExtraBits = length - lenStart[i];
+ }
+ }
+ unsigned long dist = inPos - pos;
+ uint16_t distCode, distExtraBits;
+ for(uint16_t i = 0; i < 30; i++)
+ {
+ if(distStart[i] <= dist && distStart[i] + (0b1 << distExtra[i]) - 1 >= dist)
+ {
+ distCode = i;
+ distExtraBits = dist - distCodes[i];
+ }
+ }
+
+ writeCode(&outStream, codes[lenCode + 257], codeLens[lenCode + 257]);
+ writeBits(&outStream, lenExtra[lenCode], lenExtraBits);
+ writeCode(&outStream, distCodes[distCode], distCodeLens[distCode]);
+ writeBits(&outStream, distExtra[distCode], distExtraBits);
+ inPos += length;
+
+ }
+ else
+ {
+ writeCode(&outStream, codes[data[inPos]], codeLens[data[inPos]]);
+ hashTable.insert(curr, inPos);
+ inPos++;
+ }
+ }
+
+ return (outStream.pos / 8) + ((outStream.pos % 8 == 0)?0:1);
+ }
+
+ // Return length of compressed data in bytes
+ return (outStream.pos / 8) + ((outStream.pos % 8 == 0)?0:1);
+ }
+
}
diff --git a/src/zlib.h b/src/zlib.h
index fc6a1c3..efebedb 100644
--- a/src/zlib.h
+++ b/src/zlib.h
@@ -1,7 +1,9 @@
#pragma once
#include <cstdint>
+#include <list>
#include <string>
+#include <vector>
namespace TehImage
{
@@ -23,7 +25,16 @@ namespace TehImage
unsigned long pos;
};
- class ZLibInflator
+ enum CompressionType : uint16_t
+ {
+ NONE = 0b00,
+ STATIC_CODES = 0b01,
+ DYNAMIC_CODES = 0b10,
+ RESERVED = 0b11
+ };
+
+ // RFC 1950/1951
+ class ZLib
{
private:
HuffmanTree tree;
@@ -31,17 +42,24 @@ namespace TehImage
bool staticTree = false;
bool haveTree = false;
public:
- ZLibInflator() = default;
- ~ZLibInflator() = default;
+ ZLib() = default;
+ ~ZLib() = default;
static bool nextBit(StreamData* stream);
- static uint16_t nextBits(StreamData* stream, int bits); // Max 16 bits
+ static uint16_t nextBits(StreamData* stream, int count); // Max 16 bits
+
+ static void writeBit(StreamData* stream, bool bit);
+ static void writeBits(StreamData* stream, int count, uint16_t bits);
+
+ void writeCode(StreamData* stream, uint16_t code, uint8_t codeLen);
void calculateCodes(uint8_t* lengths, uint16_t* codesOut, int codeCount);
void buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount);
void buildHuffmanTree(uint8_t* lengths, uint16_t* codes, int codeCount, HuffmanTree* treeOut);
-
+
+ void getStaticHuffmanCodes(uint16_t* codesOut, uint8_t* codeLensOut, uint16_t* distCodesOut, uint8_t* distCodeLensOut);
+
void buildStaticHuffmanTree();
void buildStaticHuffmanTree(HuffmanTree* treeOut, HuffmanTree* distTreeOut);
@@ -52,6 +70,7 @@ namespace TehImage
uint16_t getNextCode(StreamData* stream, HuffmanTree* tree);
int decodeData(uint8_t* data, unsigned long length, uint8_t* out, unsigned long outLength);
+ int encodeData(uint8_t* data, unsigned long length, uint8_t* out, unsigned long outLength);
};
}