原理:Huffman树的应用:Huffman编码,为出现频率较高的字符指定较短的码字,而为出现频率较低的字符指定较短的码字,可以实现二进制文件的压缩。
Heap.h
#pragma once
#include <vector>
//仿函数
template<class T>
struct Lesser
{
bool operator()(const T& l, const T& r)
{
return l < r;
}
};
template<class T>
struct Greater
{
bool operator()(const T& l, const T& r)
{
return l > r;
}
};
template<class T, class Compare = Lesser<T>>
class Heap
{
public:
Heap()
{}
Heap(const T* a, size_t size)
{
for (size_t i = 0; i < size; ++i)
{
_a.push_back(a[i]);
}
for (int i = (_a.size()-2)/2; i >= 0; --i)
{
_AdjustDown(i);
}
}
void Push(const T& x)
{
_a.push_back(x);
_AdjustUp(_a.size()-1);
}
void Pop()
{
assert(!_a.empty());
swap(_a[0], _a[_a.size()-1]);
_a.pop_back();
_AdjustDown(0);
}
T& Top()
{
assert(!_a.empty());
return _a[0];
}
bool Empty()
{
return _a.empty();
}
size_t Size()
{
return _a.size();
}
protected:
void _AdjustUp(int child)
{
Compare cmp;
int parent = (child-1)/2;
while (child > 0)//parent>=0 ?
{
if (cmp(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
child = parent;
parent = (child-1) / 2;
}
else
{
break;
}
}
}
void _AdjustDown(int parent)
{
Compare cmp;
int child = parent*2 + 1;
while (child < _a.size())
{
if (child+1 < _a.size() && cmp(_a[child+1], _a[child]))
{
++child;
}
if (cmp(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
parent = child;
child = parent*2 + 1;
}
else
{
break;
}
}
}
protected:
vector<T> _a;
};
#pragma once
#include "Heap.h"
template<class T>
struct HuffmanTreeNode
{
HuffmanTreeNode<T>* _left;
HuffmanTreeNode<T>* _right;
HuffmanTreeNode<T>* _parent;
T _weight;
HuffmanTreeNode(const T& weight)
:_left(NULL)
,_right(NULL)
,_parent(NULL)
,_weight(weight)
{}
};
template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree()
:_root(NULL)
{}
~HuffmanTree()
{
_Destory(_root);
}
HuffmanTree(const T* a, size_t size, const T& invalid)
{
_root = _CreateTree(a, size, invalid);
}
Node* GetRoot()
{
return _root;
}
protected:
Node* _CreateTree(const T* a,size_t size, const T& invalid)
{
assert(a);
struct Compare
{
bool operator()(const Node* l, const Node* r)
{
return l->_weight < r->_weight;
}
};
Heap<Node*, Compare> minHeap;
for (size_t i = 0; i < size; ++i)
{
if (a[i] != invalid)
{
minHeap.Push(new Node(a[i]));
}
}
while (minHeap.Size() > 1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.Push(parent);
}
return minHeap.Top();
}
void _Destory(Node* root)
{
if (root == NULL)
return;
_Destory(root->_left);
_Destory(root->_right);
}
protected:
Node* _root;
};
void HuffmanTreeTest()
{
int a[] = {0,1,2,3,4,5,6,7,8,9};
HuffmanTree<int> ht(a, 10, -1);
}
FileCompress.h
#pragma once
#include "HuffmanTree.h"
#include <Windows.h>
struct CharInfo
{
char _ch;
int _count;
string _code;
CharInfo(unsigned char ch = 0)
:_ch(ch)
,_count(0)
{}
CharInfo operator+(const CharInfo& x)
{
CharInfo tmp;
tmp._count = _count + x._count;
return tmp;
}
bool operator!=(const CharInfo& x) const
{
return _count != x._count;
}
bool operator<(const CharInfo& x) const
{
return _count < x._count;
}
};
template<class T>
class FileCompress
{
public:
FileCompress()
{
for (size_t i = 0; i < 256; ++i)
{
_infos[i] = i;
}
}
void Compress(const char* filename)
{
assert(filename);
FILE* fOutFile = fopen(filename, "rb");
assert(fOutFile);
char ch = fgetc(fOutFile);
int charCount = 0;//统计字符总数
while (!feof(fOutFile))
{
++charCount;
_infos[(unsigned char)ch]._count++;
ch = fgetc(fOutFile);
}
//创建Huffman树
CharInfo invalid(0);
HuffmanTree<CharInfo> t(_infos, 256, invalid);
//由Huffman树生成Huffman编码
_GenerateHuffmanCode(t.GetRoot());
//压缩
string compressFilename = filename;
compressFilename += ".compress";
FILE* fInCompress = fopen(compressFilename.c_str(), "wb");
assert(fInCompress);
fseek(fOutFile, 0, SEEK_SET);
ch = fgetc(fOutFile);
char value = 0;
int size = 0;
while (!feof(fOutFile))
{
string& code = _infos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); ++i)
{
value <<= 1;
if (code[i] == '1')
{
value |= 1;
}
++size;
if (size == 8)
{
fputc(value, fInCompress);
size = 0;
value = 0;
}
}
ch = fgetc(fOutFile);
}
if (size > 0)//补位
{
value <<= (8-size);
fputc(value, fInCompress);
}
//写配置文件,方便解压缩时读取
string configFilename = filename;
configFilename += ".config";
FILE* fInConfig = fopen(configFilename.c_str(), "wb");
assert(fInConfig);
string line;
char buffer[128];
//将字符总数写入配置文件第一行
line += itoa(charCount, buffer, 10);
line += "\n";
fputs(line.c_str(), fInConfig);
line.clear();
for (size_t i = 0; i < 256; ++i)
{
if (_infos[i]._count)
{
line += _infos[i]._ch;
line += ',';
line += itoa(_infos[i]._count, buffer, 10);
line += '\n';
fputs(line.c_str(), fInConfig);
}
line.clear();
}
fclose(fOutFile);
fclose(fInCompress);
fclose(fInConfig);
}
void UnCompress(const char* filename)
{
//读取配置文件
string configFilename = filename;
configFilename += ".config";
FILE* fOutConfig = fopen(configFilename.c_str(), "rb");
assert(fOutConfig);
string line;
//读取字符总数
_ReadLine(fOutConfig, line);
int charCount = atoi(line.c_str());
line.clear();
while (_ReadLine(fOutConfig, line))
{
if (!line.empty())
{
unsigned char ch = line[0];
_infos[ch]._count = atoi(line.substr(2).c_str());
line.clear();
}
else
{
line.clear();
_ReadLine(fOutConfig, line);
unsigned char ch = '\n';
_infos[ch]._count = atoi(line.substr(1).c_str());
line.clear();
}
}
//重建Huffman树
CharInfo invalid(0);
HuffmanTree<CharInfo> t(_infos, 256, invalid);
//读.compress文件,写.uncompress文件
string compressFilename = filename;
compressFilename += ".compress";
FILE* fOutCompress = fopen(compressFilename.c_str(), "rb");
assert(fOutCompress);
string uncompressFilename = filename;
uncompressFilename += ".uncompress";
FILE* fInUncompress = fopen(uncompressFilename.c_str(), "wb");
assert(fInUncompress);
HuffmanTreeNode<CharInfo>* root = t.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;
int pos = 7;
char ch = fgetc(fOutCompress);
while (1)
{
if (ch & (1<<pos))
cur = cur->_right;
else
cur = cur->_left;
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch, fInUncompress);
cur = root;
if (--charCount == 0)//字符已读取完,遇到补位的0不再读取
{
break;
}
}
--pos;
if (pos == -1)
{
ch = fgetc(fOutCompress);
pos = 7;
}
}
fclose(fOutCompress);
fclose(fInUncompress);
}
protected:
void _GenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root)
{
if (root == NULL)
return;
_GenerateHuffmanCode(root->_left);
_GenerateHuffmanCode(root->_right);
if (root->_left == NULL && root->_right == NULL)
{
HuffmanTreeNode<CharInfo>* cur = root;
HuffmanTreeNode<CharInfo>* parent = root->_parent;
string& code = _infos[(unsigned char)cur->_weight._ch]._code;
while (parent)
{
if (parent->_left == cur)
code += '0';
if (parent->_right == cur)
code += '1';
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(), code.end());
}
}
bool _ReadLine(FILE* filename, string& line)
{
char ch = fgetc(filename);
if (ch == EOF)
return false;
while (ch != EOF && ch != '\n')
{
line += ch;
ch = fgetc(filename);
}
return true;
}
protected:
CharInfo _infos[256];
};
void CompressTest()
{
//压缩
FileCompress<CharInfo> compress;
int CompressBegin = GetTickCount();
compress.Compress("Input.BIG");
int CompressEnd = GetTickCount();
cout<<"压缩用时:"<<CompressEnd-CompressBegin<<" ms"<<endl;
}
void UncompressTest()
{
//解压缩
FileCompress<CharInfo> uncompress;
int UncompressBegin = GetTickCount();
uncompress.UnCompress("Input.BIG");
int UncompressEnd = GetTickCount();
cout<<"解压缩用时:"<<UncompressEnd-UncompressBegin<<" ms"<<endl;
}
Test.cpp
#include <iostream>
using namespace std;
#include <assert.h>
#include "FileCompress.h"
int main()
{
CompressTest();
UncompressTest();
return 0;
}
下面是对一个大小为8.04MB文件的测试:
结果成功压缩、解压缩:
压缩后的文件大小:
压缩后的文件:
配置文件:
用Beyond Compare文本比较工具检查原文件与解压后的文件:
无差异
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。