// sZipDemo.cpp : 定义控制台应用程序的入口点。 // #include "stdafx.h" #include "HuffmanTree.cpp" #include "sZip.h" #include <fstream> #include <iostream> using namespace std; int _tmain(int argc, _TCHAR* argv[]) { char str1[10000]; ifstream in; in.open("text.txt",ios::in|ios::binary); in>>str1; cout<<"成功读取text.doc!"<<endl; int num = in.gcount(); in.close(); char tempstr[100000]; for(int i = 0;i <= num;i++) tempstr[i] = str1[i]; unsigned int count[128]; char data[128]; HuffmanTree<char> Tree; Tree.creat(tempstr,data,count); char charCount[256]; for(int i = 0;count[i] != 0;i++){ static int n = -1; if(count[i] < 10){ //个位 charCount[++n] = count[i]+48; charCount[++n] = '#'; } else if(count[i] >= 10 && count[i] < 100){ //十位 charCount[++n] = count[i]/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } else if(count[i] >= 100 && count[i] < 1000){ //百位 charCount[++n] = count[i]/100+48; charCount[++n] = count[i]%100/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } else{ //千位 charCount[++n] = count[i]/1000+48; charCount[++n] = count[i]%1000/100+48; charCount[++n] = count[i]%100/10+48; charCount[++n] = count[i]%10+48; charCount[++n] = '#'; } charCount[n+1] = 0; } ofstream out; out.open("text11.txt",ios::out|ios::binary); out<<data<<'#'<<'#'<<charCount; out<<'#'<<'#'; sZip zip; char str2[100000]; int size = 0; zip.zip(str1,str2,Tree.root,size,data); out<<str2; out.close(); cout<<"成功压缩text.doc文件,压缩文件为text11.doc"<<endl; cout<<"------------------------------------------------------------------------"<<endl; char str3[100000]; char str4[10000]; in.open("text11.txt",ios::in|ios::binary); in.read(str3,80000); in.close(); str3[in.gcount()] = 0; zip.unzip(str3,str4); out.open("text22.txt",ios::out|ios::binary); out<<str4; out.close(); return 0; }
#pragma once template <class T> struct HuffmanTreeNode{ T data; //数据 unsigned int count; //权值 char code[128]; //结点的哈弗曼编码 HuffmanTreeNode<T> *leftChild; //左子女 HuffmanTreeNode<T> *rightChild; //右子女 HuffmanTreeNode<T> *parent; //父节点 HuffmanTreeNode():leftChild(NULL),rightChild(NULL),parent(NULL){} //构造函数 bool operator <=(HuffmanTreeNode &R){return count <= R.count;} //重载<=和>运算符 bool operator >(HuffmanTreeNode &R){return count > R.count;} }; template <class T> class HuffmanTree { public: HuffmanTree(); HuffmanTree(HuffmanTree<T> &t); ~HuffmanTree(void); void printData(); int getWPL(); //获取WPL //打开文件 void creat(char str[],char data[],unsigned int count[]); //哈弗曼的建立 void creat(char data[],unsigned int count[]); //重载 template <class T> friend void findKey(HuffmanTreeNode<T> *subTree,T key,char code[]){ //寻找结点的code static T firstNodeData = subTree->data; if(subTree != NULL){ if(subTree->data != key){ findKey(subTree->leftChild,key,code); findKey(subTree->rightChild,key,code); } else{ int i = 0; for(;subTree->code[i] != 0;i++) code[i] = subTree->code[i]; code[i] = 0; } } } HuffmanTreeNode<T> *root; protected: private: int WPL; void census(unsigned int count[],char data[],char str[],unsigned int &size); //建立哈弗曼树时统计各字符出现的次数 void deleteTree(HuffmanTreeNode<T> *subTree); //删除subTree结点的所有子结点 void encode(HuffmanTreeNode<T> *subTree,char code[] ,char m = 0); //编码 void calculateWPL(HuffmanTreeNode<T> *subTree,int i = 0); //计算WPL void printCode(HuffmanTreeNode<T> *subTree); //输出哈弗曼编码的子函数 void printData(HuffmanTreeNode<T> *subTree); //输出所有结点data和权值的子函数 };
#pragma once #include "StdAfx.h" #include "HuffmanTree.h" #include "MinHeap.cpp" #include <fstream> template <class T> HuffmanTree<T>::HuffmanTree(){ WPL = 0; root = new HuffmanTreeNode<T>; }; template <class T> HuffmanTree<T>::~HuffmanTree(){ deleteTree(root); //删除哈弗曼树 } template <class T> void HuffmanTree<T>::creat(char str[],char data[],unsigned int count[]){ unsigned int size; //字符的个数 for(unsigned int i = 0; i < 128; i++) //count的初始化 count[i] = 0; census(count,data,str,size); minHeap<HuffmanTreeNode<T>> hp; HuffmanTreeNode<T> *parent, *first, *second; HuffmanTreeNode<T> *work; for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中 work = new HuffmanTreeNode<T>; work->data = data[i]; work->count = count[i]; work->leftChild = NULL; work->rightChild = NULL; work->parent = NULL; hp.insert(*work); } for(unsigned int i = 0; i < size-1; i++){ parent = new HuffmanTreeNode<T>; first = new HuffmanTreeNode<T>; second = new HuffmanTreeNode<T>; hp.removeMin(*first); //每次从最小堆中取出两个最小的数 hp.removeMin(*second); parent->leftChild = first; //parent作为他们的父节点 parent->rightChild = second; parent->count = first->count + second->count; parent->data = '#'; //parent不是根结点所以把它的data设为'#' first->parent = parent; second->parent = parent; hp.insert(*parent); //再把parent插入最小堆中,进行循环判断 } root = parent; //最后一个parent就是根结点 char code[128]; for(unsigned int i = 0;i < 128; i++) code[i] = 0; encode(root,code); //对结点进行哈弗曼编码(空的地方取0) }; template <class T> void HuffmanTree<T>::creat(char data[],unsigned int count[]){ unsigned int size = 0; for(unsigned int i = 0; count[i] != 0; i++) size++; minHeap<HuffmanTreeNode<T>> hp; HuffmanTreeNode<T> *parent, *first, *second; HuffmanTreeNode<T> *work; for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中 work = new HuffmanTreeNode<T>; work->data = data[i]; work->count = count[i]; work->leftChild = NULL; work->rightChild = NULL; work->parent = NULL; hp.insert(*work); } for(unsigned int i = 0; i < size-1; i++){ parent = new HuffmanTreeNode<T>; first = new HuffmanTreeNode<T>; second = new HuffmanTreeNode<T>; hp.removeMin(*first); //每次从最小堆中取出两个最小的数 hp.removeMin(*second); parent->leftChild = first; //parent作为他们的父节点 parent->rightChild = second; parent->count = first->count + second->count; parent->data = '#'; //parent不是根结点所以把它的data设为'#' first->parent = parent; second->parent = parent; hp.insert(*parent); //再把parent插入最小堆中,进行循环判断 } root = parent; //最后一个parent就是根结点 char code[128]; for(unsigned int i = 0;i < 128; i++) code[i] = 0; encode(root,code); //对结点进行哈弗曼编码(空的地方取0) } template <class T> void HuffmanTree<T>::deleteTree(HuffmanTreeNode<T> *subTree){ if(subTree != NULL){ deleteTree(subTree->leftChild); //后序遍历来删除结点 deleteTree(subTree->rightChild); delete subTree; } } template <class T> void HuffmanTree<T>::census(unsigned int count[],char data[],char str[],unsigned int &size){ //用于统计字符出现的次数 for(int i = 0; str[i] != 0;i++){ if(str[i] == '#') //当出现的是已出现过的字符时就执行下次循环 continue; static int n = 0; count[n]++; //第一次出现时加一 data[n] = str[i]; for(int j = 0; str[j] != 0;j++){ if(str[i] == str[j] && i != j){ count[n]++; //每次出现重复的时候加一并且 str[j] = '#'; //表明已出现过 } } data[++n] = 0; size = n; } } template <class T> void HuffmanTree<T>::encode(HuffmanTreeNode<T> *subTree,char code[] ,char m = 0){ if(subTree != NULL){ int j; for(j = 0;code[j] != 0;j++){ subTree->code[j] = code[j]; } for(j = 0;code[j] != 0;j++){ } subTree->code[j] = m; subTree->code[j+1] = 0; encode(subTree->leftChild,subTree->code,'0'); encode(subTree->rightChild,subTree->code,'1'); } } template <class T> void HuffmanTree<T>::calculateWPL(HuffmanTreeNode<T> *subTree,int i = 0){ if(subTree != NULL){ if(subTree->data != '#') WPL += i*subTree->count; //i为当前层数,count为结点权值 calculateWPL(subTree->leftChild,++i); //前序遍历 i--; calculateWPL(subTree->rightChild,++i); i--; } } template <class T> int HuffmanTree<T>::getWPL(){ return WPL; }
#pragma once #define DafaultSize 50 template<class E> class minHeap{ public: minHeap(int size = DafaultSize); ~minHeap(); bool insert(const E& x); bool removeMin(E& x); private: E *heap; int currentSize; int maxHeapSize; void siftDown(int start, int m); void siftUp(int start); };
#pragma once #include "StdAfx.h" #include "MinHeap.h" template<class E> minHeap<E>::minHeap(int size){ maxHeapSize = (DafaultSize<size)? size:DafaultSize; heap = new E[maxHeapSize]; if(heap == NULL){ //cout<<"堆存储分配失败"<<endl; } currentSize = 0; }; template<class E> minHeap<E>::~minHeap(){ delete heap; } template<class E> void minHeap<E>::siftDown(int start, int m){ int i = start; //初始位置 int j = 2*i+1; //j是i的左子女位置 E temp = heap[i]; while(j <= m){ if(j<m && heap[j] > heap[j+1]) //左子女大于右子女时,j指向右子女 j++; if(temp <= heap[j]) break; else{ //大则向上移 heap[i] = heap[j]; i = j; j = 2*j+1; } } heap[i] = temp; }; template<class E> void minHeap<E>::siftUp(int start){ int j = start; int i = (j-1)/2; //i的左子女是j E temp = heap[j]; while(j>0){ if(heap[i] <= temp) //如果父节点大 break; else{ //如果父节点小,上滑 heap[j] = heap[i]; j = i; i = (i-1)/2; } } heap[j] = temp; }; template<class E> bool minHeap<E>::insert(const E& x){ if(currentSize == maxHeapSize){ //堆满时 // cout<<"堆已满"<<endl; return false; } heap[currentSize] = x; //在heap尾插入x siftUp(currentSize); //对x进行上滑判断 currentSize++; return true; }; template<class E> bool minHeap<E>::removeMin(E& x){ if(currentSize == 0){ //堆空时 // cout<<"堆为空!!"<<endl; return false; } x = heap[0]; //从heap头获取remove的数据 heap[0] = heap[currentSize-1]; //从heap尾获取元素补到heap头的位置 currentSize--; //元素个数减一 siftDown(0,currentSize-1); //再对heap从头到尾进行下滑判断 return true; };
#pragma once #include "HuffmanTree.cpp" class sZip { public: sZip(void); ~sZip(void); void zip(char str1[],char str2[],HuffmanTreeNode<char>* subTree,int &size,char data[]); void unzip(char str1[],char str2[]); private: bool codeEqual(char code1[],char code2[][128],int &num); void getHuffCode(HuffmanTreeNode<char>* subTree,char code[][128],char data[]); void strBinary(char str[],int &size); void binaryStr(char str1[]); };
#include "StdAfx.h" #include "sZip.h" #include <iostream> using namespace std; sZip::sZip(void){ } sZip::~sZip(void) { } void sZip::zip(char str1[],char str2[],HuffmanTreeNode<char>* subTree,int &size,char data[]){ char code[128][128]; getHuffCode(subTree,code,data); //获取subTree的哈弗曼编码 unsigned int i = 0; unsigned int n = -1; for(; str1[i] != 0 ;i++){ //遍历str1,把里面的字符转化为哈弗曼编码存在str2中 int j = 0; while(data[j] != str1[i]){ j++; if(data[j] == 0) break; } if(data[j] == str1[i]){ for(int count = 0;code[j][count] != 0;count++) str2[++n] = code[j][count]; str2[n+1]=0; } } strBinary(str2,size); //把str2的每一个字符变成每一个bit,8个bit合成一个字符 } void sZip::unzip(char str1[],char str2[]){ unsigned int count[128]; for(int i = 0;i < 127;i++) count[i] = 0; char code[128][128]; char data[128]; for(int i = 0;i < 128;i++) code[i][0] = 0; int i = 0; for(;str1[i] != '#';i++) data[i] = str1[i]; data[i] = 0; int j = i+2; i = -1; while(1){ if(str1[j] != '#' && str1[j+1] == '#') //个位 count[++i] = str1[j]-48; else if(str1[j+1] != '#' && str1[j+2] == '#'){ //十位 count[++i] = int(str1[j]-48)*10+int(str1[j+1]-48); j++; } else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] == '#'){ //百位 count[++i] = int(str1[j]-48)*100+int(str1[j+1]-48)*10+int(str1[j+2]-48); j = j+2; } else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] != '#'&& str1[j+4] =='#'){ //千位 count[++i] = int(str1[j]-48)*1000+int(str1[j+1]-48)*100+int(str1[j+2]-48)*10+int(str1[j+3]-48); } else if(str1[j] == '#' && str1[j+1] == '#') //读完 break; else cout<<"读取错误"<<endl; j = j+2; } HuffmanTree<char> tree; tree.creat(data,count); getHuffCode(tree.root,code,data); j = j+2; i = -1; char tempchar[100000]; for(;str1[j] != 0;j++) tempchar[++i] = str1[j]; tempchar[i+1] = 0; binaryStr(tempchar); //把压缩文件转化为二进制编码 char tempcode[128]; j = -1; int num; int n = -1; i = 0; for(;tempchar[i] != 0;i++){ //遍历tempchar,把它从哈弗曼编码转化为对应的字符 tempcode[++n] = tempchar[i]; tempcode[n+1] = 0; if(codeEqual(tempcode,code,num)){ str2[++j] = data[num]; for(n = 0;tempcode[n] != 0;n++) //重置tempcode tempcode[n] = 0; n = -1; } else continue; } str2[++j] = 0; } void sZip::getHuffCode(HuffmanTreeNode<char>* subTree,char code[][128],char data[]){ for(int i = 0;data[i] != 0;i++) findKey(subTree,data[i],code[i]); } void sZip::strBinary(char str[],int &size){ char str2[100000]; char bit[8]; int n = 0; int count = 0; while(str[n] == '1' || str[n] == '0'){ for(int i = 0;i < 8 ;i++){ if(str[n] == 0){ str[n] = '0'; str[n+1] = 0; } bit[i] = str[n]; n++; } str2[count] = 0; for(int i = 0;i < 8 ;i++){ if(bit[i] == '1'){ char temp = 1; str2[count] = (str2[count]<<1)|temp; //给它最后一位加上一即:左移一位,然后和00000001求或 } else str2[count] = (str2[count]<<1); } count++; } for(int i = 0;i <= count;i++) str[i] = str2[i]; for(int i = count;str[i] != 0;i++) str[i] = 0; size = count; } void sZip::binaryStr(char str1[]){ char str2[100000]; int i = -1; int n = 0; while(str1[n] != 0){ int temp[8]; int tempchar = abs(str1[n]); for(int count = 0;count < 8;count++){ temp[count] = tempchar%2; tempchar /= 2; } if(str1[n]<0 && str1[n] > -128){ //当为负数时,二进制为正数第一位为1(符号位)的反码最后一位加一(补码) temp[7] = 1; for(int count = 0;count < 7;count++){ //求反码 if(temp[count] == 0) temp[count] = 1; else temp[count] = 0; } int x = 1; //进位数 for(int count = 0;count < 8;count++){ //末位加一 if(temp[count] == 0){ if(x == 1){ temp[count] = 1; x = 0; } } else if(x == 1) temp[count] = 0; } } for(int count = 7;count != -1;count--) str2[++i] = char(temp[count]+48); n++; } str2[++i] = 0; for(int count = 0;count <= i;count++) str1[count] = str2[count]; } bool sZip::codeEqual(char code1[],char code2 [][128],int &num){ unsigned int size1 = 0; unsigned int size2 = 0; for(int i = 0;code1[i] != 0;i++) size1++; for(int i = 0;code2[i][0] != 0;i++) size2++; for(int i = 0;i < size2;i++){ int j = 0; int size3 = 0; for(int n = 0;code2[i][n] != 0;n++) size3++; for(;j < size1;j++){ if(code1[j] != code2[i][j]) break; //有一位不等于就直接跳出 } if(j == size1 && j == size3){ num = i; return true; } } return false; }
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。