C++数据结构之文件压缩(哈夫曼树)实例详解

时间:2021-05-20

C++数据结构之文件压缩(哈夫曼树)实例详解

概要:

项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压

开发环境:windows vs2013

项目概述:

1.压缩

a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树

b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底

c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成

d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了得字符的编码

e.获取编码后每次凑满8位就将编码串写入到压缩文件(value处理编码1与它即可,0只移动位)

f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中

2.解压

a.读取配置文件,统计所有字符的个数

b.构建哈夫曼树,读解压缩文件,将所读到的编码字符的这个节点所所含的字符写入到解压缩文件中,知道将压缩文件读完

c.压缩解压缩完全完成,进行小文件大文件的测试

实例代码:

#pragma once #include<vector> template<class T> struct Less { bool operator()(const T& l, const T& r) const { return l < r; } }; template<class T> struct Greater { bool operator()(const T& l, const T& r) const { return l > r; } }; template<class T, class Compare> class Heap { public: Heap() {} Heap(T* array, size_t n) //建堆 { _array.reserve(n); for (size_t i = 0; i < n; i++) { _array.push_back(array[i]); } for (int i = (_array.size() - 2) >> 1; i >= 0; --i) { _AdjustDown(i); } } const T& Top()const { return _array[0]; } void Push(const T& x) { _array.push_back(x); _AdjustUp(_array.size() - 1); } size_t Size() { return _array.size(); } void Pop() { assert(_array.size() > 0); swap(_array[0], _array[_array.size() - 1]); _array.pop_back(); _AdjustDown(0); } bool Empty() { return _array.size() == 0; } void Print() { for (size_t i = 0; i < _array.size(); ++i) { cout << _array[i] << " "; } cout << endl; } protected: void _AdjustUp(int child) //上调 { Compare ComFunc; int parent = (child - 1) >> 1; while (child) { if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); child = parent; parent = (child - 1) >> 1; } else { break; } } } void _AdjustDown(int root) //下调 { Compare ComFunc; int parent = root; int child = root * 2 + 1; while (child < _array.size()) { if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child])) { ++child; } if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); parent = child; child = parent * 2 + 1; } else { break; } } } protected: vector<T> _array; }; void TestHeap() { int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 }; //Heap<int> heap(a, sizeof(a) / sizeof(a[0])); //Heap<int, Less<int>> heap(a, sizeof(a) / sizeof(a[0])); Heap<int, Greater<int>> heap(a, sizeof(a) / sizeof(a[0])); heap.Print(); heap.Push(25); heap.Print(); heap.Pop(); heap.Print(); } #pragma once #include"Heap.h" template<class T> struct HuffmanTreeNode { HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; HuffmanTreeNode<T>* _parent; T _w; //权值 HuffmanTreeNode(const T& x) :_w(x) , _left(NULL) , _right(NULL) , _parent(NULL) {} }; template<class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public: HuffmanTree() :_root(NULL) {} HuffmanTree(T* a, size_t n, const T& invalid = T()) //构建哈夫曼树 { struct Compare { bool operator()(Node* l, Node* r) const { return l->_w < r->_w; } }; Heap<Node*, Compare> minHeap; for (size_t i = 0; i < n; ++i) { if (a[i] != invalid) { minHeap.Push(new Node(a[i])); } } while (minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_w + right->_w); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent = parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRoot() { return _root; } ~HuffmanTree() { _Destory(_root); } protected: void _Destory(Node* root) { if (root == NULL) { return; } _Destory(root->_left); _Destory(root->_right); delete root; } HuffmanTree(const HuffmanTree<T>& tree); HuffmanTree& operator=(const HuffmanTree<T>& tree); protected: Node* _root; }; void TestHuffmanTree() {<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_2_3778260" name="code" class="cpp">#pragma once #include<iostream> using namespace std; #include<assert.h> #include"HuffmanTree.h" typedef long long LongType; struct CharInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 string _code; //huffman编码 CharInfo(LongType count = 0) :_count(count) , _ch(0) , _code("") {} bool operator<(const CharInfo& info) const { return _count < info._count; } CharInfo operator+(const CharInfo& info) { return CharInfo(_count + info._count); } bool operator!=(const CharInfo& info)const { return _count != info._count; } }; struct CountInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 }; class FileCompress { public: FileCompress() { for (size_t i = 0; i < 256; ++i) { _info[i]._ch = i; } } void Compress(const char* filename) { assert(filename); //统计字符出现的次数,均已二进制方式读写 FILE* fout = fopen(filename, "rb"); assert(fout); int ch = fgetc(fout); string CompressFile = filename; CompressFile += ".huffman"; FILE* fin = fopen(CompressFile.c_str(), "wb"); assert(fin); CountInfo info; while (ch != EOF) { _info[(unsigned char)ch]._count++; ch = fgetc(fout); } //构建huffman tree CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> tree(_info, 256, invalid); //生成huffman code GetHuffmanCode(tree.GetRoot()); //压缩 //写配置文件 for (size_t i = 0; i < 256; ++i) { if (_info[i]._count) { info._ch = _info[i]._ch; info._count = _info[i]._count; fwrite(&info, sizeof(info), 1, fin); } } info._count = -1; fwrite(&info, sizeof(info), 1, fin); fseek(fout, 0, SEEK_SET); //返回到文件开始 ch = fgetc(fout); char value = 0; //二进制 int pos = 0; //左移位数 while (ch != EOF) { string& code = _info[(unsigned char)ch]._code; size_t i = 0; for (i = 0; i < code.size(); ++i) { value <<= 1; ++pos; if (code[i] == '1') { value |= 1; } if (pos == 8) //满8位写进文件中 { fputc(value, fin); value = 0; pos = 0; } } ch = fgetc(fout); } if (pos) { value <<= (8 - pos); fputc(value, fin); } fclose(fin); fclose(fout); } void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root) //huffman编码 { if (root == NULL) { return; } if (root->_left == NULL && root->_right == NULL) { HuffmanTreeNode<CharInfo> *parent, *cur; cur = root; parent = cur->_parent; string& code = _info[(unsigned char)root->_w._ch]._code; while (parent) { if (cur == parent->_left) { code += '0'; } else { code += '1'; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } GetHuffmanCode(root->_left); GetHuffmanCode(root->_right); } //解压 void UnCompress(const char* filename) { assert(filename); string UnCompressFile = filename; size_t index = UnCompressFile.rfind('.'); assert(index != string::npos); UnCompressFile = UnCompressFile.substr(0, index); UnCompressFile += ".unhuffman"; FILE* fout = fopen(filename, "rb"); assert(fout); FILE* fin = fopen(UnCompressFile.c_str(), "wb"); assert(fin); CountInfo info; fread(&info, sizeof(CountInfo), 1, fout); //读配置信息 while (1) { fread(&info, sizeof(CountInfo), 1, fout); if (info._count == -1) { break; } _info[(unsigned char)info._ch]._ch = info._ch; _info[(unsigned char)info._ch]._count = info._count; } //重建huffman树 CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> tree(_info, 256, invalid); HuffmanTreeNode<CharInfo>* root = tree.GetRoot(); HuffmanTreeNode<CharInfo>* cur = root; LongType count = root->_w._count; int value = fgetc(fout); if (value == EOF) //只有一种字符 { if (info._ch != 0) { while (cur->_w._count--) { fputc(cur->_w._ch, fin); } } } else { while (value != EOF) { int pos = 7; char test = 1; while (pos >= 0) { if (value & (test << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL && cur->_right == NULL) { fputc(cur->_w._ch, fin); cur = root; if (--count == 0) { break; } } --pos; } value = fgetc(fout); } } fclose(fout); fclose(fin); } protected: CharInfo _info[256]; //所有字符信息 }; void TestFileCompress() { FileCompress fc; //fc.Compress("实验.doc"); //fc.UnCompress("实验.doc.huffman"); //fc.Compress("qq.JPG"); //fc.UnCompress("qq.JPG.huffman"); //fc.Compress("press(); system("pause"); return 0; }</pre><br> <br> <p></p> <p><br> </p> <p><br> </p> <link rel="stylesheet" href="http://static.blog.csdn.net/public/res-min/markdown_views.css?v=1.0" rel="external nofollow" >

以上就是哈夫曼树的实例详解,如有疑问请留言或者到本站社区交流,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章