文件压缩-基于哈夫曼树

简介:基于哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压

开发环境: vs2013

项目概述:

        1.压缩

            a.读取文件,统计每个字符出现的次数,并以出现次数作为权值来构建哈夫曼树

            b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底

            c.每次取出堆顶的两个节点,将二者的权值相加生成父节点后再放入堆中,直到堆中只剩下一个节点,这时哈夫曼树也建成

            d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了的字符的编码

            e.获取编码后每次凑满8位就将编码串写入到压缩文件(每处理一位编码先将value左移一位,编码为1时再将最低位或上1,为0时只移位)

             f.写好配置文件,统计每个字符及其出现次数,并以“字符+’,’+次数”的形式保存到配置文件中

         2.解压

             a.读取配置文件,统计所有字符的个数

             b.构建哈夫曼树,读取压缩文件,按读到的编码位在树中向下走,每到达一个叶子节点就将该节点所含的字符写入到解压缩文件中,直到将压缩文件读完

             c.压缩解压缩完全完成。

步骤:

1:建小堆(Heap.h)

#pragma once  
#include <vector>  
#include<assert.h>  
#include<iostream>
using namespace std;

// 小堆  
/// Strict "less than" comparator. Heap uses it as its default ordering,
/// which keeps the smallest element at the top.
template<class T>
struct Less
{
	/// Returns true when `l` orders strictly before `r` (i.e. `l < r`).
	/// The call operator is const so the functor also works when it is held
	/// by value in a const object or reached through a const reference.
	bool operator() (const T& l, const T& r) const
	{
		return l < r;
	}
};

/// Strict "greater than" comparator. Passing it to Heap as the Compare
/// argument keeps the largest element at the top.
template<class T>
struct Greater
{
	/// Returns true when `l` is strictly greater than `r`; relies on
	/// `operator>` of T. The call operator is const so the functor can be
	/// invoked on const comparator objects.
	bool operator() (const T& l, const T& r) const
	{
		return l > r;
	}
};

/// Binary heap stored in a std::vector.
///
/// `Compare()(a, b)` returning true means `a` must sit above `b`, so the
/// default `Less<T>` yields a min-heap and `Greater<T>` a max-heap. The
/// comparator is default-constructed whenever it is needed; it is not stored.
template<class T, class Compare = Less<T>>
class Heap
{
public:
	/// Creates an empty heap.
	Heap()
	{}

	/// Builds a heap from the first `size` elements of `a`.
	/// `a` may be null only when `size` is 0.
	Heap(const T* a, size_t size)
	{
		assert(a || size == 0);

		_arrays.reserve(size);
		for (size_t i = 0; i < size; ++i)
		{
			_arrays.push_back(a[i]);
		}

		// Sift down every parent, starting with the last one (index n/2 - 1).
		// Counting from n/2 down to 1 avoids the unsigned underflow that
		// `(n - 2) / 2` produces when the heap holds fewer than two elements.
		for (size_t i = _arrays.size() / 2; i > 0; --i)
		{
			AdjustDown(static_cast<int>(i - 1));
		}
	}

	/// Inserts a copy of `x` and restores the heap order.
	void Push(const T& x)
	{
		_arrays.push_back(x);
		AdjustUp(static_cast<int>(_arrays.size() - 1));
	}

	/// Removes the top element. Calling it on an empty heap is a programming
	/// error: it asserts in debug builds and is a no-op otherwise.
	void Pop()
	{
		assert(!_arrays.empty());
		if (_arrays.empty())
		{
			return;
		}

		// Move the last element to the root, drop the old root, sift down.
		if (_arrays.size() > 1)
		{
			using std::swap;
			swap(_arrays.front(), _arrays.back());
		}
		_arrays.pop_back();

		AdjustDown(0);
	}

	/// Returns the top element; the heap must not be empty.
	T& Top()
	{
		assert(!_arrays.empty());
		return _arrays[0];
	}

	/// Read-only access to the top element; the heap must not be empty.
	const T& Top() const
	{
		assert(!_arrays.empty());
		return _arrays[0];
	}

	/// Returns true when the heap holds no elements.
	bool Empty() const
	{
		return _arrays.empty();
	}

	/// Returns the number of stored elements.
	int Size() const
	{
		return static_cast<int>(_arrays.size());
	}

	/// Sifts the element at index `root` down until neither child has to sit
	/// above it. A negative or out-of-range index leaves the heap untouched.
	void AdjustDown(int root)
	{
		if (root < 0)
		{
			return;
		}

		using std::swap;
		Compare com;
		const size_t count = _arrays.size();
		size_t parent = static_cast<size_t>(root);
		size_t child = parent * 2 + 1;

		while (child < count)
		{
			// Pick whichever child has to be closer to the top.
			if (child + 1 < count &&
				com(_arrays[child + 1], _arrays[child]))
			{
				++child;
			}

			if (!com(_arrays[child], _arrays[parent]))
			{
				break;
			}

			swap(_arrays[child], _arrays[parent]);
			parent = child;
			child = parent * 2 + 1;
		}
	}

	/// Sifts the element at index `child` up while it has to sit above its
	/// parent. An index outside the stored range leaves the heap untouched.
	void AdjustUp(int child)
	{
		if (child < 0 || static_cast<size_t>(child) >= _arrays.size())
		{
			return;
		}

		using std::swap;
		Compare com;

		while (child > 0)
		{
			const int parent = (child - 1) / 2;
			if (!com(_arrays[child], _arrays[parent]))
			{
				break;
			}

			swap(_arrays[parent], _arrays[child]);
			child = parent;
		}
	}

	/// Writes the elements in storage order to std::cout, each followed by a
	/// space, then ends the line.
	void Print() const
	{
		for (size_t i = 0; i < _arrays.size(); ++i)
		{
			std::cout << _arrays[i] << " ";
		}
		std::cout << std::endl;
	}

public:
	// Backing storage; index 0 is the top of the heap.
	std::vector<T> _arrays;
};

/// Thin priority-queue adapter over Heap<T> using the heap's default
/// comparator. Every operation forwards to the underlying heap.
template<class T>
class PriorityQueue
{
public:
	/// Inserts a copy of `x`.
	void Push(const T& x)
	{
		_hp.Push(x);
	}

	/// Removes the front element.
	void Pop()
	{
		_hp.Pop();
	}

	/// Returns the front element, so callers can read what Pop() would remove.
	T& Top()
	{
		return _hp.Top();
	}

	/// Returns true when the queue holds no elements.
	bool Empty()
	{
		return _hp.Empty();
	}

	/// Returns the number of queued elements.
	int Size()
	{
		return _hp.Size();
	}

public:
	// Underlying heap that stores the queued elements.
	Heap<T> _hp;
};

void Test1()
{
	int a[10] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };
	Heap<int, Greater<int> > hp1(a, 10);
	hp1.Push(1);
	hp1.Print();

	Heap<int> hp2(a, 10);
	hp2.Push(1);
	hp2.Print();

}

#include <list>  

// Demonstrates traversal of a vector and a list: each container is filled
// with 1..4 and printed on its own line, every element followed by a space.
void Test2()
{
	// Contiguous container.
	std::vector<int> numbers;
	for (int value = 1; value <= 4; ++value)
	{
		numbers.push_back(value);
	}
	for (const int value : numbers)
	{
		std::cout << value << " ";
	}
	std::cout << std::endl;

	// Linked container, same contents and same output format.
	std::list<int> chain;
	for (int value = 1; value <= 4; ++value)
	{
		chain.push_back(value);
	}
	for (const int value : chain)
	{
		std::cout << value << " ";
	}
	std::cout << std::endl;
}

// Max-heap sift-down on a raw int array: starting at index `root`, the value
// is repeatedly swapped with its larger child until no child is larger or the
// node has no child inside the first `size` elements.
void AdjustDown(int* a, size_t size, int root)
{
	int parent = root;

	for (;;)
	{
		int larger = parent * 2 + 1;            // left child first
		if (!(static_cast<size_t>(larger) < size))
		{
			return;                             // no children inside the range
		}

		const int right = larger + 1;
		if (static_cast<size_t>(right) < size && a[right] > a[larger])
		{
			larger = right;
		}

		if (!(a[larger] > a[parent]))
		{
			return;                             // heap property already holds
		}

		std::swap(a[larger], a[parent]);
		parent = larger;
	}
}


// Sorts the first `size` ints of `a` in ascending order with heap sort.
// A null array or fewer than two elements is a no-op; this also avoids the
// unsigned underflow that `(size - 2) / 2` would hit for size 0 or 1.
void HeapSort(int* a, size_t size)
{
	if (!a || size < 2)
	{
		return;
	}

	// Build a max-heap: sift down every parent, last parent (size/2 - 1) first.
	for (size_t i = size / 2; i > 0; --i)
	{
		AdjustDown(a, size, static_cast<int>(i - 1));
	}

	// Repeatedly move the current maximum behind the shrinking heap and
	// restore the heap property on what is left.
	for (size_t end = size - 1; end > 0; --end)
	{
		std::swap(a[0], a[end]);
		AdjustDown(a, end, 0);
	}
}

// Sorts a fixed ten-element array with HeapSort and asserts that the result
// is in non-decreasing order.
void TestHeapSort()
{
	int a[10] = { 5, 9, 2, 3, 0, 1, 7, 8, 4, 6 };
	HeapSort(a, 10);

	for (size_t i = 1; i < 10; ++i)
	{
		assert(a[i - 1] <= a[i]);
	}
}

2:建立哈夫曼树(HuffmanTree.h)

#pragma once  

#include "Heap.h"  
#include<assert.h>  


// Node of a Huffman tree: three links (children and parent) plus the weight
// the node carries.
template<class T>
struct HuffmanTreeNode
{
	HuffmanTreeNode<T>* _left;    // left child, null when absent
	HuffmanTreeNode<T>* _right;   // right child, null when absent
	HuffmanTreeNode<T>* _parent;  // parent node, null until linked
	T _weight;                    // weight stored in this node

	// Creates an unlinked node holding a copy of `x`.
	// Initializers are listed in member declaration order, which is the
	// order in which they run.
	HuffmanTreeNode(const T& x)
		: _left(nullptr)
		, _right(nullptr)
		, _parent(nullptr)
		, _weight(x)
	{}
};

template<class T>
class HuffmanTree
{
	typedef HuffmanTreeNode<T> Node;

public:
	HuffmanTree()
		:_root(NULL)
	{}
	~HuffmanTree()
	{
		Destory(_root);
	}
	template <class T>
	struct NodeCompare
	{
		bool operator()(Node *l, Node *r)
		{
			return l->_weight < r->_weight;
		}
	};
public:
	void CreatTree(const T* a, size_t size, const T& invalid)
	{
		assert(a);
		Heap<Node*, NodeCompare<T>> minHeap;
		for (size_t i = 0; i < size; ++i)
		{
			if (a[i] != invalid)
			{
				Node* node = new Node(a[i]);
				minHeap.Push(node);
			}
		}

		while (minHeap.Size() > 1)
		{
			Node* left = minHeap.Top();
			minHeap.Pop();
			Node* right = minHeap.Top();
			minHeap.Pop();

			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			left->_parent = parent;
			right->_parent = parent;

			minHeap.Push(parent);
		}

		_root = minHeap.Top();
	}

	Node* GetRootNode()
	{
		return _root;
	}

	void Destory(Node* root)
	{
		if (root)
		{
			Destory(root->_left);
			Destory(root->_right);
			delete root;
			root = NULL;
		}
	}

private:
	HuffmanTreeNode<T>* _root;
};

void TestHuffmanTree()
{
	int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
	HuffmanTree<int> hf;
	hf.CreatTree(a, 10, -1);
}

3:压缩文件(FileCompress.h)

#pragma once  

#include "HuffmanTree.h"

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <string>

#include <windows.h>

// Counter type for occurrence counts.
typedef long long LongType;

// Per-byte statistics used as the Huffman tree weight: the byte value, how
// often it occurs, and its code as a string of '0'/'1' characters.
struct FileInfo
{
	unsigned char _ch;     // byte value
	LongType   _count;     // number of occurrences
	std::string _code;     // code as '0'/'1' characters

	// Creates an entry for byte `ch` with a zero count and an empty code.
	FileInfo(unsigned char ch = 0)
		:_ch(ch)
		, _count(0)
	{}

	// Returns an entry whose count is the sum of both counts; its byte value
	// and code are left at their defaults.
	// Const-correct so it also works on const operands and temporaries.
	FileInfo operator+(const FileInfo& fi) const
	{
		FileInfo tmp;
		tmp._count = this->_count + fi._count;
		return tmp;
	}

	// Orders entries by occurrence count only.
	bool operator < (const FileInfo& fi) const
	{
		return this->_count < fi._count;
	}

	// Two entries differ when their occurrence counts differ; the byte value
	// and the code are not compared.
	bool operator != (const FileInfo& fi) const
	{
		return this->_count != fi._count;
	}

};

template<class T>
class FileCompress
{
public:
	//初始化结构体数组
	FileCompress()
	{
		for (int i = 0; i < 256; ++i)
		{
			_infos[i]._ch = i;
		}
	}

public:

	bool Compress(const char* filename)
	{
		//1.打开文件,统计文件字符出现的次数  
		long long Charcount = 0;
		assert(filename);
		FILE* fOut = fopen(filename, "rb");
		assert(fOut);

		char ch = fgetc(fOut);

		while (ch != EOF)
		{
			_infos[(unsigned char)ch]._count++;
			ch = fgetc(fOut);
			Charcount++;
		}

		//2.生成对应的huffman编码  
		GenerateHuffmanCode();

		//3.压缩文件  
		string compressFile = filename;
		compressFile += ".compress";
		FILE* fwCompress = fopen(compressFile.c_str(), "wb");
		assert(fwCompress);

		fseek(fOut, 0, SEEK_SET);//把文件指针指向文件头部
		ch = fgetc(fOut);
		char inch = 0;
		int index = 0;
		while (ch != EOF)
		{
			string& code = _infos[(unsigned char)ch]._code;
			for (size_t i = 0; i < code.size(); ++i)
			{
				inch = inch << 1;
				if (code[i] == '1')
				{
					inch |= 1;
				}
				if (++index == 8)
				{
					fputc(inch, fwCompress);//够八位的情况下存入到压缩文件
					inch = 0;
					index = 0;
				}
			}
			ch = fgetc(fOut);    //读取下一个
		}

		if (index)        //不够八位进行补零
		{
			inch = inch << (8 - index);
			fputc(inch, fwCompress);//写入到压缩文件
		}

		//4.配置文件,方便后续的解压缩  
		string configFile = filename;
		configFile += ".config";
		FILE *fconfig = fopen(configFile.c_str(), "wb");
		assert(fconfig);

		char CountStr[128];
		_itoa(Charcount >> 32, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);
		_itoa(Charcount & 0xffffffff, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);

		FileInfo invalid;
		for (int i = 0; i < 256; i++)
		{
			if (_infos[i] != invalid)
			{
				fputc(_infos[i]._ch, fconfig);
				fputc(',', fconfig);
				fputc(_infos[i]._count + '0', fconfig);
				fputc('\n', fconfig);
			}
		}

		fclose(fOut);
		fclose(fwCompress);
		fclose(fconfig);

		return true;
	}

	bool UnCompresss(const char* filename)
	{
		string configfile = filename;
		configfile += ".config";
		FILE* outConfig = fopen(configfile.c_str(), "rb");
		assert(outConfig);
		char ch;
		long long Charcount = 0;
		string line = ReadLine(outConfig);
		Charcount = atoi(line.c_str());
		Charcount <<= 32;
		line.clear();
		line = ReadLine(outConfig);
		Charcount += atoi(line.c_str());
		line.clear();

		while (feof(outConfig))
		{
			line = ReadLine(outConfig);
			if (!line.empty())
			{
				ch = line[0];
				_infos[(unsigned char)ch]._count = atoi(line.substr(2).c_str());
				line.clear();
			}
			else
			{
				line = '\n';
			}
		}

		HuffmanTree<FileInfo> ht;
		FileInfo invalid;
		ht.CreatTree(_infos, 256, invalid);

		HuffmanTreeNode<FileInfo>* root = ht.GetRootNode();

		string  UnCompressFile = filename;
		UnCompressFile += ".uncompress";
		FILE* fOut = fopen(UnCompressFile.c_str(), "wb");

		string CompressFile = filename;
		CompressFile += ".compress";
		FILE* fIn = fopen(CompressFile.c_str(), "rb");

		int pos = 8;
		HuffmanTreeNode<FileInfo>* cur = root;
		ch = fgetc(fIn);

		while ((unsigned char)ch != EOF)
		{
			--pos;
			if ((unsigned char)ch &(1 << pos))
			{
				cur = cur->_right;
			}
			else
			{
				cur = cur->_left;
			}
			if (cur->_left == NULL && cur->_right == NULL)
			{
				fputc(cur->_weight._ch, fOut);
				cur = root;
				Charcount--;
			}
			if (pos == 0)
			{
				ch = fgetc(fIn);
				pos = 8;
			}
			if (Charcount == 0)
			{
				break;
			}
		}

		fclose(outConfig);
		fclose(fIn);
		fclose(fOut);
		return true;
	}

protected:

	string ReadLine(FILE* fConfig)
	{
		char ch = fgetc(fConfig);
		if (ch == EOF)
		{
			return "";
		}
		string line;
		while (ch != '\n' && ch != EOF)
		{
			line += ch;
			ch = fgetc(fConfig);
		}
		return line;
	}

	void GenerateHuffmanCode()
	{
		HuffmanTree<FileInfo> hft;
		FileInfo invalid;
		hft.CreatTree(_infos, 256, invalid);
		_GenerateHuffmanCode(hft.GetRootNode());
	}

	void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root)
	{
		if (root == NULL)
		{
			return;
		}

		_GenerateHuffmanCode(root->_left);
		_GenerateHuffmanCode(root->_right);

		if (root->_left == NULL && root->_right == NULL)
		{
			HuffmanTreeNode<FileInfo>* cur = root;
			HuffmanTreeNode<FileInfo>* parent = cur->_parent;
			string& code = _infos[cur->_weight._ch]._code;

			while (parent)
			{
				if (parent->_left == cur)
				{
					code += '0';
				}
				else if (parent->_right == cur)
				{
					code += '1';
				}
				cur = parent;
				parent = cur->_parent;
			}

			reverse(code.begin(), code.end());
		}
	}

private:
	FileInfo _infos[256];
};

void TestFileCompress()
{

	FileCompress<FileInfo> fc;

	int begin1 = GetTickCount();
	fc.Compress("wyf.txt");
	int end1 = GetTickCount();
	cout << end1 - begin1 << endl;

	int begin2 = GetTickCount();
	fc.UnCompresss("wyf.txt");
	int end2 = GetTickCount();
	cout << end2 - begin2 << endl;
}

4:主函数:

#define _CRT_SECURE_NO_WARNINGS  

#include <iostream>  
using namespace std;

#include "HuffmanTree.h"  
#include "FileCompress.h"  

// Program entry point: runs the file compression round-trip test and
// reports success to the caller.
int main()
{
	TestFileCompress();

	return 0;
}

    原文作者:哈夫曼树
    原文地址: https://blog.csdn.net/Poison_biting/article/details/52705641
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞