/* 这个代码是我为了完成老师布置的大作业,借鉴一篇用哈夫曼编码实现压缩与解压缩的论文写成的,并结合游程编码算法使其完善。游程编码适合重复性高的文本,算法简单,速度快;哈夫曼编码算法较难,但是运用广泛。 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <conio.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
/* Status codes returned by compress(), uncompress() and coding(). */
#define OK 1
#define ERROR 0
/*
 * One node of the Huffman tree.
 *
 * The global table huffmannode holds the whole tree: the leaves (one per
 * distinct byte value) are packed at the front and the internal nodes that
 * merge them follow. Links between nodes are indices into this table; the
 * value -1 marks a missing parent or child.
 */
struct node
{
    long weight;        /* occurrence count of the byte, or the sum of both children */
    unsigned char ch;   /* byte value carried by a leaf */
    int parent;         /* index of the parent node, -1 while the node has none */
    int lchild;         /* index of the left child (bit '0'), -1 for a leaf */
    int rchild;         /* index of the right child (bit '1'), -1 for a leaf */
    char code[256];     /* code of the leaf as a NUL-terminated string of '0'/'1' */
    int Length;         /* number of characters in code */
} huffmannode[512];
/* Prompts for a source and an object path and writes the compressed object
   file. Returns OK on success; ERROR is returned when a file cannot be opened. */
int compress();
/* Prompts for a compressed source and an object path and writes the restored
   data. Returns OK on success; ERROR is returned when a file cannot be opened. */
int uncompress();
/* Returns the index of the lowest-weight node among node[0..n] (n inclusive)
   whose parent is -1. */
int select(struct node *node,int n);
/* Fills in code and Length for node[0..n-1] by walking the parent links up to
   the root. Returns OK on success. */
int coding(struct node *node,int n);
/*
 * Entry point: shows the menu, reads one choice and runs the matching
 * operation. Always exits with status 0, as before.
 */
int main()
{
    int choice = 0;

    printf("please choose the operation:\n");
    printf("1.compress file\n");
    printf("2.uncompress file\n");
    printf("3.exit\n");

    /* Non-numeric input or end of input leaves choice untouched, so it must
       be treated as an invalid selection instead of being read. */
    if (scanf("%d", &choice) != 1) {
        printf("ERROR!\n");
        return 0;
    }

    switch (choice) {
    case 1:
        compress();
        break;
    case 2:
        uncompress();
        break;
    case 3:
        break;
    default:
        printf("ERROR!\n");
        break;
    }
    return 0;
}
/*
 * Finds the parentless node with the lowest weight.
 *
 * node: table of tree nodes.
 * n:    highest index to inspect (inclusive).
 *
 * Returns the index of the first node in node[0..n] whose parent is -1 and
 * whose weight is minimal, or -1 when node is NULL or no node in that range
 * is parentless.
 */
int select(struct node *node,int n)
{
    int best = -1;
    int i;

    if (node == NULL)
        return -1;

    for (i = 0; i <= n; i++) {
        if (node[i].parent != -1)
            continue;
        /* Compare against the best candidate seen so far rather than a fixed
           sentinel, so arbitrarily large weights remain selectable. The strict
           comparison keeps the lowest index on ties. */
        if (best < 0 || node[i].weight < node[best].weight)
            best = i;
    }
    return best;
}
int coding(struct node*node,int n)//哈弗曼编码
{
int begin;
int i,f,c;
char codes[256];
codes[n-1]='\0';
for(i=0;i<n;i++)
{
begin=n-1;
for(c=i,f=node[i].parent;f!=-1;c=f,f=node[f].parent)
{
begin--;
if(node[f].lchild==c)
codes[begin]='0';
else
codes[begin]='1';
}
strcpy(node[i].code,&codes[begin]);
node[i].Length=strlen(node[i].code);
}
return OK;
}
int compress()//压缩
{
int count,n,m,j,i,s1,s2;
char infile[20],outfile[20],ch1=0,ch2=0,ch='#',codes[256];
unsigned char c;
FILE *ifp,*ofp1,*ofp;
long Filelength=0,filelength=0,lengenth=0,ff;
double time;
float rate,speed;
clock_t begin,finish;
printf("please input the path and the name of the source file:");
scanf("%s",infile);
ifp=fopen(infile,"rb");
if(ifp==NULL)
{
printf("Not found!\n");
return ERROR;
}
ofp1=fopen("D:\\1.txt","wb");
printf("please input the path and the name of the object file:");
scanf("%s",outfile);
ofp=fopen(outfile,"wb");
if(ofp==NULL)
{
printf("Not found!\n");
return ERROR;
}
begin=clock();
while(!feof(ifp))
{
fread(&ch1,1,1,ifp);
Filelength++;
}
Filelength=Filelength-1;//统计源文件的字节长度(最后一个字符会多读一次,所以要减1)
ff=Filelength;
fseek(ifp,0,SEEK_SET);
fread(&ch1,1,1,ifp);
count=1;
filelength++;
while(!feof(ifp))//将游程编码写进中间文件
{
fread(&ch2,1,1,ifp);
if(ch2==ch1)
count++;
else
{
if(count>=3)
{
fwrite(&ch,1,1,ofp1);
fwrite(&ch1,1,1,ofp1);
fwrite(&count,1,1,ofp1);
}
else
{
if(count==1)
fwrite(&ch1,1,1,ofp1);
else if(count==2)
{
fwrite(&ch1,1,1,ofp1);
fwrite(&ch1,1,1,ofp1);
}
}
ch1=ch2;
count=1;
}
filelength++;
if(filelength==Filelength)
break;
}
if(count>=3)
{
fwrite(&ch,1,1,ofp1);
fwrite(&ch1,1,1,ofp1);
fwrite(&count,1,1,ofp1);
}
else
{
if(count==1)
fwrite(&ch1,1,1,ofp1);
else if(count==2)
{
fwrite(&ch1,1,1,ofp1);
fwrite(&ch1,1,1,ofp1);
}
}
fclose(ifp);
fclose(ofp1);
ofp1=fopen("D:\\1.txt","rb");
Filelength=0;//Filelength为中间文件的字节长度
while(!feof(ofp1))
{
fread(&c,1,1,ofp1);
huffmannode[c].weight++;
Filelength++;
}
Filelength=Filelength-1;
huffmannode[c].weight--;//结点下标为ASCII码值
n=0;
for(i=0;i<256;i++)
if(huffmannode[i].weight!=0)
{
huffmannode[i].ch=(unsigned char)i;
n++;//n表示字符出现的种类数
huffmannode[i].lchild=huffmannode[i].rchild=huffmannode[i].parent=-1;
}
m=2*n-1;//哈弗曼树结点总数
j=0;
for(i=0;i<256;i++)
if(huffmannode[i].weight!=0)
{
huffmannode[j]=huffmannode[i];
j++;
}
for(i=n;i<m;i++)
huffmannode[i].lchild=huffmannode[i].rchild=huffmannode[i].parent=-1;
for(i=n;i<m;i++)
{
s1=select(huffmannode,i-1);
huffmannode[i].lchild=s1;
huffmannode[s1].parent=i;
s2=select(huffmannode,i-1);
huffmannode[i].rchild=s2;
huffmannode[s2].parent=i;
huffmannode[i].weight=huffmannode[s1].weight+huffmannode[s2].weight;
}
coding(huffmannode,n);
fseek(ofp1,0,SEEK_SET);
fwrite(&Filelength,4,1,ofp);//将源文件长度写入目标文件,以便解压
fseek(ofp,8,SEEK_SET);
codes[0]=0;
filelength=0;
while(!feof(ofp1))//将中间文件每个字符的编码所对应的字符写入目标文件
{
filelength++;
fread(&c,1,1,ofp1);
for(i=0;i<n;i++)
if(c==huffmannode[i].ch)break;
strcat(codes,huffmannode[i].code);
while(strlen(codes)>=8)
{
for(i=0;i<8;i++)
{
if(codes[i]=='1')
c=(c<<1)|1;
else
c=c<<1;
}
fwrite(&c,1,1,ofp);
lengenth++;
strcpy(codes,codes+8);
}
if(filelength==Filelength)
break;
}
if(strlen(codes)>0)//最后有多余的编码补0
{
strcat(codes,"00000000");
for(i=0;i<8;i++)
{
if(codes[i]=='1')
ch1=(ch1<<1)|1;
else
ch1=ch1<<1;
}
fwrite(&ch1,1,1,ofp);
lengenth++;
}//这里的lengenth代表源文件的字符的编码所对应的字符个数,源文件编码区的字符个数
lengenth=lengenth+8;
fseek(ofp,4,SEEK_SET);
fwrite(&lengenth,4,1,ofp);
fseek(ofp,lengenth,SEEK_SET);
fwrite(&n,4,1,ofp);//n代表哈弗曼叶子总数
count=0;
for(i=0;i<n;i++)
{
fwrite(&(huffmannode[i].ch),1,1,ofp);//把哈弗曼叶子结点所带的字符写进目标文件
ch1=huffmannode[i].Length;
fwrite(&ch1,1,1,ofp);//把哈弗曼叶子结点所带字符的编码位数写进目标文件
if(huffmannode[i].Length%8!=0)
for(j=huffmannode[i].Length%8;j<8;j++)
strcat(huffmannode[i].code,"0");
while(huffmannode[i].code[0]!=0)
{
ch1=0;
for(j=0;j<8;j++)
{
if(huffmannode[i].code[j]=='1')
c=(c<<1)|1;
else
c=c<<1;
}
strcpy(huffmannode[i].code,huffmannode[i].code+8);
count++;
fwrite(&c,1,1,ofp);//把哈弗曼结点所带的字符的编码所对应的字符写入目标文件
}
}
printf("\n");
finish=clock();
time=(double)(finish-begin)/CLOCKS_PER_SEC;
printf("the time of the compression is:%fseconds\n",time);
speed=(float)Filelength/time/1000;
printf("the speed of the compression is:%5.2fKB/S\n",speed);
printf("\n");
printf("the length of the source file is:%ld bites\n",ff);
lengenth=lengenth+4+n*2+count;
printf("the length of the object file is:%ld bites\n",lengenth);
rate=(float)lengenth/(float)ff;
printf("the rate of the compression is:%4.2f%%%\n",rate*100);
fclose(ofp1);
fclose(ofp);
remove("D:\\1.txt");
return OK;
}
//解压
int uncompress()
{
clock_t begin,finish;
double time;
FILE *ifp,*ofp1,*ofp;
char infile[20],outfile[20];
long Filelength,filelength,lengenth,ff=0;
int n,m;
int i,j,k,flag1=0,flag2=0;
char string[256],codes[256];
unsigned char c,b=0;
int maxlength;
float speed;
printf("please input the path and the name of the source file:");
scanf("%s",infile);
ifp=fopen(infile,"rb");
if(ifp==NULL)
{
printf("Not found!\n");
return ERROR;
}
ofp1=fopen("D:\\2.txt","wb");
printf("please input the path and the name of the object file:");
scanf("%s",outfile);
ofp=fopen(outfile,"wb");
if(ofp==NULL)
{
printf("Not found!\n");
return ERROR;
}
begin=clock();
fread(&Filelength,4,1,ifp);//Filelength代表哈弗曼编码之前文件的长度
fread(&lengenth,4,1,ifp);//lengenth哈弗曼编码之前的文件经过编码后长度+8
fseek(ifp,lengenth,SEEK_SET);
fread(&n,4,1,ifp);
fseek(ifp,0,SEEK_SET);
while(!feof(ifp))
{
fread(&c,1,1,ifp);
ff++;
}
ff--;//需解压的文件的长度
printf("the length of the source file is %d bites\n",ff);
fseek(ifp,lengenth+4,SEEK_SET);//将指针移到叶子结点信息的区域
for(i=0;i<n;i++)//将源文件中叶子结点信息全都赋给huffmannode
{
fread(&huffmannode[i].ch,1,1,ifp);
fread(&c,1,1,ifp);
huffmannode[i].Length=c;
huffmannode[i].code[0]=0;
if(huffmannode[i].Length%8>0)
m=huffmannode[i].Length/8+1;
else
m=huffmannode[i].Length/8;
for(j=0;j<m;j++)
{
fread(&c,1,1,ifp);
itoa(c,string,2);
for(k=8;k>strlen(string);k--)
strcat(huffmannode[i].code,"0");
strcat(huffmannode[i].code,string);
}
huffmannode[i].code[huffmannode[i].Length]=0;
}
maxlength=0;
for(i=0;i<n;i++)
if(huffmannode[i].Length>maxlength)
maxlength=huffmannode[i].Length;
fseek(ifp,8,SEEK_SET);
filelength=0;
codes[0]=0;
string[0]=0;
while(1)//解哈弗曼编码
{
while(strlen(codes)<maxlength)
{
fread(&c,1,1,ifp);
itoa(c,string,2);
for(k=8;k>strlen(string);k--)
strcat(codes,"0");
strcat(codes,string);
}
for(i=0;i<n;i++)
if(memcmp(huffmannode[i].code,codes,(unsigned int)huffmannode[i].Length)==0)
break;
strcpy(codes,codes+huffmannode[i].Length);
c=huffmannode[i].ch;
fwrite(&c,1,1,ofp1);
filelength++;
if(filelength==Filelength)
break;
}
fclose(ofp1);
ofp1=fopen("D:\\2.txt","rb");
filelength=0;
while(!feof(ofp1))
{
fread(&c,1,1,ofp1);
filelength++;
}
filelength--;
Filelength=0;
fseek(ofp1,0,SEEK_SET);
while(!feof(ofp1))//游程解压
{
Filelength++;
fread(&c,1,1,ofp1);
if(c=='#')
{
flag1=flag2=1;
continue;
}
else
{
if(flag1==1)
{
b=c;
flag1=0;
continue;
}
else
{
if(flag2==1)
for(i=0;i<c;i++)
fwrite(&b,1,1,ofp);
else
fwrite(&c,1,1,ofp);
flag2=0;
}
}
if(Filelength==filelength)
break;
}
finish=clock();
time=(double)(finish-begin)/CLOCKS_PER_SEC;
fseek(ofp1,0,SEEK_SET);
Filelength=0;
while(!feof(ofp1))
{
fread(&c,1,1,ofp1);
Filelength++;
}
Filelength--;
speed=(float)ff/time/1000;
printf("the speed of the uncompression is:%5.2fKB/S\n",speed);
fclose(ifp);
fclose(ofp1);
fclose(ofp);
remove("D:\\2.txt");
return OK;
}