这个程序从上周复习完树开始写,中途一有空就调这程序,到今天才正式写完。
中途遇到不少问题,但是写完了确实有成就感,也觉得这是自己目前写得最有意义的一个程序.
这个程序的思路如下
1.按字节读取文件
2.将1个字节看做一个单位统计每个单位出现的次数(0x00-0xff)
3.根据出现的次数创建一棵哈夫曼树
4.根据哈夫曼树创建对应的索引表
5.重新读取文件,根据索引表找出每个单位的哈夫曼编码并写入文件。这里有一个很难搞定的地方:每个单位的哈夫曼编码是不等长的,有的不足一个字节,有的甚至达到5、6个字节。为了把这些编码写入文件,我做了很多位运算处理,希望看明白我代码的同志如果有更好的处理办法能告诉我。
解压按照上面5步反过来即可!
写这个程序有一大半时间花在处理末尾字节的乱码上,试过很多种办法,最后发现自己越走越远,办法越来越复杂。
最后用了一个很简单却有效的方法:用一个long long型的变量来统计文件的位数。long long型变量的最大值是 $2^{63}-1$(约为 $2^{64}/2$),实际上不存在溢出的可能性。以前的代码为了处理这最后几个字节的乱码凭空多了100多行,没想到就这么解决了,所以说,简单往往也有效。
最后,上代码
Code:
- /* Compress files using Huffman coding */
- /* 童 */
- #include<stdio.h>
- #include<malloc.h>
- #include<stdlib.h>
- #include<string.h>
- #define LEN 1024/* step 1k: size in bytes of the read buffer and of the file-name buffers */
- #define MAX 256/* number of distinct byte values (0x00-0xff) */
- #define CHAR_B 8/* number of bits packed into one output byte */
- #define TREE_DEEP (MAX/2+1)/* author's note: the maximum height of the Huffman tree is MAX/2+1. NOTE(review): not referenced in the visible code, and a degenerate tree over MAX leaves can be deeper - confirm before relying on it */
- typedef unsigned char uc;/* raw byte */
- typedef struct node{/* node of the Huffman tree; pointers to these nodes are also what the heap array stores */
- int item;/* occurrence count */
- int i;/* corresponding index: the byte value for a leaf; the visible code passes -1 for merged (internal) nodes */
- struct node *left;
- struct node *right;
- }*link;/* link is a pointer to struct node */
- /* FIXME: the memory of the Huffman tree is never freed */
- /* Node constructor: builds one binary-tree node */
- link NODE(int item, int i, link left, link right)
- {
- link t = malloc(sizeof *t);
- t->item = item;
- t->left = left;
- t->right = right;
- t->i = i;
- return t;
- }
- /* Heap sift-down operation */
- void shif_down(link heap_head[], int i, int len)
- {
- int tmp = i;
- while(2*tmp <= len)
- {
- i = tmp;
- tmp *= 2;
- if(tmp+1 <= len && heap_head[tmp+1]->item < heap_head[tmp]->item)
- tmp++;/*判æ–å·¦å?³å?æ ‘çš„å¤§å°?*/
- if(heap_head[i]->item > heap_head[tmp]->item)
- {
- link mid = heap_head[i];
- heap_head[i] = heap_head[tmp];
- heap_head[tmp] = mid;
- }
- }
- }
- /* Heap sift-up operation */
- void shif_up(link heap_head[], int i)
- {
- int father = i/2;
- link tmp;
- while(father >= 1)
- {
- if(heap_head[father]->item > heap_head[i]->item)
- {
- tmp = heap_head[father];
- heap_head[father] = heap_head[i];
- heap_head[i] = tmp;
- }
- father = father/2;
- }
- }
- /* Build the heap */
- void build_heap(link heap_head[])
- {
- int i;
- for(i = MAX / 2; i > 0; i–)
- shif_down(heap_head, i, MAX);/*max/2是ä»?å??开始第一个有å©å?节点的节点*/
- }
- /* Remove and return the top element of the heap */
- link delmin(link heap_head[], int *len)
- {
- link ret = heap_head[1];
- link tmp = heap_head[1];
- heap_head[1] = heap_head[*len];
- heap_head[*len] = tmp;
- (*len)–;
- shif_down(heap_head, 1, *len);
- return ret;
- }
- /* Insert an element into the heap */
- void insert(link heap_head[], int *len, link item)
- {
- heap_head[++(*len)] = item;
- shif_up(heap_head, *len);
- }
- /* Build the Huffman tree */
- link creat_huffman(int count[])
- {
- int len = MAX, i;
- link* heap_head = malloc(len*sizeof(link));//å †ä¸å˜æ”¾æŒ‡é’ˆ
- link min1, min2;
- heap_head–;//å †ä¸‹æ ‡ä»?1开始
- for(i = 1; i <= len; i++)
- heap_head[i] = NODE(count[i-1],i – 1, NULL, NULL);
- build_heap(heap_head);
- while(len > 1)
- {
- min1 = delmin(heap_head, &len);
- min2 = delmin(heap_head, &len);
- insert(heap_head, &len, NODE(min1->item + min2->item, -1 ,min1, min2));
- }
- return delmin(heap_head, &len);
- }
- /* Count how many times each value 0x00-0xff occurs in the file */
- void char_count(int count[], uc *buf, int len)
- {
- int i;
- for(i = 0; i < len; i++)
- count[buf[i]]++;
- }
- void print_count(int count[])
- {
- int i;
- for(i = 0; i < MAX; i++)
- printf(“%d “, count[i]);
- }
- void print_huffman(link head)
- {
- if(head){
- // printf(“(“);
- printf(“%d “, head->item);
- print_huffman(head->left);
- print_huffman(head->right);
- // printf(“)”);
- }
- // else printf(“()”);
- }
- /* Print the generated table */
- void print_table(char *table[])
- {
- int i;
- for(i = 0; i < MAX; i++){
- while(*table[i] != 0)
- putchar(*table[i]++ + ‘0’);
- printf(“/n”);
- }
- }
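- /* Build the lookup table. The index is a value in 0x00-0xff and table[i] is a string
- * holding the Huffman code of value i; to tell a 0 bit apart from '\0', 0 is stored as 2.
- * Parameter 1 is an array of pointers that carries all the results back.
- * Parameter 2 is the root of the Huffman tree that was built.
- * Parameter 3 carries back the total length, in bits, of this file's compressed code.
- * */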
- void creat_table(char *table[], link head, long long *count)
- {
- static int deep = 0;
- static char buf[MAX] = {0};/*2 means the code 0*/
- if(head != NULL)
- {
- deep++;
- if(head->i >= 0)
- {
- table[head->i] = malloc((deep + 1) * sizeof(char));
- buf[deep – 1] = ‘/0’;
- (*count) += ((deep -1)*head->item);
- strcpy(table[head->i], buf);
- }
- buf[deep-1] = 2;
- // buf[deep] = ‘/0’;
- creat_table(table, head->left, count);
- buf[deep-1] = 1;
- creat_table(table, head->right, count);
- –deep;
- // buf[–deep] = ‘/0’;
- }
- }
- /* Compress a file */
- void compress(char *filename)
- {
- FILE *in, *out;
- int count[MAX] = {0};
- uc buf[LEN] = {0}, ch, bit_buf = 0;
- char *table[MAX] = {0}, name[LEN];
- int i, j;
- link head;
- long long bit_count = 0;
- strncpy(name, filename, LEN);
- if((in = fopen(name, “rb”)) == NULL)
- {
- printf(“Can not open file %s/n”, name);
- exit(0);
- }
- while(1)
- {
- for(i = 0; i < LEN && !feof(in); i++)
- buf[i] = (uc)fgetc(in);
- if(!feof(in))
- {
- char_count(count, buf, i);
- }
- else
- {
- char_count(count, buf, i – 1);/*多读了1å—节*/
- break;
- }
- }
- head = creat_huffman(count);
- creat_table(table, head, &bit_count);
- fclose(in);
- if((in = fopen(name, “r”)) == NULL)
- {
- perror(“Error”);
- exit(0);
- }
- strncat(name, “.t”, LEN);
- if((out = fopen(name, “w”)) == NULL)
- {
- perror(“Error”);
- exit(0);
- }
- /*写入�缩信�*/
- for(i = 0; i < MAX; i++)
- {
- fprintf(out, “%d/n”, count[i]);
- }
- ch = fgetc(in);
- j = 0;
- while(1)
- {
- if(feof(in)) break;
- for(i = 0; i < CHAR_B; i++, j++)/*j需�上一次的结�*/
- {
- bit_buf = (bit_buf<<1) | (!((uc)table[ch][j]>>1));
- // bit_count++;
- if(table[ch][j + 1] == ‘/0’)
- {
- ch = fgetc(in);
- j = -1;
- if(feof(in)) break;
- }
- }
- if(i == CHAR_B)
- {
- fputc(bit_buf, out);
- }
- else
- {
- fputc(bit_buf << (CHAR_B – i -1), out);
- }
- }
- fclose(in);
- fclose(out);
- }
- /* Decompress a file */
- void decompress(char *filename)
- {
- FILE *in, *out;
- int count[MAX] = {0};
- uc ch, bit_buf = 0;
- char *table[MAX] = {0}, name[LEN];
- int i;
- link head, p;
- long long bit_count = 0;
- strncpy(name, filename, LEN);
- if((in = fopen(name, “r”)) == NULL)
- {
- perror(“Error”);
- exit(0);
- }
- name[strlen(name) – 2] = ‘/0’;
- if((out = fopen(name, “w”)) == NULL)
- {
- perror(“Error”);
- exit(0);
- }
- for(i = 0; i < MAX; i++)
- {
- fscanf(in, “%d/n”, &count[i]);
- }
- bit_count = 0;
- head = creat_huffman(count);
- creat_table(table, head, &bit_count);
- ch = fgetc(in);
- bit_buf = fgetc(in);
- i = 0;
- while(bit_count > 0)
- {
- p = head;
- for(;p->left != NULL && p->right != NULL; i++)
- //i需�使用上一次循�的结�
- {
- if(i == CHAR_B)
- {
- i = 0;
- ch = bit_buf;
- bit_buf = fgetc(in);
- }
- if(ch&0x80)
- {
- p = p->right;
- }
- else
- {
- p = p->left;
- }
- ch = (ch << 1) | (bit_buf >> (CHAR_B – i – 1));
- bit_count–;
- }
- fputc(p->i, out);
- }
- fclose(in);
- fclose(out);
- }
- #if 1
/*
 * Command-line entry point.
 *
 * Usage: <program> -C <file>     compress <file> into <file>.t
 *        <program> -D <file>.t   decompress <file>.t into <file>
 *
 * The mode letter is the second character of the first argument.
 * Returns 0 on success; prints a message and exits with status 1 on a
 * usage error.
 *
 * Fixes:
 *  - quotes and "\n" escapes had been garbled by extraction;
 *  - an empty first argument made argv[1][1] read past the terminator;
 *  - a file name shorter than two characters made the ".t" suffix check
 *    index before the start of the string.
 */
int main(int argc, char *argv[])
{
    size_t len;

    if (argc != 3 || argv[1][0] == '\0' ||
        !((argv[1][1] == 'C') || (argv[1][1] == 'D'))) {
        fprintf(stderr, "Usage <%s>[-C/D]<filename>\nC:compress\nD:decompress\n", argv[0]);
        exit(1);
    }

    if (argv[1][1] == 'C') {
        compress(argv[2]);
    } else {
        len = strlen(argv[2]);
        if (len < 2 || argv[2][len - 1] != 't' || argv[2][len - 2] != '.') {
            fprintf(stderr, "%s is not a .t file\n", argv[2]);
            exit(1);
        }
        decompress(argv[2]);
    }
    return 0;
}
- #endif
- #if 0
- int main(void)
- {
- int count[] = {5, 29, 7, 8, 14, 23, 3, 11};
- char *table[MAX] = {0};
- link head = creat_huffman(count);
- creat_table(table, head);
- print_table(table);
- printf(“//tree”);
- print_huffman(head);
- printf(“/n”);
- return 0;
- }
- #endif