PAT Huffman Codes 哈夫曼树,C语言实现

题目链接:https://pintia.cn/problem-sets/900290821590183936/problems/914044227287445504
本小白刚刚起步,最近在刷浙江大学PAT上的题目,做到了哈夫曼树的这一题。绞尽脑汁之后,把网上百度到的C代码(可能有的C++代码可以实现,但是我还不会C++)拉到PTA上去跑,也没有一个能通过的,于是下决心一定要自己做出来。把每个细节都考虑到,一步步实现,终于做出来了,每个测试点都正确,成就感满满。一激动就开了个CSDN账号来分享一下C语言实现的Huffman Codes,仅供像我这样的小白参考。

原题:
05-树9 Huffman Codes(30 分)

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {‘0’ – ‘9’, ‘a’ – ‘z’, ‘A’ – ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

我的原代码:

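// While building the tree, take special care that data is exchanged between
// H->Elements[] and a HuffmanTree node as a whole struct TreeNode. For example,
// H->Elements[i].weight = T->weight is not enough; it must be H->Elements[i] = *T.
// Only then are the node's left/right pointers preserved and the tree stays
// connected; otherwise the program ends in a segmentation fault.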
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Handle type used throughout the file: a pointer to one tree node. */
typedef struct TreeNode *HuffmanTree;

/* One node of the Huffman tree. */
struct TreeNode {
    int weight;                 /* key the heap orders nodes by */
    struct TreeNode *left;      /* left child, or NULL */
    struct TreeNode *right;     /* right child, or NULL */
};

/* Handle type for the heap: a pointer to its bookkeeping record. */
typedef struct HeapStruct *MinHeap;

/* Array-backed heap whose elements are tree nodes stored by value. */
struct HeapStruct {
    struct TreeNode *Elements;  /* node array; nodes are copied in and out whole */
    int Size;                   /* number of nodes currently stored */
    int Capacity;               /* maximum number of nodes the array may hold */
};

MinHeap MinHeap_Create(int Maxsize)         //construct a MinHeap
{
    MinHeap H=(MinHeap)malloc(sizeof(struct HeapStruct));
    H->Elements=(HuffmanTree)malloc((Maxsize+1)*sizeof(struct TreeNode));       //one more space for Elements[0]
    H->Size=0;
    H->Capacity=Maxsize;
    H->Elements[0].weight=-1;           //哨兵
    return H; 
}

/*
 * Insert a copy of the node *T into the min-heap H.
 *
 * The whole struct is copied (weight and both child pointers), so the
 * caller keeps ownership of T itself and may reuse or free it afterwards.
 * If the heap is already at capacity a diagnostic is printed and nothing
 * is inserted.
 */
void Insert(MinHeap H,HuffmanTree T)
{
    int hole;

    if (H->Size == H->Capacity) {
        /* This is a min-heap; the message used to say "max heap". */
        printf("最小堆已满");
        return;
    }

    /*
     * Open a hole in the new last slot and move it up while the parent is
     * heavier than the new node. The explicit hole > 1 bound keeps the walk
     * inside slots 1..Size even for a weight below the sentinel's -1, so
     * slot 0 is never overwritten.
     */
    hole = ++H->Size;
    while (hole > 1 && H->Elements[hole / 2].weight > T->weight) {
        H->Elements[hole] = H->Elements[hole / 2];
        hole /= 2;
    }
    H->Elements[hole] = *T;
}

MinHeap BuildMinHeap(int Weight[],int Maxsize,char CH[])
{
    int i;
    MinHeap H=MinHeap_Create(Maxsize);
    HuffmanTree Temp=(HuffmanTree)malloc(sizeof(struct TreeNode));
    for(i=0;i<Maxsize;i++)
    {
        Temp->weight=Weight[CH[i]];
        Temp->left=NULL;
        Temp->right=NULL;
        Insert(H,Temp);
    }
    free(Temp);
    return H;
}

/*
 * Remove the minimum-weight node from the heap H.
 *
 * Returns a freshly malloc'ed copy of that node (the caller owns it), or
 * NULL if the heap is empty or the copy cannot be allocated. On an empty
 * heap a diagnostic is printed. On allocation failure the heap is left
 * untouched.
 */
HuffmanTree DeleteMin(MinHeap H)
{
    int Parent, Child;
    HuffmanTree MinItem;
    struct TreeNode last;       /* the last element, to be re-seated from the root down */

    if (H->Size == 0) {
        printf("最小堆已空");
        return NULL;            /* was a bare `return;` yielding an indeterminate pointer */
    }

    MinItem = malloc(sizeof *MinItem);
    if (MinItem == NULL)
        return NULL;
    *MinItem = H->Elements[1];

    /* Take the last element out, then shrink the heap by one. */
    last = H->Elements[H->Size--];

    /* Percolate the hole at the root down until `last` fits. */
    for (Parent = 1; Parent * 2 <= H->Size; Parent = Child) {
        Child = Parent * 2;
        /* Pick the lighter of the two children when a right child exists. */
        if (Child != H->Size &&
            H->Elements[Child].weight > H->Elements[Child + 1].weight)
            Child++;
        if (last.weight <= H->Elements[Child].weight)
            break;
        H->Elements[Parent] = H->Elements[Child];
    }
    H->Elements[Parent] = last;
    return MinItem;
}

/*
 * Build a Huffman tree from the nodes currently stored in the heap H.
 *
 * Repeatedly removes the two lightest nodes, joins them under a new parent
 * whose weight is their sum, and puts the parent back. With k nodes in the
 * heap this takes k-1 merges. The heap is emptied in the process.
 *
 * Returns the root as a heap-allocated node obtained from DeleteMin (the
 * caller owns it), or NULL if a node could not be extracted.
 */
HuffmanTree Huffman(MinHeap H)
{
    int merges = H->Size - 1;
    struct TreeNode parent;     /* scratch node: Insert copies it, so nothing leaks */
    int i;

    for (i = 0; i < merges; i++) {
        parent.left = DeleteMin(H);
        parent.right = DeleteMin(H);
        if (parent.left == NULL || parent.right == NULL) {
            free(parent.left);
            free(parent.right);
            return NULL;
        }
        parent.weight = parent.left->weight + parent.right->weight;
        Insert(H, &parent);
    }

    return DeleteMin(H);
}

/*
 * Compute the weighted path length of the tree rooted at T.
 *
 * T      root of the (sub)tree; NULL contributes 0
 * Depth  depth of T within the whole tree (pass 0 for the root)
 *
 * Each leaf (a node with no children) contributes Depth * weight.
 * The result is the sum over all leaves.
 */
int Calwpl(HuffmanTree T,int Depth)
{
    if (T == NULL)
        return 0;               /* empty tree or missing child: nothing to add */

    if (T->left == NULL && T->right == NULL)
        return Depth * T->weight;

    return Calwpl(T->left, Depth + 1) + Calwpl(T->right, Depth + 1);
}

/*
 * Report whether one of the two strings is a prefix of the other.
 *
 * Returns 1 if a is a prefix of b or b is a prefix of a (equal strings
 * included), and 0 otherwise.
 */
int isPreFix(const char a[],const char b[])
{
    /*
     * Stop at the first terminator. The test must be on the characters
     * (*a, *b), not on the pointers, or two equal strings would be compared
     * past their '\0'.
     */
    while (*a != '\0' && *b != '\0' && *a == *b) {
        a++;
        b++;
    }

    /* Whichever string ran out first is a prefix of the other. */
    return (*a == '\0' || *b == '\0') ? 1 : 0;
}

/*
 * Check the first n strings of s for any prefix conflict.
 *
 * Every unordered pair is tested once with isPreFix.
 * Returns 1 as soon as a conflicting pair is found, 0 if there is none.
 */
int HasPreFix(char s[][200],int n)
{
    int first = 0;

    while (first < n) {
        int second;

        /* Only pairs with second > first: a string is never compared with itself. */
        for (second = first + 1; second < n; second++) {
            if (isPreFix(s[first], s[second]) != 0)
                return 1;
        }
        first++;
    }
    return 0;
}

/* void test(HuffmanTree T) { if(T->left!=NULL&&T->right!=NULL) { printf("*%d %d*",T->left->weight,T->right->weight); test(T->left); test(T->right); } else printf("GG"); return; } */

int main()
{
    int n,i,freq[256];
    char CH[100];
    int num[100];
    scanf("%d",&n);
    for(i=0;i<n;i++)
    {
        scanf(" %c %d",&CH[i],&num[i]);
        freq[CH[i]]=num[i];
    }

    MinHeap H=BuildMinHeap(freq,n,CH);

// for(i=1;i<=n;i++)
// {
// printf("$%d ",H->Elements[i].weight);
// }

    HuffmanTree T=Huffman(H);

// printf("@%d\n",T->weight);
// printf("@%d @%d\n",T->left->weight,T->right->weight);
// printf("@%d,@%d,@%d,@%d\n",T->left->left->weight,T->left->right->weight,T->right->left->weight,T->right->right->weight);

// test(T);

    int wpl=Calwpl(T,0);

// printf("\n%d\n",wpl);

    int k;
    scanf("%d",&k);
    while(k--){
        char ch[256];
        char str[256][200];
        int thiswpl=0;
        for(i=0;i<n;i++)
        {
            scanf(" %c %s",&ch[i],str[i]);          //输入%c之前一定要有空格,因为前面一个scanf输入之后有回车 
            thiswpl+=freq[ch[i]]*strlen(str[i]);
// printf("$%d\n",thiswpl);
// printf("$%d\n",freq[ch[i]]);
// printf("$%d\n",strlen(str[i]));
        }
        if(wpl==thiswpl&&!HasPreFix(str,n))
        printf("Yes\n");
        else
        printf("No\n");
    }   
    return 0;
}
    原文作者:哈夫曼树
    原文地址: https://blog.csdn.net/qq_36770418/article/details/78532042
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞