回 帖 发 新 帖 刷新版面

主题:[原创]用哈夫曼树实现文件的压缩

刚写的一个上机实验题,可以模拟压缩文本文件,只是文件中带了数字后就出错了,不知怎么回事?




/*=================================================*/
/*                  哈夫曼树编码                   */
/*用哈夫曼树算法对文本文件进行0-1化编码,并模拟文件*/
/*压缩与解压                                       */
/*作者:踏网无痕                                   */
/*时间:2004/05/27-29                              */
/*=================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>

/*
 * Size limits shared by the whole program.
 *
 * MAX_SINGLECODE_LEN is the capacity (including the terminating NUL) of every
 * buffer that holds the code of ONE character.  A Huffman code built from at
 * most MAX_STRING_LEN-1 = 999 input characters can be up to 14 bits long
 * (Fibonacci-like frequencies 1,1,1,2,3,5,...), so the former value of 10
 * could be overrun; 32 leaves ample room.  Raise it if MAX_STRING_LEN grows.
 */
#define MAX_SINGLECODE_LEN 32        // buffer size for one character's code, NUL included
#define MAX_STRING_LEN 1000            // buffer size for the text to be encoded
#define MAX_CODESTRING_LEN 10000             // buffer size for the generated 0-1 string
#define MAX_WORDS 1000            // number of entries in the code dictionary
#define END_TREE 30000            // weight value that terminates the stored tree section
#define PATH_LEN 50            // buffer size for a file path

/*
 * Huffman tree node.
 *
 * The same node type is used in two roles: as an element of a singly linked
 * list (chained through `next`) while weights are collected and sorted, and
 * as a binary-tree node (`lchild` / `rchild` / `parent`) once the tree is
 * built.  HTNode names the node itself, LinkTree a pointer to it.
 */
typedef struct Huffmantree HTNode;
typedef struct Huffmantree *LinkTree;

struct Huffmantree
{
    char ch;                        /* character carried by a leaf */
    int weight;                     /* node weight */
    int mark;                       /* flag; codeHTree sets it on nodes it has processed */
    struct Huffmantree *parent;     /* tree link: parent node */
    struct Huffmantree *lchild;     /* tree link: left child */
    struct Huffmantree *rchild;     /* tree link: right child */
    struct Huffmantree *next;       /* list link: following node */
};

/*
 * One entry of the code dictionary: a character together with the
 * NUL-terminated string of '0'/'1' characters that encodes it.
 */
typedef struct {
    /* the character being encoded */
    char ch;
    /* its code as text, at most MAX_SINGLECODE_LEN-1 digits plus NUL */
    char code[MAX_SINGLECODE_LEN];
} CodeDictionary;

/* Forward declarations of every function defined below. */

/* weight list and Huffman tree construction */
LinkTree setWeight(char *string);
LinkTree sortNode(LinkTree tree);
LinkTree createHTree(LinkTree tree);
void deleteNode(LinkTree tree);

/* encoding and decoding */
void codeHTree(LinkTree tree,CodeDictionary *codedictionary);
void compressString(char *string,CodeDictionary *codedictionary,char *codestring);
void decodeHTree(LinkTree tree,char *code,char *filestring);

/* file input and output */
void readFile(char *string);
void writeFile(char *string);
void readCode(LinkTree tree,char *string);
void writeCode(LinkTree tree,char *string);

/* user interface */
void menu(void);

/**
*主函数
*输入:空
*返回:空
*/
void main(void)
{
    char choice;            //菜单选择变量
    char string[MAX_STRING_LEN];    //保存从文件中读取的内容
    LinkTree temp;            //保存赋了权值的表
    LinkTree ht;            //保存排序后的表
    LinkTree htcopy,tempcopy;        //表备份
    LinkTree htree;            //保存哈夫曼树
    LinkTree ptr=NULL;
    CodeDictionary codedictionary[MAX_WORDS];//编码字典
    char codestring[MAX_CODESTRING_LEN];    //保存0-1形的代码串     
    char codestring2[MAX_CODESTRING_LEN];//保存0-1形的代码串
    LinkTree ht2;            //保存读取的树
    LinkTree htree2;            //保存排序后的表
    char filestring[MAX_STRING_LEN];    //解码后要写入文件中的内容

    if((ht2=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
    {
        printf("内存不足!");
        getch();
        exit(0);
    }
    ht2->next=NULL;

    while(1)
    {
        menu();            //调入主菜单
        choice=getch();        //读入用户选项
        switch(choice)        //判断用户选择
        {
            case 'c':
            case 'C':
                printf("\n您选择了压缩文件模式:\n\n");
                readFile(string);    //读取要编码的文件(字符串)
                temp=setWeight(string);            //得到有权值的表
                tempcopy=setWeight(string);
                ht=sortNode(temp);                //按权值排序后的表
                htcopy=sortNode(tempcopy);        //用于记录解码树
                htree=createHTree(ht);            //得到哈夫曼树
                codeHTree(htree,codedictionary);//哈夫曼编码
                compressString(string,codedictionary,codestring);//压缩为0-1码
                writeCode(htcopy,codestring);    //将解码树和0-1码保存                
                deleteNode(htree);                //释放空间*/
                break;
            case 'u':
            case 'U':
                printf("您选择了解压缩文件模式:\n\n");
                readCode(ht2,codestring2);        //读取要解码的0-1码
                htree2=createHTree(ht2);        //得到哈夫曼树
                codeHTree(htree2,codedictionary);//哈夫曼编码
                decodeHTree(htree2,codestring2,filestring); //解码
                writeFile(filestring);            //将解码文件保存
                deleteNode(htree2);                //释放空间
                break;
            case 'e':
            case 'E':
                exit(0);                        //退出程序
        }
    }
}


/**
*整理输入的字符串,求出每个字符在数组中出现的次数,作为权值
*输入:(字符型指针)字符串的地址
*返回:(哈夫曼结点指针)含权链表的首地址
*/
LinkTree setWeight(char *string)
{
    int i=0;                                    //文件字符串下标
    LinkTree tree;                                //头指针
    LinkTree ptr,beforeptr;                        //创建指针与其前驱
    HTNode *node;
    
    if((tree=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
        return NULL;
    tree->next=NULL;
    
    for(i=0;string[i]!='\0';i++)
    {    
        ptr=tree;
        beforeptr=tree;
        
        if((node=(HTNode *)malloc(sizeof(HTNode)))==NULL)
            return NULL;
        node->next=NULL;
        node->parent=NULL;
        node->lchild=NULL;
        node->rchild=NULL;
        node->mark=0;
        node->ch=string[i];
        node->weight=1;
        
        if(tree->next==NULL)                    //如果是第一个非头结点
            tree->next=node;
        else
        {    
            ptr=tree->next;
            while(ptr&&ptr->ch!=node->ch)        //查找相同字符
            {    
                ptr=ptr->next;
                beforeptr=beforeptr->next;
            }
            if(ptr&&ptr->ch==node->ch)            //如果链表中某结点的字符与新结点的字符相同
            {    
                ptr->weight++;                    //将该结点的权加一  
                free(node);    
            }
            else                                //将新结点插入链表后
            {    
                node->next=beforeptr->next;
                beforeptr->next=node;
            }
        }
    }
    return tree;                                //返回头指针
}

/**
 * Re-links a weighted list into ascending order of weight.
 *
 * Nodes are taken from the front of the input list one at a time and placed
 * in a new list in front of the first node whose weight is not smaller.
 * The nodes themselves are reused; only the old header is freed and a new
 * header is allocated.
 *
 * Input:   tree - header of the list to sort
 * Returns: header of the sorted list, or NULL if the new header cannot be
 *          allocated (the input list is then left untouched)
 */
LinkTree sortNode(LinkTree tree)
{
    LinkTree sorted=(LinkTree)malloc(sizeof(HTNode));   /* header of the result */
    LinkTree taken;                                     /* node being moved */
    LinkTree *slot;                                     /* link that will point at it */

    if(sorted==NULL)
        return NULL;
    sorted->next=NULL;

    while((taken=tree->next)!=NULL)
    {
        /* unlink the first remaining node of the input list */
        tree->next=taken->next;

        /* walk past every node that is strictly lighter */
        slot=&sorted->next;
        while(*slot!=NULL&&(*slot)->weight<taken->weight)
            slot=&(*slot)->next;

        /* splice the node in at that position */
        taken->next=*slot;
        *slot=taken;
    }

    free(tree);
    return sorted;
}

/**
 * Builds a Huffman tree from a list sorted by ascending weight.
 *
 * While the list holds at least two nodes, the first two are removed, made
 * the left and right child of a new internal node whose weight is their sum,
 * and that node is inserted back in front of the first node whose weight is
 * not smaller.  The header node is not part of the tree and is not freed.
 *
 * Input:   tree - header of the sorted list (may be NULL or empty)
 * Returns: the root of the tree (the only node left after the header);
 *          NULL if the list is empty, tree is NULL, or memory runs out.
 *          On allocation failure the list is left as it was before the
 *          failed step.
 */
LinkTree createHTree(LinkTree tree)
{
    LinkTree p,q;                               // the two lightest nodes
    LinkTree scan,before;                       // insertion cursor and its predecessor
    HTNode *newnode;

    if(tree==NULL)
        return NULL;

    /* p is only dereferenced after it has been checked, so an empty list is safe */
    while((p=tree->next)!=NULL&&(q=p->next)!=NULL)
    {
        /* allocate before unlinking so a failure cannot lose p and q */
        if((newnode=(HTNode *)malloc(sizeof(HTNode)))==NULL)
            return NULL;

        tree->next=q->next;
        p->next=NULL;
        q->next=NULL;

        newnode->ch='\0';                       // internal nodes carry no character
        newnode->mark=0;
        newnode->parent=NULL;
        newnode->next=NULL;
        newnode->lchild=p;
        newnode->rchild=q;
        newnode->weight=p->weight+q->weight;
        p->parent=newnode;
        q->parent=newnode;

        /* keep the list sorted: skip every node that is strictly lighter */
        before=tree;
        for(scan=tree->next;scan!=NULL&&scan->weight<newnode->weight;scan=scan->next)
            before=scan;
        newnode->next=before->next;
        before->next=newnode;
    }
    return tree->next;
}

/**
 * Helper for codeHTree: visits the subtree below node in pre-order (left
 * before right), marks every node, and appends one dictionary entry per leaf.
 *
 * path holds the '0'/'1' digits chosen on the way down; depth is how many of
 * them are valid.  The program is terminated if a code would not fit into
 * MAX_SINGLECODE_LEN characters including the NUL.
 */
static void assignCodes(LinkTree node,char *path,int depth,
                        CodeDictionary *codedictionary,int *count)
{
    node->mark=1;

    if(!node->lchild&&!node->rchild)            // leaf: record its code
    {
        path[depth]='\0';
        codedictionary[*count].ch=node->ch;
        memcpy(codedictionary[*count].code,path,(size_t)depth+1);
        (*count)++;
        return;
    }

    if(depth>=MAX_SINGLECODE_LEN-1)             // one more digit plus NUL would not fit
    {
        printf("\n编码长度超过上限!\n");
        exit(1);
    }
    if(node->lchild)
    {
        path[depth]='0';                        // left branch is coded 0
        assignCodes(node->lchild,path,depth+1,codedictionary,count);
    }
    if(node->rchild)
    {
        path[depth]='1';                        // right branch is coded 1
        assignCodes(node->rchild,path,depth+1,codedictionary,count);
    }
}

/**
 * Derives the code of every leaf of a Huffman tree.
 *
 * Entries are written to codedictionary starting at index 0, in left-to-right
 * order of the leaves; each code is the path from the root ('0' = left,
 * '1' = right).  All visited nodes get mark = 1.  A tree that consists of a
 * single leaf yields one entry with an empty code.
 *
 * Input:   tree           - root of the Huffman tree; if NULL a message is
 *                           printed and the program exits
 *          codedictionary - array receiving one entry per leaf
 * Returns: nothing
 */
void codeHTree(LinkTree tree,CodeDictionary *codedictionary)
{
    int count=0;                                // entries written so far
    char code[MAX_SINGLECODE_LEN];              // digits of the path being explored

    if(tree==NULL)
    {
        printf("要压缩的文件是空的!\n");
        exit(0);
    }
    assignCodes(tree,code,0,codedictionary,&count);
    printf("\n");
}

/**
 * Decodes a string of '0'/'1' characters with a Huffman tree.
 *
 * Starting at the root, '0' moves to the left child and '1' to the right
 * child; any other character is skipped.  Each time a leaf is reached its
 * character is printed and appended to filestring, and decoding restarts at
 * the root.  If the code ends in the middle of a path, the unmatched digits
 * are reported and decoding stops.  filestring is NUL-terminated on every
 * return path.
 *
 * Input:   tree       - root of the Huffman tree; nothing is decoded unless
 *                       it has both children
 *          code       - NUL-terminated 0-1 string
 *          filestring - receives the decoded text; since every decoded
 *                       character consumes at least one digit, it must have
 *                       room for strlen(code)+1 characters at most
 * Returns: nothing
 */
void decodeHTree(LinkTree tree,char *code,char *filestring)
{
    int i=0,j,k=0;                              // positions in code, char0_1, filestring
    char char0_1[MAX_SINGLECODE_LEN];           // digits consumed for the current character
    LinkTree ptr;

    filestring[0]='\0';
    printf("预览解压后的字符:\n");
    while(tree!=NULL&&code[i]!='\0'&&tree->lchild&&tree->rchild)
    {
        ptr=tree;
        j=0;
        while(code[i]!='\0'&&ptr->lchild&&ptr->rchild)
        {
            if(code[i]=='0'||code[i]=='1')
            {
                ptr=(code[i]=='0')?ptr->lchild:ptr->rchild;
                if(j<MAX_SINGLECODE_LEN-1)      // the trace is only for the error message
                    char0_1[j++]=code[i];
            }
            i++;
        }
        if(!ptr->lchild&&!ptr->rchild)
        {
            printf("%c",ptr->ch);               // preview the decoded character
            filestring[k++]=ptr->ch;
        }
        else if(code[i]=='\0'&&ptr->lchild&&ptr->rchild&&j>0)
        {
            /* the code ran out before a leaf was reached */
            char0_1[j]='\0';
            filestring[k]='\0';
            printf("\n没有与最后的几个0-1序列:%s相匹配的字符!\n",char0_1);
            return;
        }
    }
    printf("\n\n");
    filestring[k]='\0';
}

/**
 * Frees a whole (sub)tree.
 *
 * Both subtrees are released first, then the node itself.
 *
 * Input:   tree - root of the tree to free; NULL is accepted and ignored
 * Returns: nothing
 */
void deleteNode(LinkTree tree)
{
    if(tree==NULL)
        return;

    deleteNode(tree->lchild);
    deleteNode(tree->rchild);
    free(tree);
}

/**
 * Translates a text into its 0-1 code string.
 *
 * For every character of string the first dictionary entry with the same
 * character is looked up among the MAX_WORDS entries and its code is
 * appended to codestring.  A character without an entry contributes nothing.
 * The index is checked before the entry is read, so the search never looks
 * past the end of the dictionary.
 *
 * Input:   string         - NUL-terminated text to encode
 *          codedictionary - array of MAX_WORDS entries
 *          codestring     - receives the NUL-terminated 0-1 string; the
 *                           caller must provide enough room
 * Returns: nothing
 */
void compressString(char *string,CodeDictionary *codedictionary,char *codestring)
{
    int i,j,m;
    int k=0;                                    // write position in codestring

    for(i=0;string[i]!='\0';i++)
    {
        for(j=0;j<MAX_WORDS&&codedictionary[j].ch!=string[i];j++)
            ;
        if(j==MAX_WORDS)                        // no entry for this character
            continue;
        for(m=0;codedictionary[j].code[m]!='\0';m++)
            codestring[k++]=codedictionary[j].code[m];
    }
    codestring[k]='\0';
}

/* Fallback for building this function on its own; the definition near the
 * top of the file takes precedence. */
#ifndef MAX_STRING_LEN
#define MAX_STRING_LEN 1000
#endif

/**
 * Reads a text file into a string.
 *
 * Prompts for a path without extension, appends ".txt" and reads the file
 * in text mode.  At most MAX_STRING_LEN-1 characters are stored; if the
 * file is longer, a warning is printed and the rest is ignored.
 *
 * Input:   string - buffer of at least MAX_STRING_LEN characters; receives
 *                   the NUL-terminated file content.  It is left untouched
 *                   when the file cannot be opened.
 * Returns: nothing
 */
void readFile(char *string)
{
    FILE *fp;
    int i;
    int ch;                                     // int, so EOF is distinct from every character
    char path[PATH_LEN];

    printf("请输入要压缩的文本文件地址:(无需扩展名)");
    /* read at most PATH_LEN-5 characters so that ".txt" still fits */
    if(fgets(path,(int)sizeof(path)-4,stdin)==NULL)
        path[0]='\0';
    if(strchr(path,'\n')==NULL)                 // line was longer: discard the rest of it
        while((ch=getchar())!=EOF&&ch!='\n')
            ;
    path[strcspn(path,"\r\n")]='\0';

    if((fp=fopen(strcat(path,".txt"),"r"))==NULL)
    {
        printf("\n路径不正确!\n");
        getch();
        return;
    }

    for(i=0;i<MAX_STRING_LEN-1&&(ch=fgetc(fp))!=EOF;i++)
        string[i]=(char)ch;
    string[i]='\0';
    if(i==MAX_STRING_LEN-1&&fgetc(fp)!=EOF)
        printf("\n文件过长,超出部分已被忽略!\n");
    fclose(fp);
}

/**
*保存编码后的解码树和字符串
*输入:(哈夫曼结点指针)解码树的地址
*      (字符型指针)要保存的0-1码串首地址
*返回:空
*/
void writeCode(LinkTree tree,char *string)
{
    FILE *fp;
    int i;
    int weight;                                    //记录写入的权值
    char ch;                                    //记录写入的字符
    LinkTree p;
    char path[PATH_LEN];                        //0-1码文件的写路径

    printf("请输入压缩后的保存路径及文件名:(无需扩展名)");
    gets(path);
    if((fp=fopen(strcat(path,".yxy"),"w+"))==NULL)
    {
        printf("\n文件路径出错!\n");
        getch();
        return;
    }
    p=tree->next;

    /*解码树部分写入文件前部分*/
    do
    {
        ch=p->ch;
        weight=p->weight;
        fprintf(fp,"%c%d",ch,weight);
        p=p->next;
    }while(p);
    fprintf(fp,"%c%d",'^',END_TREE);

    fseek(fp,sizeof(char),1);                    //空出区分位
    
    /*0-1码写入文件后部分*/
    for(i=0;string[i];i++)
    {
        ch=string[i];
        fputc(ch,fp);
    }
    printf("\n压缩成功!\n");
    getch();
    fclose(fp);
}

/**
*读取编码后的0-1字符串
*输入:(哈夫曼结点指针)解码树的地址
*      (字符型指针)要接收的0-1码串首地址
*返回:空
*/
void readCode(LinkTree tree,char *string)
{

    FILE *fp;
    int i;
    int weight;                                    //记录读入的权值
    char ch;                                    //记录读入的字符
    LinkTree ptr,beforeptr;
    char path[PATH_LEN];                        //0-1码文件的读路径

    printf("请输入要解压的文件路径及文件名:(无需扩展名)");
    gets(path);
    if((fp=fopen(strcat(path,".yxy"),"r"))==NULL)
    {
        printf("\n文件路径出错!\n");
        getch();
        return;
    }
    beforeptr=tree;

    /*从文件前部分读出解码树*/
    fscanf(fp,"%c%d",&ch,&weight);
    while(weight!=END_TREE)
    {
        if((ptr=(LinkTree)malloc(sizeof(HTNode)))==NULL)
        {
            printf("内存不足!");
            getch();
            exit(1);                            //错误出口
        }
        ptr->ch=ch;
        ptr->weight=weight;
        ptr->lchild=NULL;
        ptr->rchild=NULL;
        ptr->parent=NULL;
        ptr->mark=0;
        beforeptr->next=ptr;
        beforeptr=ptr;
        fscanf(fp,"%c%d",&ch,&weight);
    }
    beforeptr->next=NULL;

    fseek(fp,sizeof(char),1);                    //文件指针定位
    
    /*从文件后部分读出0-1码*/
    ch=fgetc(fp);
    for(i=0;ch!=EOF;i++)
    {
        string[i]=ch;
        ch=fgetc(fp);
    }
    string[i]='\0';
    fclose(fp);
}

/**
 * Saves the decoded text.
 *
 * Prompts for a path without extension, appends ".txt" and writes string to
 * that file.  Success is reported only after the file has been closed
 * without error.
 *
 * Input:   string - NUL-terminated text to write
 * Returns: nothing
 */
void writeFile(char *string)
{
    FILE *fp;
    int c;
    int failed;
    char path[PATH_LEN];

    printf("请输入解压后的保存路径及文件名:(无需扩展名)");
    /* read at most PATH_LEN-5 characters so that ".txt" still fits */
    if(fgets(path,(int)sizeof(path)-4,stdin)==NULL)
        path[0]='\0';
    if(strchr(path,'\n')==NULL)                 // line was longer: discard the rest of it
        while((c=getchar())!=EOF&&c!='\n')
            ;
    path[strcspn(path,"\r\n")]='\0';

    if((fp=fopen(strcat(path,".txt"),"w+"))==NULL)
    {
        printf("\n文件路径出错!\n");
        getch();
        return;
    }

    failed=(fputs(string,fp)==EOF);
    if(fclose(fp)!=0)                           // buffered data is written out here
        failed=1;
    if(failed)
    {
        printf("\n文件写入出错!\n");
        getch();
        return;
    }
    printf("\n解压成功!\n");
    getch();
}

/**
 * Prints the main menu to standard output.
 *
 * Input:   none
 * Returns: nothing
 */
void menu()
{
    /* the screen, piece by piece, in output order */
    static const char *const screen[]=
    {
        "\n\n\n\n\n\n",
        "\t\t -----**   欢迎使用WINYXY压缩工具   **-----",
        "\n\n\n",
        "\t\t\t\t<c> 压 缩\n\n",
        "\t\t\t\t<u> 解 压\n\n",
        "\t\t\t\t<e> 退 出\n\n",
        "\n\n请按键选择:\n",
        "\n\n\n\n\n\n"
    };
    size_t piece;

    for(piece=0;piece<sizeof(screen)/sizeof(screen[0]);piece++)
        fputs(screen[piece],stdout);
}

回复列表 (共19个回复)

沙发

中文也可压缩。

板凳

请高手给点意见。

3 楼

能压缩二进制的吗?

4 楼

我默认的扩展名是.txt,只用来“模拟”压缩文本文件。

5 楼

能调试通过,可就是不懂

6 楼

谢谢这位大哥,好经典哦!

7 楼

你的程序还不错,不过我还想向你讨教几个问题,就是我想把你这段程序中下面这几个有关的内容:
LinkTree temp;            //保存赋了权值的表
LinkTree ht;            //保存排序后的表
LinkTree htree;            //保存哈夫曼树
我想把这几个变量里的数据输出来,应该加上什么样的语句才行呢?想请教一下高手!

8 楼

前两个可以按照单链表的方法,直接遍历就可以输出来。
后一个已成树,要按树的方法。

现在没太多的时间,不能给出代码,你先自己想想。

9 楼

有时间的话可以给出一下程序吗?谢谢了!

10 楼

压缩比是多少呢?

我来回复

您尚未登录,请登录后再回复。点此登录或注册