回 帖 发 新 帖 刷新版面

主题:[讨论]数据结构臭作--huffman编码解码(2进制形式)

以为huffman压缩很好写,所以编了一个,发现有问题!

程序能够比较好的编码/解码,但是,由于我是用二进制读写的,而huffman 编码长度不能保证是8的整数倍,所以实际写入文件的有效的编码是n个byte外加0到7个bits;那么最后再解码的时候,最后一个字节的多余的若干bits的信息就有可能造成多余的字符被解出来...

这么说吧,比如空格比较多,其编码是'00',而我最后多余了7个bits的信息,且每位都是'0',那么在解码的时候,就会多解出3个空格出来!

望哪位高手可以帮我解决这个问题!

(程序的编排有些乱,大家就将就一下吧,抱歉了!)

回复列表 (共19个回复)

沙发

[color=006000]#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <conio.h>
#include <cctype>
[/color]
[color=0000FF]typedef[/color] [color=0000FF]unsigned[/color] [color=0000FF]long[/color] [color=0000FF]long[/color]  bit64[color=a46434];[/color]  [color=006080]//  VC6 can use "__int"--以下划线为前缀的
[/color]                                    [color=006080]//关键字表示它将在其后续的版本中被替换掉
[/color][color=0000FF]typedef[/color] [color=0000FF]unsigned[/color] [color=0000FF]int[/color]        bit32[color=a46434];[/color]
[color=0000FF]typedef[/color] [color=0000FF]unsigned[/color] [color=0000FF]short[/color]      bit16[color=a46434];[/color]
[color=0000FF]typedef[/color] [color=0000FF]unsigned[/color] [color=0000FF]char[/color]       bit8[color=a46434];[/color]

[color=0000FF]enum[/color][color=a46434]{[/color]OUTPUT[color=a46434]=[/color][color=800080]1[/color][color=a46434]};[/color]     [color=006080]//  输出控制0: 不输出;1: 输出
[/color]
[color=0000FF]struct[/color] hufNode[color=a46434]{[/color]
    bit32   ch[color=a46434];[/color]     [color=006080]//  为了保证每个域的内存大小一样,特指定每个域类型都为一样!
[/color]    bit32   weight[color=a46434];[/color]
    bit32   left[color=a46434];[/color]
    bit32   right[color=a46434];[/color]
[color=a46434]};[/color]

[color=006080]//  打印树并返回树的深度
[/color]bit32 BinTreePrint[color=a46434]([/color]hufNode treeTab[color=a46434][],[/color] bit32 n[color=a46434]){[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434]){[/color]
        printf[color=a46434]([/color] isprint[color=a46434]([/color]treeTab[color=a46434][[/color]n[color=a46434]].[/color]ch[color=a46434])?[/color][color=cc0000]"<%c>"[/color][color=a46434]:[/color][color=cc0000]"<%02x>"[/color][color=a46434],[/color] treeTab[color=a46434][[/color]n[color=a46434]].[/color]ch [color=a46434]);[/color]
        printf[color=a46434]([/color][color=cc0000]"<w : %d>"[/color][color=a46434],[/color] treeTab[color=a46434][[/color]n[color=a46434]].[/color]weight[color=a46434]);[/color]
    [color=a46434]}[/color]
    [color=0000FF]if[/color][color=a46434]([/color] treeTab[color=a46434][[/color]n[color=a46434]].[/color]left [color=a46434]){[/color]
        bit32 d1[color=a46434],[/color] d2[color=a46434];[/color]
        [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] putchar[color=a46434]([/color][color=FF00FF]'{'[/color][color=a46434]);[/color]
        d1 [color=a46434]=[/color] BinTreePrint[color=a46434]([/color] treeTab[color=a46434],[/color] treeTab[color=a46434][[/color]n[color=a46434]].[/color]left [color=a46434]);[/color]
        d2 [color=a46434]=[/color] BinTreePrint[color=a46434]([/color] treeTab[color=a46434],[/color] treeTab[color=a46434][[/color]n[color=a46434]].[/color]right [color=a46434]);[/color]
        [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] putchar[color=a46434]([/color][color=FF00FF]'}'[/color][color=a46434]);[/color]
        [color=0000FF]return[/color] [color=a46434]([/color]d2[color=a46434]>[/color]d1[color=a46434]?[/color]d2[color=a46434]:[/color]d1[color=a46434])[/color] [color=a46434]+[/color] [color=800080]1[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]else[/color] [color=0000FF]return[/color] [color=800080]0[/color][color=a46434];[/color]
[color=a46434]}[/color]

[color=006080]//  原始压缩算法:
//  把一个静态二叉树原样写入输出文件头
[/color][color=0000FF]bool[/color] compress[color=a46434]([/color] [color=0000FF]const[/color] [color=0000FF]char[/color][color=a46434]*[/color] inName[color=a46434],[/color] [color=0000FF]const[/color] [color=0000FF]char[/color][color=a46434]*[/color] outName [color=a46434]){[/color]
    [color=006080]//  只压缩txt文档中的ASC字符,忽略其间的中文
[/color]    [color=0000FF]static[/color] [color=0000FF]const[/color] [color=0000FF]int[/color] charTabLen [color=a46434]=[/color] [color=800080]128[/color][color=a46434];[/color]
    bit32 charTab[color=a46434][[/color]charTabLen[color=a46434]]={[/color][color=800080]0[/color][color=a46434]};[/color]
    FILE [color=a46434]*[/color]in [color=a46434]=[/color] fopen[color=a46434]([/color]inName[color=a46434],[/color] [color=cc0000]"rb"[/color][color=a46434]),[/color] [color=a46434]*[/color]out [color=a46434]=[/color] fopen[color=a46434]([/color] outName[color=a46434],[/color] [color=cc0000]"wb"[/color] [color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] in [color=a46434]){[/color]
        printf[color=a46434]([/color] [color=cc0000]"Can not open file <%s> to read!\n"[/color][color=a46434],[/color] inName [color=a46434]);[/color]
        [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] out [color=a46434]){[/color]
        printf[color=a46434]([/color] [color=cc0000]"Can not open file <%s> to write!\n"[/color][color=a46434],[/color] outName [color=a46434]);[/color]
        [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
    [color=a46434]}[/color]

    bit32 tabSize[color=a46434]=[/color][color=800080]0[/color][color=a46434];[/color]    [color=006080]//  记录文章中实际使用的字符种数
[/color]    bit8 ch[color=a46434];[/color]
    [color=006080]//  size_t fread(void *ptr, size_t size, size_t n, FILE *stream);
[/color]    [color=0000FF]while[/color][color=a46434]([/color] [color=800080]1[/color] [color=a46434]==[/color] fread[color=a46434]([/color] [color=a46434]&[/color]ch[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] in [color=a46434])[/color] [color=a46434]){[/color]
        [color=0000FF]if[/color][color=a46434]([/color] ch [color=a46434]&[/color] [color=800080]0x80[/color] [color=a46434]){[/color][color=006080]//  回避中文字符
[/color]            fread[color=a46434]([/color] [color=a46434]&[/color]ch[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] in [color=a46434]);[/color]
            [color=0000FF]continue[/color][color=a46434];[/color]
        [color=a46434]}[/color]
        [color=0000FF]if[/color][color=a46434]([/color] charTab[color=a46434][[/color]ch[color=a46434]]==[/color][color=800080]0[/color] [color=a46434])[/color] tabSize[color=a46434]++;[/color]
        charTab[color=a46434][[/color]ch[color=a46434]]++;[/color]
    [color=a46434]}[/color]

    [color=006080]//  如何建立这棵二叉树?需结点空间数目是2*tabSize;叶子结点都是值;而...
[/color]    [color=006080]//  []
[/color]    [color=006080]// /  \
[/color]    [color=006080]//[]  []
[/color]    [color=006080]//该静态链表的第一个结点表示树根的位置
[/color]
    [color=006080]//  创建整个链表
[/color]    [color=006080]//  包含"头结点"1个,"叶子结点"tabSize个以及"辅助结点"tabSize-1个。
[/color]    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"table size = %d\n"[/color][color=a46434],[/color] tabSize [color=a46434]);[/color]
    hufNode[color=a46434]*[/color] treeTab [color=a46434]=[/color] [color=a46434]([/color]hufNode[color=a46434]*)[/color]malloc[color=a46434]([/color][color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434])*[/color][color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434]);[/color]
    memset[color=a46434]([/color] treeTab[color=a46434],[/color] [color=800080]0[/color][color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434])*[/color][color=800080]2[/color][color=a46434]*[/color]tabSize [color=a46434]);[/color]
    [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color][color=800080]0[/color][color=a46434],[/color] top[color=a46434]=[/color][color=800080]1[/color][color=a46434];[/color] i[color=a46434]<[/color]charTabLen[color=a46434];[/color] i[color=a46434]++[/color] [color=a46434]){[/color]   [color=006080]//  tabSize个"叶子"结点
[/color]        [color=0000FF]if[/color][color=a46434]([/color] charTab[color=a46434][[/color]i[color=a46434]][/color] [color=a46434]){[/color]
            treeTab[color=a46434][[/color]top[color=a46434]].[/color]ch [color=a46434]=[/color] i[color=a46434];[/color]
            treeTab[color=a46434][[/color]top[color=a46434]].[/color]weight [color=a46434]=[/color] charTab[color=a46434][[/color]i[color=a46434]];[/color]
            [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"%3d"[/color][color=a46434],[/color] i [color=a46434]);[/color]
            [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color]
                printf[color=a46434]([/color] isprint[color=a46434]([/color] i [color=a46434])[/color] [color=a46434]?[/color] [color=cc0000]" <%c> %d\n"[/color] [color=a46434]:[/color] [color=cc0000]" <%02x> %d\n"[/color][color=a46434],[/color]
                    i[color=a46434],[/color] charTab[color=a46434][[/color]i[color=a46434]][/color] [color=a46434]);[/color]
            top[color=a46434]++;[/color]
        [color=a46434]}[/color]
    [color=a46434]}[/color]
    [color=0000FF]int[/color][color=a46434]*[/color] volun [color=a46434]=[/color] [color=a46434]([/color][color=0000FF]int[/color][color=a46434]*)[/color] malloc[color=a46434]([/color][color=0000FF]sizeof[/color][color=a46434]([/color][color=0000FF]int[/color][color=a46434])*[/color][color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434]);[/color]  [color=006080]//  确定结点的使用情况
[/color]    memset[color=a46434]([/color] volun[color=a46434],[/color] [color=800080]0[/color][color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color] [color=0000FF]int[/color] [color=a46434])*[/color][color=800080]2[/color][color=a46434]*[/color]tabSize [color=a46434]);[/color]
    [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color][color=800080]1[/color][color=a46434];[/color] i[color=a46434]<=[/color]tabSize[color=a46434];[/color] i[color=a46434]++[/color] [color=a46434]){[/color]
        volun[color=a46434][[/color]i[color=a46434]]=[/color][color=800080]1[/color][color=a46434];[/color]
    [color=a46434]}[/color]

    [color=006080]//  m1,m2最小和第二小结点----两个游标
[/color]    [color=006080]//  需要不断从上面链表中选出权值最小的两个结点,并从剩余的辅助结点中选出一个
[/color]    [color=006080]//结点[以rear为下标]作为他们的父结点,重新纳入备选链表!
[/color]    [color=006080]//  base 表中首个非0的结点序号
[/color]    [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] rear[color=a46434]=[/color]tabSize[color=a46434]+[/color][color=800080]1[/color][color=a46434],[/color] m1[color=a46434],[/color] m2[color=a46434],[/color] base[color=a46434]=[/color][color=800080]1[/color][color=a46434];[/color] rear[color=a46434]<[/color][color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434];[/color] rear[color=a46434]++[/color] [color=a46434]){[/color]
        [color=006080]//  确定两个游标的初始值
[/color]        [color=0000FF]while[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] volun[color=a46434][[/color]base[color=a46434]][/color] [color=a46434])[/color] base[color=a46434]++;[/color]
        m1 [color=a46434]=[/color] base[color=a46434];[/color]
        [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color]base[color=a46434]+[/color][color=800080]1[/color][color=a46434];[/color] i[color=a46434]<[/color][color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434];[/color] i[color=a46434]++[/color] [color=a46434]){[/color]
            [color=0000FF]if[/color][color=a46434]([/color] volun[color=a46434][[/color]i[color=a46434]][/color] [color=a46434]){[/color]
                [color=0000FF]if[/color][color=a46434]([/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]weight [color=a46434]<[/color] treeTab[color=a46434][[/color]m1[color=a46434]].[/color]weight [color=a46434]){[/color]
                    m2 [color=a46434]=[/color] m1[color=a46434];[/color]
                    m1 [color=a46434]=[/color] i[color=a46434];[/color]
                [color=a46434]}[/color]
                [color=0000FF]else[/color][color=a46434]{[/color]
                    m2 [color=a46434]=[/color] i[color=a46434];[/color]
                [color=a46434]}[/color]
                [color=0000FF]break[/color][color=a46434];[/color]
            [color=a46434]}[/color]
        [color=a46434]}[/color]
        [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i [color=a46434]=[/color] m2[color=a46434]>[/color]m1[color=a46434]?[/color]m2[color=a46434]:[/color]m1 [color=a46434]+[/color] [color=800080]1[/color][color=a46434];[/color] i [color=a46434]<[/color] [color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434];[/color] i[color=a46434]++[/color] [color=a46434]){[/color]
            [color=0000FF]if[/color][color=a46434]([/color] [color=a46434]![/color]volun[color=a46434][[/color]i[color=a46434]][/color] [color=a46434])[/color] [color=0000FF]continue[/color][color=a46434];[/color]
            [color=0000FF]if[/color][color=a46434]([/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]weight [color=a46434]<[/color] treeTab[color=a46434][[/color]m1[color=a46434]].[/color]weight [color=a46434]){[/color]
                m2 [color=a46434]=[/color] m1[color=a46434];[/color]
                m1 [color=a46434]=[/color] i[color=a46434];[/color]
            [color=a46434]}[/color]
            [color=0000FF]else[/color] [color=0000FF]if[/color][color=a46434]([/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]weight [color=a46434]<[/color] treeTab[color=a46434][[/color]m2[color=a46434]].[/color]weight [color=a46434]){[/color]
                m2 [color=a46434]=[/color] i[color=a46434];[/color]
            [color=a46434]}[/color]
        [color=a46434]}[/color]
        treeTab[color=a46434][[/color]rear[color=a46434]].[/color]weight [color=a46434]=[/color] treeTab[color=a46434][[/color]m1[color=a46434]].[/color]weight [color=a46434]+[/color] treeTab[color=a46434][[/color]m2[color=a46434]].[/color]weight[color=a46434];[/color]
        treeTab[color=a46434][[/color]rear[color=a46434]].[/color]left [color=a46434]=[/color] m1[color=a46434];[/color]
        treeTab[color=a46434][[/color]rear[color=a46434]].[/color]right[color=a46434]=[/color] m2[color=a46434];[/color]
        volun[color=a46434][[/color]rear[color=a46434]]=[/color][color=800080]1[/color][color=a46434];[/color]
        volun[color=a46434][[/color]m1[color=a46434]]=[/color]volun[color=a46434][[/color]m2[color=a46434]]=[/color][color=800080]0[/color][color=a46434];[/color]
        [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"m1 %3d w%5d\n"[/color][color=a46434],[/color] m1[color=a46434],[/color] treeTab[color=a46434][[/color]m1[color=a46434]].[/color]weight[color=a46434]);[/color]
        [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"m2 %3d w%5d\n"[/color][color=a46434],[/color] m2[color=a46434],[/color] treeTab[color=a46434][[/color]m2[color=a46434]].[/color]weight[color=a46434]);[/color]
        [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"root %3d\n"[/color][color=a46434],[/color] rear [color=a46434]);[/color]
    [color=a46434]}[/color]
    treeTab[color=a46434][[/color][color=800080]0[/color][color=a46434]].[/color]weight [color=a46434]=[/color] [color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434]-[/color][color=800080]1[/color][color=a46434];[/color]    [color=006080]//  树根结点,记录总的结点数
[/color]    [color=0000FF]int[/color] dep [color=a46434]=[/color] BinTreePrint[color=a46434]([/color] treeTab[color=a46434],[/color] [color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434]-[/color][color=800080]1[/color] [color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] printf[color=a46434]([/color][color=cc0000]"\nthe dep is %d\n"[/color][color=a46434],[/color] dep [color=a46434]);[/color]

    [color=006080]//  二叉树构成了以后,该如何生成huffman编码表,使得转换速度比较快?
[/color]    [color=006080]//首选当然是类似hash表的模式!
[/color]    [color=006080]//  用上面的二叉树生成的编码有些是以0开头的,并以0结尾...如何界定每个编码的
[/color]    [color=006080]//长度就有了麻烦!
[/color]    [color=006080]//  需要一定的空间来储存这个编码长度的信息!
[/color]    [color=006080]//  这样处理,把编码补长一个bit,令该bit为1,其后的作为有效编码部分!
[/color]    [color=006080]//  比如对编码'00',则变为'100'!
[/color]
    [color=006080]//  注意,解码的时候,实际是不断的从文件中一个一个byte的读出内容,并根据该字
[/color]    [color=006080]//节二进制信息,从二叉树顶,移动到某叶子上,并输出该叶子的ch值,再回到树根。
[/color]    [color=006080]//  那么,二进制信息的写入就变得非常重要了!需要选择合适的方式!
[/color]    [color=0000FF]void[/color] createHufCode[color=a46434]([/color] hufNode[color=a46434]*[/color] treeTab[color=a46434],[/color] [color=0000FF]int[/color] i[color=a46434],[/color] bit32 charTab[color=a46434][],[/color] bit32 code[color=a46434]);[/color]
    [color=0000FF]int[/color] codeLen[color=a46434]([/color] bit32 code [color=a46434]);[/color]
    fwrite[color=a46434]([/color] treeTab[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434]),[/color] [color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434],[/color] out [color=a46434]);[/color]
    memset[color=a46434]([/color] charTab[color=a46434],[/color] [color=800080]0[/color][color=a46434],[/color] charTabLen[color=a46434]*[/color][color=0000FF]sizeof[/color][color=a46434]([/color]bit32[color=a46434])[/color] [color=a46434]);[/color]
    createHufCode[color=a46434]([/color] treeTab[color=a46434],[/color] [color=800080]2[/color][color=a46434]*[/color]tabSize[color=a46434]-[/color][color=800080]1[/color][color=a46434],[/color] charTab[color=a46434],[/color] [color=800080]1[/color] [color=a46434]);[/color]

    [color=0000FF]void[/color] hufCodePrint[color=a46434]([/color] bit32 code [color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434]){[/color]
        [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color][color=800080]0[/color][color=a46434];[/color] i[color=a46434]<[/color]charTabLen[color=a46434];[/color] i[color=a46434]++[/color] [color=a46434]){[/color]
            [color=0000FF]if[/color][color=a46434]([/color] charTab[color=a46434][[/color]i[color=a46434]][/color] [color=a46434]){[/color]
                printf[color=a46434]([/color]isprint[color=a46434]([/color]i[color=a46434])?[/color][color=cc0000]"<%c> "[/color][color=a46434]:[/color][color=cc0000]"<%02x> "[/color][color=a46434],[/color] i[color=a46434]);[/color]
                hufCodePrint[color=a46434]([/color] charTab[color=a46434][[/color]i[color=a46434]][/color] [color=a46434]);[/color]
            [color=a46434]}[/color]
        [color=a46434]}[/color]
    [color=a46434]}[/color]

    fseek[color=a46434]([/color]in[color=a46434],[/color] [color=800080]0[/color][color=a46434],[/color] SEEK_SET[color=a46434]);[/color]
    bit32 code[color=a46434];[/color]
    bit32 buff[color=a46434]=[/color][color=800080]0[/color][color=a46434];[/color]
    bit32 top[color=a46434]=[/color][color=800080]0[/color][color=a46434];[/color]
    bit32 len[color=a46434];[/color]
    [color=0000FF]while[/color][color=a46434]([/color] [color=800080]1[/color] [color=a46434]==[/color] fread[color=a46434]([/color] [color=a46434]&[/color]ch[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] in [color=a46434])[/color] [color=a46434]){[/color]
        [color=0000FF]if[/color][color=a46434]([/color] ch [color=a46434]&[/color] [color=800080]0x80[/color] [color=a46434]){[/color]    [color=006080]//  回避中文字符
[/color]            fseek[color=a46434]([/color] in[color=a46434],[/color] [color=800080]1[/color][color=a46434],[/color] SEEK_CUR [color=a46434]);[/color]   [color=006080]//  seek one byte ahead
[/color]            [color=0000FF]continue[/color][color=a46434];[/color]
        [color=a46434]}[/color]
        code [color=a46434]=[/color] charTab[color=a46434][[/color]ch[color=a46434]];[/color]
        [color=0000FF]if[/color][color=a46434]([/color] [color=a46434]![/color]code [color=a46434]){[/color]
            puts[color=a46434]([/color][color=cc0000]"somthing wrong with code creating!"[/color][color=a46434]);[/color]
            [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
        [color=a46434]}[/color]
        len [color=a46434]=[/color] codeLen[color=a46434]([/color]code[color=a46434]);[/color]        [color=006080]//  以排头的1作为长度标识。
[/color]        top [color=a46434]+=[/color] [color=a46434]([/color]len[color=a46434]-[/color][color=800080]1[/color][color=a46434]);[/color]
        code [color=a46434]^=[/color] [color=800080]1[/color][color=a46434]<<([/color]len[color=a46434]-[/color][color=800080]1[/color][color=a46434]);[/color]         [color=006080]//  把该位修正为0;
[/color]        buff [color=a46434]|=[/color] code [color=a46434]<<[/color] [color=a46434]([/color][color=800080]32[/color][color=a46434]-[/color]top[color=a46434]);[/color]   [color=006080]//  压入输出"队列"
[/color]        [color=0000FF]while[/color][color=a46434]([/color] top[color=a46434]>=[/color][color=800080]8[/color] [color=a46434]){[/color]
            fwrite[color=a46434]([/color] [color=a46434]([/color][color=0000FF]char[/color][color=a46434]*)&[/color]buff[color=a46434]+[/color][color=800080]3[/color][color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] out [color=a46434]);[/color]
            top[color=a46434]-=[/color][color=800080]8[/color][color=a46434];[/color]
            buff[color=a46434]<<=[/color][color=800080]8[/color][color=a46434];[/color]
        [color=a46434]}[/color]
    [color=a46434]}[/color]
    [color=0000FF]if[/color][color=a46434]([/color] top [color=a46434]){[/color]                      [color=006080]//  把队列清空
[/color]        fwrite[color=a46434]([/color] [color=a46434]([/color][color=0000FF]char[/color][color=a46434]*)&[/color]buff[color=a46434]+[/color][color=800080]3[/color][color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color]  out [color=a46434]);[/color]
    [color=a46434]}[/color]

    fclose[color=a46434]([/color]in[color=a46434]);[/color]
    fclose[color=a46434]([/color]out[color=a46434]);[/color]
    free[color=a46434]([/color]treeTab[color=a46434]);[/color]
    free[color=a46434]([/color]volun[color=a46434]);[/color]
    [color=0000FF]return[/color] [color=0000FF]true[/color][color=a46434];[/color]
[color=a46434]}[/color]

[color=0000FF]int[/color] codeLen[color=a46434]([/color] bit32 code [color=a46434]){[/color]
    [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color][color=800080]31[/color][color=a46434];[/color] i[color=a46434]>=[/color][color=800080]0[/color][color=a46434];[/color] i[color=a46434]--[/color] [color=a46434]){[/color]
        [color=0000FF]if[/color][color=a46434]([/color] code [color=a46434]&[/color] [color=a46434]([/color][color=800080]1[/color][color=a46434]<<[/color]i[color=a46434])[/color] [color=a46434])[/color] [color=0000FF]return[/color] i[color=a46434]+[/color][color=800080]1[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]return[/color] [color=800080]0[/color][color=a46434];[/color]
[color=a46434]}[/color]

[color=0000FF]void[/color] hufCodePrint[color=a46434]([/color] bit32 code [color=a46434]){[/color]
    [color=0000FF]if[/color][color=a46434]([/color] code [color=a46434]<=[/color] [color=800080]0x2[/color] [color=a46434]){[/color]
        puts[color=a46434]([/color][color=cc0000]"wrong code"[/color][color=a46434]);[/color]
        [color=0000FF]return[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] i[color=a46434]=[/color]codeLen[color=a46434]([/color]code[color=a46434])-[/color][color=800080]2[/color][color=a46434];[/color] i[color=a46434]>=[/color][color=800080]0[/color][color=a46434];[/color] i[color=a46434]--[/color] [color=a46434]){[/color]
        printf[color=a46434]([/color][color=cc0000]"%d"[/color][color=a46434],([/color]code[color=a46434]>>[/color]i[color=a46434])&[/color][color=800080]1[/color][color=a46434]);[/color]
    [color=a46434]}[/color]
    puts[color=a46434]([/color][color=cc0000]""[/color][color=a46434]);[/color]
[color=a46434]}[/color]

[color=0000FF]void[/color] createHufCode[color=a46434]([/color] hufNode[color=a46434]*[/color] treeTab[color=a46434],[/color] [color=0000FF]int[/color] i[color=a46434],[/color] bit32 charTab[color=a46434][],[/color] bit32 code[color=a46434]){[/color]
    [color=0000FF]if[/color][color=a46434]([/color] [color=a46434]![/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]left [color=a46434]){[/color]
        charTab[color=a46434][[/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]ch[color=a46434]][/color] [color=a46434]=[/color] code[color=a46434];[/color]
        [color=0000FF]return[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    createHufCode[color=a46434]([/color] treeTab[color=a46434],[/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]left[color=a46434],[/color]  charTab[color=a46434],[/color] [color=a46434]([/color]code[color=a46434]<<[/color][color=800080]1[/color][color=a46434])+[/color][color=800080]0[/color] [color=a46434]);[/color]
    createHufCode[color=a46434]([/color] treeTab[color=a46434],[/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]right[color=a46434],[/color] charTab[color=a46434],[/color] [color=a46434]([/color]code[color=a46434]<<[/color][color=800080]1[/color][color=a46434])+[/color][color=800080]1[/color] [color=a46434]);[/color]
[color=a46434]}[/color]

[color=006080]//  从被解压文件头处读出二叉树
[/color][color=0000FF]bool[/color] abstract[color=a46434]([/color] [color=0000FF]const[/color] [color=0000FF]char[/color][color=a46434]*[/color] inName[color=a46434],[/color] [color=0000FF]const[/color] [color=0000FF]char[/color][color=a46434]*[/color] outName [color=a46434]){[/color]
    FILE [color=a46434]*[/color]in [color=a46434]=[/color] fopen[color=a46434]([/color]inName[color=a46434],[/color] [color=cc0000]"rb"[/color][color=a46434]),[/color] [color=a46434]*[/color]out [color=a46434]=[/color] fopen[color=a46434]([/color]outName[color=a46434],[/color] [color=cc0000]"wb"[/color][color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] in [color=a46434]){[/color]
        printf[color=a46434]([/color] [color=cc0000]"Can not open file <%s> to read!\n"[/color][color=a46434],[/color] inName [color=a46434]);[/color]
        [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] out [color=a46434]){[/color]
        printf[color=a46434]([/color] [color=cc0000]"Can not open file <%s> to write!\n"[/color][color=a46434],[/color] outName [color=a46434]);[/color]
        [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=006080]//  重建huffman树
[/color]    hufNode [color=a46434]*[/color]treeTab [color=a46434]=[/color] [color=a46434]([/color]hufNode[color=a46434]*)[/color]malloc[color=a46434]([/color][color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434]));[/color]
    fread[color=a46434]([/color] treeTab[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] in [color=a46434]);[/color]
    bit32 tabLen [color=a46434]=[/color] treeTab[color=a46434]->[/color]weight[color=a46434]+[/color][color=800080]1[/color][color=a46434];[/color]
    treeTab [color=a46434]=[/color] [color=a46434]([/color]hufNode[color=a46434]*)[/color]realloc[color=a46434]([/color]treeTab[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434])*[/color]tabLen[color=a46434]);[/color]
    fread[color=a46434]([/color] treeTab[color=a46434]+[/color][color=800080]1[/color][color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]hufNode[color=a46434]),[/color] tabLen[color=a46434]-[/color][color=800080]1[/color][color=a46434],[/color] in [color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] BinTreePrint[color=a46434]([/color] treeTab[color=a46434],[/color] tabLen[color=a46434]-[/color][color=800080]1[/color] [color=a46434]);[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] puts[color=a46434]([/color][color=cc0000]"\nrebuilt succeed!"[/color] [color=a46434]);[/color]

    bit8 ch[color=a46434];[/color]
    [color=0000FF]int[/color] i[color=a46434]=[/color]tabLen[color=a46434]-[/color][color=800080]1[/color][color=a46434];[/color]
    [color=0000FF]while[/color][color=a46434]([/color] [color=800080]1[/color] [color=a46434]==[/color] fread[color=a46434]([/color] [color=a46434]&[/color]ch[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] in [color=a46434])[/color] [color=a46434]){[/color]
        [color=0000FF]for[/color][color=a46434]([/color] [color=0000FF]int[/color] dep[color=a46434]=[/color][color=800080]0[/color][color=a46434];[/color] dep[color=a46434]<[/color][color=800080]8[/color][color=a46434];[/color] ch[color=a46434]<<=[/color][color=800080]1[/color][color=a46434],[/color] dep[color=a46434]++[/color] [color=a46434]){[/color]
            [color=0000FF]if[/color][color=a46434]([/color] [color=a46434]![/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]left [color=a46434]!=[/color] [color=a46434]![/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]right [color=a46434]){[/color]
                puts[color=a46434]([/color][color=cc0000]"wrong in code tree!"[/color][color=a46434]);[/color]
                [color=0000FF]return[/color] [color=0000FF]false[/color][color=a46434];[/color]
            [color=a46434]}[/color]
            [color=0000FF]if[/color][color=a46434]([/color] [color=a46434]![/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]left [color=a46434]&&[/color] [color=a46434]![/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]right [color=a46434]){[/color]
                fwrite[color=a46434]([/color] [color=a46434]&[/color]treeTab[color=a46434][[/color]i[color=a46434]].[/color]ch[color=a46434],[/color] [color=0000FF]sizeof[/color][color=a46434]([/color]bit8[color=a46434]),[/color] [color=800080]1[/color][color=a46434],[/color] out [color=a46434]);[/color]
                i [color=a46434]=[/color] tabLen[color=a46434]-[/color][color=800080]1[/color][color=a46434];[/color]
            [color=a46434]}[/color]
            [color=0000FF]if[/color][color=a46434]([/color] ch[color=a46434]&[/color][color=800080]0x80[/color] [color=a46434]){[/color]
                i [color=a46434]=[/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]right[color=a46434];[/color]
            [color=a46434]}[/color]
            [color=0000FF]else[/color] [color=a46434]{[/color]
                i [color=a46434]=[/color] treeTab[color=a46434][[/color]i[color=a46434]].[/color]left[color=a46434];[/color]
            [color=a46434]}[/color]
        [color=a46434]}[/color]
    [color=a46434]}[/color]
    free[color=a46434]([/color] treeTab [color=a46434]);[/color]
    fclose[color=a46434]([/color] in [color=a46434]);[/color]
    fclose[color=a46434]([/color] out [color=a46434]);[/color]
    [color=0000FF]return[/color] [color=0000FF]true[/color][color=a46434];[/color]
[color=a46434]}[/color]

[color=0000FF]int[/color] [color=0000FF]main[/color][color=a46434]([/color] [color=0000FF]int[/color] arc[color=a46434],[/color] [color=0000FF]char[/color][color=a46434]**[/color] agv [color=a46434]){[/color]
    [color=0000FF]char[/color] outName[color=a46434][[/color][color=800080]256[/color][color=a46434]];[/color]
    [color=0000FF]switch[/color][color=a46434]([/color] arc [color=a46434]){[/color]
    [color=0000FF]case[/color] [color=800080]2[/color][color=a46434]:[/color]
        sprintf[color=a46434]([/color] outName[color=a46434],[/color] [color=cc0000]"%s.huf"[/color][color=a46434],[/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]][/color] [color=a46434]);[/color]
        [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] compress[color=a46434]([/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]],[/color] outName [color=a46434])[/color] [color=a46434]){[/color]
            printf[color=a46434]([/color] [color=cc0000]"Error Encounters in compressing file <%s>\n"[/color][color=a46434],[/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]][/color] [color=a46434]);[/color]
        [color=a46434]}[/color]
        [color=0000FF]else[/color][color=a46434]{[/color]
            printf[color=a46434]([/color] [color=cc0000]"Compress to <%s> succeed!\n"[/color][color=a46434],[/color] outName [color=a46434]);[/color]
        [color=a46434]}[/color]
        [color=0000FF]break[/color][color=a46434];[/color]
    [color=0000FF]case[/color] [color=800080]3[/color][color=a46434]:[/color]
        [color=0000FF]if[/color][color=a46434]([/color] tolower[color=a46434]([/color]agv[color=a46434][[/color][color=800080]2[/color][color=a46434]][[/color][color=800080]0[/color][color=a46434]])[/color] [color=a46434]!=[/color] [color=FF00FF]'u'[/color] [color=a46434]){[/color]
            puts[color=a46434]([/color] [color=cc0000]"Error using Abstract file!"[/color] [color=a46434]);[/color]
            [color=0000FF]break[/color][color=a46434];[/color]
        [color=a46434]}[/color]
        sprintf[color=a46434]([/color] outName[color=a46434],[/color] [color=cc0000]"%s.txt"[/color][color=a46434],[/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]][/color] [color=a46434]);[/color]
        [color=0000FF]if[/color][color=a46434]([/color] [color=800080]0[/color] [color=a46434]==[/color] abstract[color=a46434]([/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]],[/color] outName [color=a46434])[/color] [color=a46434]){[/color]
            printf[color=a46434]([/color] [color=cc0000]"Error Encounters in abstracting file <%s>\n"[/color][color=a46434],[/color] agv[color=a46434][[/color][color=800080]1[/color][color=a46434]][/color] [color=a46434]);[/color]
        [color=a46434]}[/color]
        [color=0000FF]else[/color][color=a46434]{[/color]
            printf[color=a46434]([/color] [color=cc0000]"Abstract to <%s> succeed!\n"[/color][color=a46434],[/color] outName [color=a46434]);[/color]
        [color=a46434]}[/color]
        [color=0000FF]break[/color][color=a46434];[/color]
    [color=0000FF]default[/color][color=a46434]:[/color]
        puts[color=a46434]([/color] [color=cc0000]"usage: huffman infile"[/color]    [color=a46434]);[/color]
        puts[color=a46434]([/color] [color=cc0000]"to Compress file infile."[/color] [color=a46434]);[/color]
        puts[color=a46434]([/color] [color=cc0000]"       huffman infile u"[/color]  [color=a46434]);[/color]
        puts[color=a46434]([/color] [color=cc0000]"to Abstract file infile."[/color] [color=a46434]);[/color]
        [color=0000FF]break[/color][color=a46434];[/color]
    [color=a46434]}[/color]
    [color=0000FF]if[/color][color=a46434]([/color] OUTPUT [color=a46434])[/color] system[color=a46434]([/color][color=cc0000]"pause"[/color][color=a46434]);[/color]
    [color=0000FF]return[/color] [color=800080]0[/color][color=a46434];[/color]
[color=a46434]}[/color]

板凳

强!!!!牛!!!!!!

顶:哩哩啦啦哩哩啦啦了!!!!

3 楼

问题就出现在这里,最后,把不够1个byte的信息全用'0'补足,再输出....
[quote]
    if( top ){                      //  把队列清空
        fwrite( (char*)&buff+3, sizeof(bit8), 1,  out );
    }
[/quote]

另,我写本程序的原始目的是验证顺序化非顺序结构的外部存储,再从外存中重建!本来是比较成功的,但是,这个“多余信息”让我有些头痛...

4 楼

强的一B哦,佩服

5 楼

Do a google search, you will find a ton! All works. They are The original Hoffman algorithm.

But it is hard to find a Dynamic Hoffman Coding Algoritnm code.

6 楼

justforfun626老兄,你又来“just do a google search...”了!

讨论一下,行不?

这是种比较原始的压缩算法,我的目的只是练习树的重建;而我遇到的问题可能一点就通,你直接点拨点拨我不行么?

7 楼

顶一下!

8 楼

存个文件头,里边保存一下总位数。就知道最后的多少位该忽略掉了。

9 楼

顶!!  强烈的顶!!!

10 楼

我给你发的程序出错,是在VC下吗?我这边正常,可能是我的字符集不是完整的ASC码导致的。发给你的code还没有用你的位操作写入,注释里说了啊,可能变成乱码了,因为码长是16位整数,所以活活把文件扩大了一倍呵呵。另外,我考虑的是动态构造huffman tree, 可是典型的算法好像是提供编码树,为什么提倡用后者?最近比较忙,还没来得及仔细看你的code,过几天把位写入搞好再发发看。另外打算扩展到图像压缩,以及编写lzw编解码。“凑字节”真不是个轻松活...

我来回复

您尚未登录,请登录后再回复。点此登录或注册