算法导论之贪心算法：哈夫曼编码

最新推荐文章于 2024-02-05 15:13:26 发布

原创最新推荐文章于 2024-02-05 15:13:26 发布 · 2.6k 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#算法导论 #贪心算法

算法导论专栏收录该内容

12 篇文章

订阅专栏

哈夫曼编码是可变字长编码(VLC)的一种。其思想是赋予高频字符短码字，赋予低频字符长码字。

通过这种编码可以有效的压缩数据，通常是20%~90%。

在构造贪心算法时，我们不必完整执行《算法导论之贪心算法：活动选择问题》中提到的所有步骤。

我们可以将其更一般化为：

1、将最优化问题转化为这样的形式：对其作出一次选择后，只剩下一个子问题需要求解。

2、证明作出贪心选择后，原问题总是存在最优解，即贪心选择总是安全的。

3、证明作出贪心选择后，剩下的子问题满足性质：其最优解与贪心选择组合即可得到原问题的最优解，这样就得到了最优子结构。

代码实现如下：

为了实现这个题目，我额外写了一个基于小顶堆（最小堆）的优先队列，包含：建堆、调整堆、堆排序、取出最小值并维护堆的性质、插入值并维护堆的性质。

#include <stdio.h>

/*
 * Binary tree node used both as a heap element and as a Huffman tree node.
 * Both the `struct Node` tag and the `Node` typedef name are available.
 */
typedef struct Node {
	struct Node *left;   /* left child; NULL for a leaf */
	struct Node *right;  /* right child; NULL for a leaf */
	char value;          /* symbol carried by the node (printed for leaves) */
	float freq;          /* weight used as the ordering key in the heap */
} Node;

/* Index of the left child of element i in a 0-based array heap. */
int LEFT(int i) {
	return 2 * i + 1;
}
/* Index of the right child of element i in a 0-based array heap. */
int RIGHT(int i) {
	return 2 * i + 2;
}

/*
 * Index of the parent of element i in a 0-based array heap.
 * Intended for i >= 1; for i == 0 the result is a right shift of -1,
 * whose value is implementation-defined.
 */
int PARENT(int i) {
	int above = i - 1;
	return above >> 1;
}

/* Exchanges the node pointers stored at positions i and j of A. */
void SWAP(Node *A[], int i, int j) {
	Node *held = A[j];
	A[j] = A[i];
	A[i] = held;
}

/*
 * Sifts the element at index i down until neither child has a smaller or
 * equal freq. Despite the name, this maintains a MIN-heap ordered by freq.
 *
 * A          array of node pointers
 * i          index to start sifting from
 * Heap_Size  number of leading elements of A that belong to the heap
 *
 * Comparisons use <=, so a child with an equal freq is still swapped
 * upwards; the left child is examined before the right one.
 */
void MAX_HEAPIFY(Node *A[],int i,int Heap_Size){
	int current = i;
	for (;;) {
		const int lchild = LEFT(current);
		const int rchild = RIGHT(current);
		int smallest = current;

		if (lchild < Heap_Size && A[lchild]->freq <= A[current]->freq) {
			smallest = lchild;
		}
		if (rchild < Heap_Size && A[rchild]->freq <= A[smallest]->freq) {
			smallest = rchild;
		}
		if (smallest == current) {
			break; /* heap property holds at this position */
		}
		SWAP(A, current, smallest);
		current = smallest; /* continue sifting in the affected subtree */
	}
}
/*
 * Rearranges the first len elements of A into a min-heap keyed on freq by
 * sifting down every position from (len - 1) / 2 back to the root.
 */
void BUILD_MIN_HEAP(Node *A[],int len) {
	int idx = (len - 1) / 2;
	while (idx >= 0) {
		MAX_HEAPIFY(A, idx, len);
		idx--;
	}
}

/*
 * Heap sort pass over the first len elements of A. The function does not
 * build the heap itself; it repeatedly moves the root to the end of the
 * shrinking heap region and sifts the new root down.
 */
void HEAP_SORT(Node *A[],int len) {
	int remaining = len;
	while (remaining > 1) {
		SWAP(A, 0, remaining - 1); /* root goes to the last live slot */
		remaining--;
		MAX_HEAPIFY(A, 0, remaining);
	}
}
/*
 * Adds z to a min-heap that currently holds len elements.
 *
 * z is stored at A[len] and then bubbled up while its parent has a strictly
 * larger freq. The caller must guarantee that A has room for index len and
 * is responsible for incrementing its own element count afterwards.
 */
void INSERT(Node *A[],int len,Node *z){
	int child = len;
	A[child] = z;
	while (child > 0) {
		int parent = PARENT(child);
		if (!(A[parent]->freq > A[child]->freq)) {
			break; /* parent is not larger: heap order restored */
		}
		SWAP(A, parent, child);
		child = parent;
	}
}

/*
 * Removes the minimum-freq node from a min-heap of len elements.
 *
 * The root is swapped with the last element and the heap property is
 * restored on the first len - 1 elements, so the extracted node ends up
 * at A[len - 1]. The caller is responsible for decrementing its own
 * element count afterwards.
 *
 * Returns the extracted node, or NULL when A is NULL or len <= 0
 * (the original code read A[-1] in that case).
 */
Node *EXTRACT_MIN(Node *A[],int len){
	if (A == NULL || len <= 0) {
		return NULL;
	}

	int last = len - 1;
	if (last > 0) {
		SWAP(A, 0, last);
		MAX_HEAPIFY(A, 0, last); /* heap now spans indices 0 .. last-1 */
	}
	return A[last];
}
/*
int main()
{
	int Heap[] = { 16, 4, 10, 14, 7, 9, 3, 2, 8, 1 };
	int n = sizeof(Heap)/sizeof(int);
	printf("%d\n",n);

	Node *AB[n];
	int i=0;
	for(;i<n;i++){
		Node *temp = (Node *)malloc(sizeof(Node));
		temp->freq=Heap[i];
		AB[i]=temp;
	}
	// Test: build the min-heap
	BUILD_MIN_HEAP(AB,n);
	i = 0;
	for (; i < n; i++) {
		printf("%0.2f,",AB[i]->freq);
	}
	// Test: extract the minimum
	printf("\n");
	EXTRACT_MIN(AB,n);
	i = 0;
	for (; i < n; i++) {
		printf("%0.2f,",AB[i]->freq);
	}
	// Test: heap sort on the min-heap
	printf("\n");
	HEAP_SORT(AB,n-1);
	i=0;
	for (; i < n; i++) {
		printf("%0.2f,",AB[i]->freq);
	}
	return 0;
}
*/

接下来是实现 构造哈夫曼树和输出哈夫曼编码：

#include <stdio.h>
#include <stdlib.h>
#include "LittleHeapSort.c" 

/*
 * Builds a Huffman tree from n nodes.
 *
 * input  array of n node pointers that must already be arranged as a
 *        min-heap on freq (e.g. via BUILD_MIN_HEAP); it is used as the
 *        priority queue and is reordered in place
 * n      number of nodes in input
 *
 * Each of the n - 1 rounds extracts the two lowest-freq nodes and inserts
 * a newly allocated parent whose freq is their sum.
 *
 * Returns the root of the tree, or NULL when input is NULL, n <= 0, or an
 * allocation fails. On allocation failure the heap is left intact, so the
 * nodes built so far remain reachable through input. The caller owns the
 * returned tree, including the internal nodes allocated here with malloc.
 */
Node *HUFFMAN(Node *input[],int n){
	if(input==NULL || n<=0){
		return NULL;
	}

	int len=n;
	int round;
	for(round=0;round<n-1;round++){
		/* Allocate before touching the heap so a failure leaves it consistent. */
		Node *z=malloc(sizeof *z);
		if(z==NULL){
			return NULL;
		}
		Node *x=EXTRACT_MIN(input,len--);
		Node *y=EXTRACT_MIN(input,len--);
		z->left=x;
		z->right=y;
		z->value='\0'; /* internal nodes carry no symbol */
		z->freq=x->freq+y->freq;
		INSERT(input,len++,z);
	}
	/* Exactly one node is left in the heap: the root of the tree. */
	return EXTRACT_MIN(input,len);
}

/*
 * Prints the code of every leaf below root as "<symbol>:<bits>\n" on
 * stdout, using '0' for a left edge and '1' for a right edge.
 *
 * root  (sub)tree to print; NULL is ignored
 * c     scratch buffer holding the bits on the path from the tree root
 * now   number of bits already stored in c (depth of root)
 * len   capacity of c
 *
 * If a path needs more than len bits, the subtree below that point is
 * skipped and a diagnostic is written to stderr instead of writing past
 * the end of c.
 */
void printfHuffmanCode(Node *root,char c[],int now,int len){
	int i;

	if(root==NULL || c==NULL){
		return;
	}

	if(root->left==NULL && root->right==NULL){
		printf("%c:",root->value);
		for(i=0;i<now;i++){
			printf("%c",c[i]);
		}
		printf("\n");
		return;
	}

	/* Descending one level stores a bit at c[now]; make sure it fits. */
	if(now<0 || now>=len){
		fprintf(stderr,"printfHuffmanCode: code buffer too small (len=%d)\n",len);
		return;
	}

	c[now]='0';
	printfHuffmanCode(root->left,c,now+1,len);
	c[now]='1';
	printfHuffmanCode(root->right,c,now+1,len);
}
/* Recursively frees a Huffman (sub)tree; a NULL root is a no-op. */
static void freeHuffmanTree(Node *root){
	if(root==NULL){
		return;
	}
	freeHuffmanTree(root->left);
	freeHuffmanTree(root->right);
	free(root);
}

/*
 * Demo driver: builds the Huffman tree for six symbols with fixed
 * frequencies, prints the root weight and the code of every symbol,
 * then releases the tree.
 *
 * Returns 0 on success and EXIT_FAILURE if memory runs out.
 */
int main(int argc, char *argv[])
{
	enum { SYMBOL_COUNT = 6, CODE_CAPACITY = 32 };
	static const char symbols[SYMBOL_COUNT] = {'a','b','c','d','e','f'};
	static const float freqs[SYMBOL_COUNT] = {45,13,12,16,9,5};
	Node *input[SYMBOL_COUNT];
	int i;

	(void)argc;
	(void)argv;

	/* One leaf per symbol. */
	for(i=0;i<SYMBOL_COUNT;i++){
		Node *leaf=malloc(sizeof *leaf);
		if(leaf==NULL){
			fprintf(stderr,"out of memory\n");
			while(i-->0){
				free(input[i]);
			}
			return EXIT_FAILURE;
		}
		leaf->value=symbols[i];
		leaf->freq=freqs[i];
		leaf->left=NULL;
		leaf->right=NULL;
		input[i]=leaf;
	}

	BUILD_MIN_HEAP(input,SYMBOL_COUNT);
	Node *root = HUFFMAN(input,SYMBOL_COUNT);
	if(root==NULL){
		/* Remaining nodes are reclaimed when the process exits. */
		fprintf(stderr,"failed to build the Huffman tree\n");
		return EXIT_FAILURE;
	}

	printf("%0.2f\nthe huffmanCode as follow:\n",root->freq);
	char c[CODE_CAPACITY];
	printfHuffmanCode(root,c,0,CODE_CAPACITY);

	freeHuffmanTree(root);
	return 0;
}

我们来考虑 时间复杂度：

堆排序：

调整堆：O(lgn)。

建堆：O(n)（O(nlgn) 只是一个不紧的上界）。

堆排序：O(nlgn)。

返回最小值并维护堆的性质：O(lgn)。

插入值并维护堆的性质：O(lgn)。

哈夫曼树构造和哈夫曼编码：

哈夫曼树构造：O(nlgn)。

哈夫曼编码：O(n)。

参考资料：
算法导论

备注：
转载请注明出处：http://blog.csdn.net/wsyw126/article/details/51439343
作者：WSYW126