日撸代码300行：第29-30天（Huffman编码建树、编码与解码）

代码来自闵老师”日撸 Java 三百行（21-30天）“，链接：https://blog.csdn.net/minfanphd/article/details/116975721
public static void fill(Object[] a, int fromIndex, int toIndex, Object val)将指定的 Object 引用分配给指定 Object 数组指定范围中的每个元素。填充的范围从索引 fromIndex（包括）一直到索引 toIndex（不包括）。

目前没整明白为啥数组charCounts的长度要是字母表长度的2倍减一？
答：哈夫曼数又称最优树，其总的节点数就是(2*n-1)，因为Huffman构造二叉树的时候树是满二叉树。
理解的过程参考了这篇文章：http://c.biancheng.net/view/3398.html

package datastructure.tree;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collector;
import java.util.stream.Collectors;



public class Huffman {
	
	
	
	class HuffmanNode{
		
		char character;
		
		
		int weight;
		
		
		HuffmanNode leftChild;

		
		HuffmanNode rightChild;
		
		HuffmanNode parent;
		
		
		public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild, HuffmanNode paraRightChild, HuffmanNode paraParent) {
			// TODO Auto-generated constructor stub
			character = paraCharacter;
			weight = paraWeight;
			leftChild = paraLeftChild;
			rightChild = paraRightChild;
			parent = paraParent;
		}//The first constructor of the HuffmanNode.
		
		public String toString() {
			String resultString = "(" + character + ", " + weight + ")";
			return resultString;
		}//of toString
	}//of class HuffmanNode
	
	
	public static final int NUM_CHARS = 256;
	
	
	String inputText;
	
	
	int alphabetLength;
	
	
	char[] alphabet;
	
	
	int[] charCounts;
	
	
	int[] charMapping;
	
	
	String[] huffmanCode;
	
	
	HuffmanNode[] nodes;
	
	
	public Huffman(String paraFilename) {
		// TODO Auto-generated constructor stub
		charMapping = new int[NUM_CHARS];
		
		readText(paraFilename);
	}//of the first constructor
	
	
	public void readText(String paraFilename) {
		try {
			inputText = Files.newBufferedReader(Paths.get(paraFilename), StandardCharsets.UTF_8).lines().collect(Collectors.joining("n"));
		} catch (Exception ee) {
			// TODO: handle exception
			System.out.println(ee);
			System.exit(0);
		}//of try
		
		System.out.println("The text is:rn" + inputText);
	}//of readText
	
	
	public void constructAlphabet() {
		//Initialize.
		Arrays.fill(charMapping, -1);
		
		//The count for each char. At most NUM_CHARS chars.
		int[] tempCharCounts = new int[NUM_CHARS];
		
		//The index of the char in the ASCII char set.
		int tempCharIndex ;
		
		//Step 1. Scan the string to obtain the counts.
		char tempChar;
		for (int i = 0; i < inputText.length(); i++) {
			tempChar = inputText.charAt(i);
			
			tempCharIndex = (int) tempChar;
			
			System.out.print("" + tempCharIndex + " ");
			
			tempCharCounts[tempCharIndex]++;
			
		}//of for i
		
		//Step 2. Scan to determine the size of the alphabet.
		alphabetLength = 0;
		for (int i = 0; i < 255; i++) {
			if (tempCharCounts[i] > 0) {
				alphabetLength++;
			}//of if
		}//of for i
		System.out.println("rThe alphabet length is: " + alphabetLength);
		
		//Step 3. Compress to the alphabet.
		alphabet = new char[alphabetLength];
		charCounts = new int[2 * alphabetLength - 1];
		
		int tempCounter = 0;
		for (int i = 0; i < NUM_CHARS; i++) {
			if (tempCharCounts[i] > 0) {
				alphabet[tempCounter] = (char) i;
				charCounts[tempCounter] = tempCharCounts[i];
				charMapping[i] = tempCounter;
				tempCounter++;
			}//of if
		}//of for i
		
		System.out.println("rThe alphabet is: " + Arrays.toString(alphabet));
		System.out.println("Their counts are: " + Arrays.toString(charCounts));
		System.out.println("The char mappings are: " + Arrays.toString(charMapping));
		
	}//of constructAlphabet
	
	
	public void constructTree() {
		//Step 1. Allocate space.
		nodes = new HuffmanNode[alphabetLength * 2 - 1];
		boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];
		
		System.out.println(tempProcessed[0]);
		//Step 2. Initialize leaves.
		for (int i = 0; i < alphabetLength; i++) {
			nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
		}//of for i
		
		//Step 3. Construct the tree.
		for (int i = alphabetLength; i < tempProcessed.length; i++) {
			int tempLeft, tempRight, tempMinmal;
			
			//Step 3.1
			tempLeft = -1; tempMinmal = Integer.MAX_VALUE;
			for (int j = 0; j < i; j++) {
				if (tempProcessed[j]) {
					continue;
				}//of if
				if (tempMinmal > charCounts[j]) {
					tempMinmal = charCounts[j];
					tempLeft = j;
				}//of if
			}//of for j
			tempProcessed[tempLeft] = true;
			
			//Step 3.2
			tempRight = -1; tempMinmal = Integer.MAX_VALUE;
			for (int j = 0; j < i; j++) {
				if (tempProcessed[j]) {
					continue;
				}//of if
				if (tempMinmal > charCounts[j]) {
					tempMinmal = charCounts[j];
					tempRight = j;
				}//of if
			}//of for j
			tempProcessed[tempRight] = true;
			System.out.println("Selecting " + tempLeft + " and " + tempRight);
			
			//Step 3.3
			charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
			nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);
			
			//Step 3.4
			nodes[tempLeft].parent = nodes[i];
			nodes[tempRight].parent = nodes[i];
			System.out.println("The childen of " + i + " are " + tempLeft + " and " + tempRight);
		}//of for i
	}//of constructTree
	
	
	public HuffmanNode getRoot() {
		return nodes[nodes.length - 1];
	}//of getRoot
	
	
	public void preOrderVisit(HuffmanNode paraNode) {
		System.out.println("(" + paraNode.character + ", " + paraNode.weight + ") " );
		if (paraNode.leftChild != null) {
			preOrderVisit(paraNode.leftChild);
		}//of if
		if (paraNode.rightChild != null) {
			preOrderVisit(paraNode.rightChild);
		}//of if
	}//of preOrderVisit
	
	
	public void generateCodes() {
		huffmanCode = new String[alphabetLength];
		HuffmanNode tempNodes;
		
		for (int i = 0; i < alphabet.length; i++) {
			tempNodes = nodes[i];
			String tempCharCode = "";
			
			while (tempNodes.parent != null) {
				if (tempNodes == tempNodes.parent.leftChild) {
					tempCharCode = "0" + tempCharCode;
				}else {
					tempCharCode = "1" + tempCharCode;
				}//of if
				tempNodes = tempNodes.parent;
			}//of while
			huffmanCode[i] = tempCharCode;
		}//of for i
	}//of generateCodes
	
	
	public String coding(String paraString) {
		String resultCodeString = "";
		
		int tempIndex;
		for (int i = 0; i < paraString.length(); i++) {
			//From the original char to the location in the alphabet.
			tempIndex = charMapping[(int) paraString.charAt(i)];
			
			//From the location in the alphabet to the code.
			resultCodeString += huffmanCode[tempIndex];
			System.out.println("tempIndex:rn" + tempIndex + "rresultCodeString:rn" + resultCodeString);
		}//of for i
		return resultCodeString;
	}//of coding
	
	
	public String decoding(String paraString) {
		String resultCodeString = "";
		HuffmanNode tempNode = getRoot();
		
		for (int i = 0; i < paraString.length(); i++) {
			
			if (paraString.charAt(i) == '0') {
				tempNode = tempNode.leftChild;
			}else {
				tempNode = tempNode.rightChild;
			}//of if
			
			if (tempNode.leftChild == null) {
				resultCodeString += tempNode.character;
				tempNode = getRoot();
			}//of if

		}//of for i
		
		return resultCodeString;
	}//of decoding
	
	
	public static void main(String args[]) {
		Huffman tempHuffman = new Huffman("E:\Datasets\UCIdatasets\temp\HuffmanTest.txt");
		tempHuffman.constructAlphabet();
		tempHuffman.constructTree();
		
		HuffmanNode tempRoot = tempHuffman.getRoot();
		System.out.println("The root is: " + tempRoot);
		System.out.println("Preorder visit:");
		tempHuffman.preOrderVisit(tempRoot);
		
		tempHuffman.generateCodes();
		
		String tempCoded = tempHuffman.coding("Huffman test!");
		System.out.println("Coded: " + tempCoded);
		
		String tempDecoded = tempHuffman.decoding(tempCoded);
		System.out.println("Decoded: " + tempDecoded);
	}//of main

}//of Huffman

梳理回顾一下程序调试过程中遇到的问题。

inputText：ASCII小于256的码字对应的都是英文字符，没有中文字符。运行过程中因此报错，根据输出结果及多次改变文本内容发现错误。方法constructAlphabet()：有三个数组，一个用来存字母，即字母表；一个用来存字母的计数，即charCount；最后一个用来存字母的序号，按照在256个ASCII的位置。方法constructTree()：先分配空间，再初始化叶子节点（将i及其个数传入huffmannode），最后建树。建立树又分为三步，第一步找出权重最低的节点，第二步找到权重次低的节点，第三步将前两步找到的节点相加，获得新的节点，并参与接下来的建树。第四步将子节点和父节点相连。自己写的时候第二个参数写成了charCounts[tempLeft] + charCounts[tempRight]；看起来没影响，其实每次相加后的值没有存入队列，做比较的时候就没参加比较。方法generateCodes() ：基本思想左分支前面加’0’；右分支前面加’1’；编码生成为沿着树自底向上。方法coding()：刚开始一直没理解，原来文件读入的只是为了建立二叉树，可以获取编码。正真的是对输入的字符串进行编码，输入的字符串必须是文件中包含的字符。这样在generateCodes() 中才能生成编码。方法decoding()：自顶向下，是generateCodes()的逆过程。

日撸代码300行：第29-30天（Huffman编码 建树、编码与解码）

Java相关栏目本月热门文章

日撸代码300行：第29-30天（Huffman编码建树、编码与解码）