代码来自闵老师”日撸 Java 三百行(21-30天)“,链接:https://blog.csdn.net/minfanphd/article/details/116975721
public static void fill(Object[] a, int fromIndex, int toIndex, Object val)将指定的 Object 引用分配给指定 Object 数组指定范围中的每个元素。填充的范围从索引 fromIndex(包括)一直到索引 toIndex(不包括)。
目前没整明白为啥数组charCounts的长度要是字母表长度的2倍减一?
答:哈夫曼数又称最优树,其总的节点数就是(2*n-1),因为Huffman构造二叉树的时候树是满二叉树。
理解的过程参考了这篇文章:http://c.biancheng.net/view/3398.html
package datastructure.tree;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collector;
import java.util.stream.Collectors;
public class Huffman {
class HuffmanNode{
char character;
int weight;
HuffmanNode leftChild;
HuffmanNode rightChild;
HuffmanNode parent;
public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild, HuffmanNode paraRightChild, HuffmanNode paraParent) {
// TODO Auto-generated constructor stub
character = paraCharacter;
weight = paraWeight;
leftChild = paraLeftChild;
rightChild = paraRightChild;
parent = paraParent;
}//The first constructor of the HuffmanNode.
public String toString() {
String resultString = "(" + character + ", " + weight + ")";
return resultString;
}//of toString
}//of class HuffmanNode
public static final int NUM_CHARS = 256;
String inputText;
int alphabetLength;
char[] alphabet;
int[] charCounts;
int[] charMapping;
String[] huffmanCode;
HuffmanNode[] nodes;
public Huffman(String paraFilename) {
// TODO Auto-generated constructor stub
charMapping = new int[NUM_CHARS];
readText(paraFilename);
}//of the first constructor
public void readText(String paraFilename) {
try {
inputText = Files.newBufferedReader(Paths.get(paraFilename), StandardCharsets.UTF_8).lines().collect(Collectors.joining("n"));
} catch (Exception ee) {
// TODO: handle exception
System.out.println(ee);
System.exit(0);
}//of try
System.out.println("The text is:rn" + inputText);
}//of readText
public void constructAlphabet() {
//Initialize.
Arrays.fill(charMapping, -1);
//The count for each char. At most NUM_CHARS chars.
int[] tempCharCounts = new int[NUM_CHARS];
//The index of the char in the ASCII char set.
int tempCharIndex ;
//Step 1. Scan the string to obtain the counts.
char tempChar;
for (int i = 0; i < inputText.length(); i++) {
tempChar = inputText.charAt(i);
tempCharIndex = (int) tempChar;
System.out.print("" + tempCharIndex + " ");
tempCharCounts[tempCharIndex]++;
}//of for i
//Step 2. Scan to determine the size of the alphabet.
alphabetLength = 0;
for (int i = 0; i < 255; i++) {
if (tempCharCounts[i] > 0) {
alphabetLength++;
}//of if
}//of for i
System.out.println("rThe alphabet length is: " + alphabetLength);
//Step 3. Compress to the alphabet.
alphabet = new char[alphabetLength];
charCounts = new int[2 * alphabetLength - 1];
int tempCounter = 0;
for (int i = 0; i < NUM_CHARS; i++) {
if (tempCharCounts[i] > 0) {
alphabet[tempCounter] = (char) i;
charCounts[tempCounter] = tempCharCounts[i];
charMapping[i] = tempCounter;
tempCounter++;
}//of if
}//of for i
System.out.println("rThe alphabet is: " + Arrays.toString(alphabet));
System.out.println("Their counts are: " + Arrays.toString(charCounts));
System.out.println("The char mappings are: " + Arrays.toString(charMapping));
}//of constructAlphabet
public void constructTree() {
//Step 1. Allocate space.
nodes = new HuffmanNode[alphabetLength * 2 - 1];
boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];
System.out.println(tempProcessed[0]);
//Step 2. Initialize leaves.
for (int i = 0; i < alphabetLength; i++) {
nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
}//of for i
//Step 3. Construct the tree.
for (int i = alphabetLength; i < tempProcessed.length; i++) {
int tempLeft, tempRight, tempMinmal;
//Step 3.1
tempLeft = -1; tempMinmal = Integer.MAX_VALUE;
for (int j = 0; j < i; j++) {
if (tempProcessed[j]) {
continue;
}//of if
if (tempMinmal > charCounts[j]) {
tempMinmal = charCounts[j];
tempLeft = j;
}//of if
}//of for j
tempProcessed[tempLeft] = true;
//Step 3.2
tempRight = -1; tempMinmal = Integer.MAX_VALUE;
for (int j = 0; j < i; j++) {
if (tempProcessed[j]) {
continue;
}//of if
if (tempMinmal > charCounts[j]) {
tempMinmal = charCounts[j];
tempRight = j;
}//of if
}//of for j
tempProcessed[tempRight] = true;
System.out.println("Selecting " + tempLeft + " and " + tempRight);
//Step 3.3
charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);
//Step 3.4
nodes[tempLeft].parent = nodes[i];
nodes[tempRight].parent = nodes[i];
System.out.println("The childen of " + i + " are " + tempLeft + " and " + tempRight);
}//of for i
}//of constructTree
public HuffmanNode getRoot() {
return nodes[nodes.length - 1];
}//of getRoot
public void preOrderVisit(HuffmanNode paraNode) {
System.out.println("(" + paraNode.character + ", " + paraNode.weight + ") " );
if (paraNode.leftChild != null) {
preOrderVisit(paraNode.leftChild);
}//of if
if (paraNode.rightChild != null) {
preOrderVisit(paraNode.rightChild);
}//of if
}//of preOrderVisit
public void generateCodes() {
huffmanCode = new String[alphabetLength];
HuffmanNode tempNodes;
for (int i = 0; i < alphabet.length; i++) {
tempNodes = nodes[i];
String tempCharCode = "";
while (tempNodes.parent != null) {
if (tempNodes == tempNodes.parent.leftChild) {
tempCharCode = "0" + tempCharCode;
}else {
tempCharCode = "1" + tempCharCode;
}//of if
tempNodes = tempNodes.parent;
}//of while
huffmanCode[i] = tempCharCode;
}//of for i
}//of generateCodes
public String coding(String paraString) {
String resultCodeString = "";
int tempIndex;
for (int i = 0; i < paraString.length(); i++) {
//From the original char to the location in the alphabet.
tempIndex = charMapping[(int) paraString.charAt(i)];
//From the location in the alphabet to the code.
resultCodeString += huffmanCode[tempIndex];
System.out.println("tempIndex:rn" + tempIndex + "rresultCodeString:rn" + resultCodeString);
}//of for i
return resultCodeString;
}//of coding
public String decoding(String paraString) {
String resultCodeString = "";
HuffmanNode tempNode = getRoot();
for (int i = 0; i < paraString.length(); i++) {
if (paraString.charAt(i) == '0') {
tempNode = tempNode.leftChild;
}else {
tempNode = tempNode.rightChild;
}//of if
if (tempNode.leftChild == null) {
resultCodeString += tempNode.character;
tempNode = getRoot();
}//of if
}//of for i
return resultCodeString;
}//of decoding
public static void main(String args[]) {
Huffman tempHuffman = new Huffman("E:\Datasets\UCIdatasets\temp\HuffmanTest.txt");
tempHuffman.constructAlphabet();
tempHuffman.constructTree();
HuffmanNode tempRoot = tempHuffman.getRoot();
System.out.println("The root is: " + tempRoot);
System.out.println("Preorder visit:");
tempHuffman.preOrderVisit(tempRoot);
tempHuffman.generateCodes();
String tempCoded = tempHuffman.coding("Huffman test!");
System.out.println("Coded: " + tempCoded);
String tempDecoded = tempHuffman.decoding(tempCoded);
System.out.println("Decoded: " + tempDecoded);
}//of main
}//of Huffman
梳理回顾一下程序调试过程中遇到的问题。
- inputText:ASCII小于256的码字对应的都是英文字符,没有中文字符。运行过程中因此报错,根据输出结果及多次改变文本内容发现错误。方法constructAlphabet():有三个数组,一个用来存字母,即字母表;一个用来存字母的计数,即charCount;最后一个用来存字母的序号,按照在256个ASCII的位置。方法constructTree():先分配空间,再初始化叶子节点(将i及其个数传入huffmannode),最后建树。建立树又分为三步,第一步找出权重最低的节点,第二步找到权重次低的节点,第三步将前两步找到的节点相加,获得新的节点,并参与接下来的建树。第四步将子节点和父节点相连。自己写的时候第二个参数写成了charCounts[tempLeft] + charCounts[tempRight];看起来没影响,其实每次相加后的值没有存入队列,做比较的时候就没参加比较。方法generateCodes() :基本思想左分支前面加’0’;右分支前面加’1’;编码生成为沿着树自底向上。方法coding():刚开始一直没理解,原来文件读入的只是为了建立二叉树,可以获取编码。正真的是对输入的字符串进行编码,输入的字符串必须是文件中包含的字符。这样在generateCodes() 中才能生成编码。方法decoding():自顶向下,是generateCodes()的逆过程。



