哈夫曼编码
思路分析代码实现运行结果 解码
思路代码实现运行结果
哈夫曼编码哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式,哈夫曼编码是可变字长编码(VLC)的一种。Huffman于1952年提出一种编码方法,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。
思路分析以压缩字符串为例
1、计算各个字符的数量
2、按照字符出现的次数为权值构建哈夫曼树
3、根据哈夫曼树生成前缀码
4、根据前缀码将字符串进行编码
public class Node implements Comparable{ private Byte data; private int value; private Node left; private Node right; public Node(Byte data, int value) { this.data = data; this.value = value; } getter and setter 方法 @Override public String toString() { return "[" + "data="+data+",value=" + value + ']'; } @Override public int compareTo(Node node) { //表示从小到达排序 return this.value - node.value; } }
import java.util.*;
public class HuffmanZip {
static Map huffmanCodeMap = new HashMap<>();
static StringBuilder builder = new StringBuilder();
public static void main(String[] args) {
String str = "i like java";
byte[] bytes = str.getBytes();
byte[] zip = huffmanZip(bytes);
System.out.println("压缩前"+Arrays.toString(bytes)); //压缩前:[105, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97]
System.out.println("压缩后"+Arrays.toString(zip)); //压缩后:[-10, 118, 42, -57, 0]
}
public static byte[] huffmanZip(byte[] bytes){
List nodes = getNodes(bytes);
Node root = createHuffmanTree(nodes);
getCodeMap(root,"",builder);
byte[] zip = zip(bytes, huffmanCodeMap);
return zip;
}
public static byte[] zip(byte[] bytes,Map huffmanCodeMap){
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < bytes.length; i++) {
stringBuilder.append(huffmanCodeMap.get(bytes[i]));
}
//stringBuilder:111101100111011000101010110001110
int length;
if (stringBuilder.length() % 8 == 0) {
length = stringBuilder.length() / 8;
}else {
length = stringBuilder.length() / 8 + 1;
}
byte[] huffmanBytes = new byte[length];
int index = 0;
for (int i = 0; i < stringBuilder.length(); i += 8) {
String strBytes;
if(i+8 > stringBuilder.length()){
strBytes = stringBuilder.substring(i);
}else {
strBytes = stringBuilder.substring(i,i+8);
}
huffmanBytes[index] = (byte)Integer.parseInt(strBytes,2);
index++;
}
return huffmanBytes;
}
public static List getNodes(byte[] bytes){
List nodes = new ArrayList<>();
Map countMap = new HashMap<>();
for (int i = 0; i < bytes.length; i++) {
Integer count = countMap.get(bytes[i]);
if (count == null) {
countMap.put(bytes[i],1);
}else{
countMap.put(bytes[i],count+1);
}
}
Set keys = countMap.keySet();
Iterator iterator = keys.iterator();
for (Byte key: keys) {
nodes.add(new Node(key,countMap.get(key)));
}
return nodes;
}
public static Node createHuffmanTree(List nodes){
while(nodes.size() >= 2){
Collections.sort(nodes);
Node left = nodes.get(0);
Node right = nodes.get(1);
Node parent = new Node(null,left.getValue()+right.getValue());
parent.setLeft(left);
parent.setRight(right);
nodes.remove(left);
nodes.remove(right);
nodes.add(parent);
}
return nodes.get(0);
}
public static void getCodeMap(Node node,String code,StringBuilder builder){
StringBuilder stringBuilder = new StringBuilder(builder);
stringBuilder.append(code);
if (node != null) {
if (node.getData() == null){
getCodeMap(node.getLeft(),"0",stringBuilder);
getCodeMap(node.getRight(),"1",stringBuilder);
}else{
huffmanCodeMap.put(node.getData(),stringBuilder.toString());
}
}
}
}
运行结果
压缩前[105, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97] 压缩后[-10, 118, 42, -57, 0] Process finished with exit code 0解码 思路
1、将压缩后[-10, 118, 42, -57, 0]转成前缀码111101100111011000101010110001110
2、对照huffmanCodeMap {32=101, 97=110, 101=000, 118=001, 105=111, 106=010, 107=011, 108=100}将111101100111011000101010110001110转化为原来的数据
public static byte[] unZip(Map huffmanCodeMap,byte[] huffmanBytes){
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < huffmanBytes.length; i++) {
byte b = huffmanBytes[i];
boolean flag = (i == huffmanBytes.length-1);
stringBuilder.append(toBitString(!flag,b));
}
Map map = new HashMap<>();
for (Map.Entry entry : huffmanCodeMap.entrySet()){
map.put(entry.getValue(),entry.getKey());
}
List list = new ArrayList<>();
for (int i = 0; i < stringBuilder.length();) {
int count = 1;
boolean flag = true;
Byte b = null;
while(flag){
String key = stringBuilder.substring(i,i+count);
b = map.get(key);
if (b == null) {
count++;
}else{
flag = false;
}
}
list.add(b);
i += count;
}
byte[] b = new byte[list.size()];
for (int i = 0; i < b.length; i++) {
b[i] = list.get(i);
}
return b;
}
public static String toBitString(boolean flag,byte b){
int temp = b;
if(flag){
temp |= 256;
}
String str = Integer.toBinaryString(temp);
if(flag){
return str.substring(str.length()-8);
}else{
return str;
}
}
测试方法
public static void main(String[] args) {
String str = "i like java";
byte[] bytes = str.getBytes();
byte[] zip = huffmanZip(bytes);
System.out.println("压缩前"+Arrays.toString(bytes)); //压缩前:[105, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97]
System.out.println("压缩后"+Arrays.toString(zip)); //压缩后:[-10, 118, 42, -57, 0]
byte[] unzip = unZip(huffmanCodeMap, zip);
System.out.println("解压后"+Arrays.toString(unzip));
System.out.println("原来的字符为"+new String(unzip));
}
运行结果
压缩前[105, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97] 压缩后[-10, 118, 42, -57, 0] 解压后[105, 32, 108, 105, 107, 101, 32, 106, 97, 118, 97] 原来的字符为i like java Process finished with exit code 0



