(See the continuation of this post at HuffmanEncoder.java - computing prefix codes for arbitrary (generic) alphabets - Take II.)
I have this Java implementation of the Huffmann encoding. It looks like this:
Code
io.github.coderodde.compression.HuffmannEncoder.java:
package io.github.coderodde.encoding;
import io.github.coderodde.compression.CodeWord;
import io.github.coderodde.compression.SymbolEntry;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
/**
* This class implements the Huffman encoding over probability distributions.
*
* @author Rodion "rodde" Efremov
* @version 1.0.0 (Nov 12, 2025)
* @since 1.0.0 (Nov 12, 2025)
*/
public final class HuffmannEncoder {
private HuffmannEncoder() {
// Hide constructor.
}
public static <S extends Comparable<? super S>> Map<S, CodeWord>
encode(final Map<S, Double> weightDistribution) {
Objects.requireNonNull(weightDistribution,
"The input probability distribution is null");
if (weightDistribution.isEmpty()) {
return Map.of();
}
final Queue<PriorityQueueEntry<S>> queue = new PriorityQueue<>();
for (final Map.Entry<S, Double> entry : weightDistribution.entrySet()) {
final double weight = entry.getValue();
checkWeight(weight);
final S symbol = entry.getKey();
final Set<SymbolEntry<S>> set = new HashSet<>();
set.add(new SymbolEntry<>(symbol, weight));
queue.add(new PriorityQueueEntry<>(set, weight));
}
final Map<S, CodeWord> codewordMap =
new HashMap<>(weightDistribution.size());
for (final S symbol : weightDistribution.keySet()) {
codewordMap.put(symbol, new CodeWord(0));
}
while (queue.size() > 1) {
final PriorityQueueEntry<S> entry1 = queue.remove();
final PriorityQueueEntry<S> entry2 = queue.remove();
for (final SymbolEntry<S> symbolEntry : entry1.set) {
final S symbol = symbolEntry.symbol;
codewordMap.put(symbol,
codewordMap
.get(symbol)
.prependBit(true));
}
for (final SymbolEntry<S> symbolEntry : entry2.set) {
final S symbol = symbolEntry.symbol;
codewordMap.put(symbol,
codewordMap
.get(symbol)
.prependBit(false));
}
entry1.set.addAll(entry2.set);
queue.add(new PriorityQueueEntry<>(
entry1.set,
entry1.totalSetWeight + entry2.totalSetWeight));
}
return codewordMap;
}
private static void checkWeight(final double weight) {
if (Double.isNaN(weight)) {
throw new IllegalArgumentException("weight is NaN");
}
if (weight <= 0.0) {
throw new IllegalArgumentException(
String.format("weight(%f) <= 0.0", weight));
}
if (Double.isInfinite(weight)) {
throw new IllegalArgumentException("weight is Infinity");
}
}
private static final
class PriorityQueueEntry<T extends Comparable<? super T>>
implements Comparable<PriorityQueueEntry<T>> {
Set<SymbolEntry<T>> set;
double totalSetWeight;
PriorityQueueEntry(final Set<SymbolEntry<T>> set,
final double totalSetWeight) {
this.set = set;
this.totalSetWeight = totalSetWeight;
}
@Override
public int compareTo(final PriorityQueueEntry<T> o) {
return Double.compare(totalSetWeight, o.totalSetWeight);
}
}
}
io.github.coderodde.compression.CodeWord.java:
package io.github.coderodde.compression;
import java.util.BitSet;
/**
* This class implements a <b>binary</b> code word in data compression
* scenarios.
*
* @author Rodion "rodde" Efremov
* @version 1.0.0 (Oct 28, 2025)
* @since 1.0.0 (Oct 28, 2025)
*/
public class CodeWord {
private final int length;
private final BitSet bits;
public CodeWord(final int length) {
checkLength(length);
this.length = length;
this.bits = new BitSet(length);
}
public CodeWord prependBit(final boolean bit) {
final CodeWord nextCodeWord = new CodeWord(length + 1);
if (bit) {
nextCodeWord.set(length);
}
for (int i = 0; i < length; ++i) {
if (get(i)) {
nextCodeWord.set(i);
}
}
return nextCodeWord;
}
public int length() {
return length;
}
public boolean get(final int index) {
checkIndex(index);
return bits.get(index);
}
public void set(final int index) {
checkIndex(index);
bits.set(index);
}
public void unset(final int index) {
checkIndex(index);
bits.set(index, false);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(length);
for (int i = length - 1; i >= 0; --i) {
sb.append(get(i) ? "1" : "0");
}
return sb.toString();
}
private void checkIndex(final int index) {
if (index < 0) {
throw new IndexOutOfBoundsException(
String.format("index(%d) < 0", index));
}
if (index >= this.length) {
throw new IndexOutOfBoundsException(
String.format("index(%d) >= length(%d)",
index,
length));
}
}
private static void checkLength(final int length) {
if (length < 0) {
throw new IllegalArgumentException(
String.format("length(%d) < 1", length));
}
}
}
io.github.coderodde.compression.SymbolEntry.java:
package io.github.coderodde.compression;
import java.util.Objects;
public final class SymbolEntry<S extends Comparable<? super S>>
implements Comparable<SymbolEntry<S>> {
public final S symbol;
public final double weight;
public SymbolEntry(final S symbol, final double weight) {
this.symbol = Objects.requireNonNull(symbol);
this.weight = validateWeight(weight);
}
private static double validateWeight(final double weight) {
if (Double.isNaN(weight)) {
throw new IllegalArgumentException(
"The input weight is NaN");
}
if (weight <= 0.0) {
throw new IllegalArgumentException(
String.format("weight(%f) <= 0.0", weight));
}
if (Double.isInfinite(weight)) {
throw new IllegalArgumentException("weight is +Infinity");
}
return weight;
}
@Override
public String toString() {
return String.format("[%s: %f]", symbol.toString(), weight);
}
@Override
public int compareTo(final SymbolEntry<S> o) {
final int cmp = Double.compare(o.weight,
weight);
if (cmp != 0) {
return cmp;
}
return symbol.compareTo(o.symbol);
}
@Override
public boolean equals(final Object object) {
final SymbolEntry<S> other = (SymbolEntry<S>) object;
return symbol.equals(other.symbol);
}
@Override
public int hashCode() {
return symbol.hashCode();
}
}
io.github.coderodde.compression.demo.Demo.java:
package io.github.coderodde.encoding.demo;
import io.github.coderodde.compression.CodeWord;
import io.github.coderodde.encoding.HuffmannEncoder;
import java.util.HashMap;
import java.util.Map;
/**
* This class demonstrates the Huffmann encoding.
*
* @author Rodion "rodde" Efremov
* @version 1.0.0 (Nov 12, 2025)
* @since 1.0.0 (Now 12, 2025)
*/
public final class Demo {
public static void main(String[] args) {
Map<Character, Double> weightDistribution = new HashMap<>();
weightDistribution.put('a', 5.0);
weightDistribution.put('b', 9.0);
weightDistribution.put('c', 12.);
weightDistribution.put('d', 13.0);
weightDistribution.put('e', 16.0);
weightDistribution.put('f', 45.0);
Map<Character, CodeWord> code =
HuffmannEncoder.encode(weightDistribution);
System.out.println(code);
}
}
Demo output:
{a=0011, b=0010, c=011, d=010, e=000, f=1}
Critique request
As always, tell me anything that comes to mind, thank you.