Intro
(This is the continuation of HuffmannEncoder.java - computing prefix codes for arbitrary (generic) alphabets.)
(See the GitHub repository.)
This time:
- I have fixed the misspelled
Huffmann -> Huffman. CodeWord.prependBit()now works in \$\Theta(1)\$.- The symbol type parameter
Sis not required to beComparable.
Code
io.github.coderodde.encoding..java:
package io.github.coderodde.encoding;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
/**
* This class implements the Huffman encoding over probability distributions.
*
* @author Rodion "rodde" Efremov
* @version 1.0.0 (Nov 12, 2025)
* @since 1.0.0 (Nov 12, 2025)
*/
public final class HuffmanEncoder {
private HuffmanEncoder() {
// Hide constructor.
}
public static <S> Map<S, CodeWord>
encode(final Map<S, Double> weightDistribution) {
Objects.requireNonNull(weightDistribution,
"The input probability distribution is null");
if (weightDistribution.isEmpty()) {
return Map.of();
}
final Queue<PriorityQueueEntry<S>> queue = new PriorityQueue<>();
for (final Map.Entry<S, Double> entry : weightDistribution.entrySet()) {
final double weight = entry.getValue();
checkWeight(weight);
final S symbol = entry.getKey();
final Set<SymbolEntry<S>> set = new HashSet<>();
set.add(new SymbolEntry<>(symbol, weight));
queue.add(new PriorityQueueEntry<>(set, weight));
}
final Map<S, CodeWord> codewordMap =
new HashMap<>(weightDistribution.size());
for (final S symbol : weightDistribution.keySet()) {
codewordMap.put(symbol, new CodeWord(0));
}
while (queue.size() > 1) {
final PriorityQueueEntry<S> entry1 = queue.remove();
final PriorityQueueEntry<S> entry2 = queue.remove();
for (final SymbolEntry<S> symbolEntry : entry1.set) {
final S symbol = symbolEntry.symbol;
codewordMap.get(symbol).prependBit(true);
}
for (final SymbolEntry<S> symbolEntry : entry2.set) {
final S symbol = symbolEntry.symbol;
codewordMap.get(symbol).prependBit(false);
}
entry1.set.addAll(entry2.set);
queue.add(new PriorityQueueEntry<>(
entry1.set,
entry1.totalSetWeight +
entry2.totalSetWeight));
}
return codewordMap;
}
private static void checkWeight(final double weight) {
if (Double.isNaN(weight)) {
throw new IllegalArgumentException("weight is NaN");
}
if (weight <= 0.0) {
throw new IllegalArgumentException(
String.format("weight(%f) <= 0.0", weight));
}
if (Double.isInfinite(weight)) {
throw new IllegalArgumentException("weight is Infinity");
}
}
private static final class PriorityQueueEntry<T>
implements Comparable<PriorityQueueEntry<T>> {
Set<SymbolEntry<T>> set;
double totalSetWeight;
PriorityQueueEntry(final Set<SymbolEntry<T>> set,
final double totalSetWeight) {
this.set = set;
this.totalSetWeight = totalSetWeight;
}
@Override
public int compareTo(final PriorityQueueEntry<T> o) {
return Double.compare(totalSetWeight, o.totalSetWeight);
}
}
}
io.github.coderodde.encoding.CodeWord.java:
package io.github.coderodde.encoding;
import java.util.BitSet;
/**
* This class implements a <b>binary</b> code word in data compression
* scenarios.
*
* @author Rodion "rodde" Efremov
* @version 1.1.0 (Nov 13, 2025)
* @since 1.0.0 (Oct 28, 2025)
*/
public class CodeWord {
private int length;
private final BitSet bits;
public CodeWord(final int length) {
checkLength(length);
this.length = length;
this.bits = new BitSet(length);
}
public void prependBit(final boolean bit) {
if (bit) {
bits.set(length);
}
++length;
}
public int length() {
return length;
}
public boolean get(final int index) {
checkIndex(index);
return bits.get(index);
}
public void set(final int index) {
checkIndex(index);
bits.set(index);
}
public void unset(final int index) {
checkIndex(index);
bits.set(index, false);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(length);
for (int i = length - 1; i >= 0; --i) {
sb.append(get(i) ? "1" : "0");
}
return sb.toString();
}
private void checkIndex(final int index) {
if (index < 0) {
throw new IndexOutOfBoundsException(
String.format("index(%d) < 0", index));
}
if (index >= this.length) {
throw new IndexOutOfBoundsException(
String.format("index(%d) >= length(%d)",
index,
length));
}
}
private static void checkLength(final int length) {
if (length < 0) {
throw new IllegalArgumentException(
String.format("length(%d) < 1", length));
}
}
}
io.github.coderodde.encoding.SymbolEntry.java:
package io.github.coderodde.encoding;
import java.util.Objects;
/**
* Implements a wrapper containing a symbol from a generic alphabet and its
* weight.
*
* @author Rodion "rodde" Efremov
* @param <S> the actual alphabet symbol type.
* @version 1.0.0 (Nov 13, 2025)
* @since 1.0.0 (Nov 13, 2025)
*/
public final class SymbolEntry<S> {
public final S symbol;
public final double weight;
public SymbolEntry(final S symbol, final double weight) {
this.symbol = Objects.requireNonNull(symbol);
this.weight = validateWeight(weight);
}
private static double validateWeight(final double weight) {
if (Double.isNaN(weight)) {
throw new IllegalArgumentException(
"The input weight is NaN");
}
if (weight <= 0.0) {
throw new IllegalArgumentException(
String.format("weight(%f) <= 0.0", weight));
}
if (Double.isInfinite(weight)) {
throw new IllegalArgumentException("weight is +Infinity");
}
return weight;
}
@Override
public String toString() {
return String.format("[%s: %f]", symbol.toString(), weight);
}
@Override
public boolean equals(final Object object) {
final SymbolEntry<S> other = (SymbolEntry<S>) object;
return symbol.equals(other.symbol);
}
@Override
public int hashCode() {
return symbol.hashCode();
}
}
io.github.coderodde.encoding.demo.Demo.java:
package io.github.coderodde.encoding.demo;
import io.github.coderodde.encoding.CodeWord;
import io.github.coderodde.encoding.HuffmanEncoder;
import java.util.HashMap;
import java.util.Map;
/**
* This class demonstrates the Huffmann encoding.
*
* @author Rodion "rodde" Efremov
* @version 1.1.0 (Nov 13, 2025)
* @since 1.0.0 (Now 12, 2025)
*/
public final class Demo {
public static void main(String[] args) {
demo1();
dctWeek2Task1();
}
private static void demo1() {
System.out.println("--- demo1 ---");
Map<Character, Double> weightDistribution = new HashMap<>();
weightDistribution.put('a', 5.0);
weightDistribution.put('b', 9.0);
weightDistribution.put('c', 12.);
weightDistribution.put('d', 13.0);
weightDistribution.put('e', 16.0);
weightDistribution.put('f', 45.0);
Map<Character, CodeWord> code =
HuffmanEncoder.encode(weightDistribution);
System.out.println(code);
}
private static void dctWeek2Task1() {
System.out.println("--- dctWeek2Task1 ---");
Map<Character, Double> weightDistribution = new HashMap<>();
weightDistribution.put('a', 1.0 / 6.0);
weightDistribution.put('b', 1.0 / 5.0);
weightDistribution.put('c', 1.0 / 20.0);
weightDistribution.put('d', 1.0 / 5.0);
weightDistribution.put('e', 1.0 / 120.0);
weightDistribution.put('f', 1.0 / 8.0);
weightDistribution.put('g', 1.0 / 4.0);
Map<Character, CodeWord> code =
HuffmanEncoder.encode(weightDistribution);
System.out.println(code);
}
}
Output
The output follows:
--- demo1 ---
{a=0011, b=0010, c=011, d=010, e=000, f=1}
--- dctWeek2Task1 ---
{a=001, b=10, c=00010, d=11, e=00011, f=0000, g=01}
Critique request
As always, tell me anything that comes to mind, thank you!