Huffman Tree Character Encoding
Problem
Huffman coding is a method of encoding characters based on their frequency. Each letter is assigned a variable-length binary string, such as 0101 or 111110, where shorter lengths correspond to more common letters. To accomplish this, a binary tree is built such that the path from the root to any leaf uniquely maps to a character. When traversing the path, descending to a left child corresponds to a 0 in the prefix, while descending right corresponds to 1.
Here is an example tree (note that only the leaf nodes have letters):
*
/ \
* *
/ \ / \
* a t *
/ \
c s
With this encoding, cats would be represented as 0000110111.
Given a dictionary of character frequencies, build a Huffman tree, and use it to determine a mapping between characters and their encoded binary strings.
Examples
Example 1
Input: {'c': 1, 'a': 1, 't': 1, 's': 1}
Output: {'c': '000', 'a': '01', 't': '10', 's': '11'}
Explanation: All characters have equal frequency, so the tree can be built in various ways. One valid encoding assigns 'c' the longest path and others shorter paths.
Example 2
Input: {'a': 5, 'b': 2, 'c': 1, 'd': 1}
Output: {'a': '0', 'b': '10', 'c': '110', 'd': '111'}
Explanation: 'a' is most frequent so gets shortest code '0'. Less frequent characters get longer codes.
Example 3
Input: {'x': 10, 'y': 5, 'z': 3}
Output: {'x': '0', 'y': '10', 'z': '11'}
Explanation: With three characters, 'x' gets the shortest code due to highest frequency.
Example 4
Input: {'e': 8, 't': 6, 'a': 4, 'o': 2, 'i': 2, 'n': 1, 's': 1}
Output: {'e': '00', 't': '01', 'a': '10', 'o': '110', 'i': '1110', 'n': '11110', 's': '11111'}
Explanation: More realistic example showing how frequency determines code length - most frequent characters get shorter codes.
Solution
Method 1 - Priority Queue with Tree Construction
Intuition
The Huffman algorithm works by repeatedly combining the two nodes with the lowest frequencies. We start with each character as a leaf node, then build the tree bottom-up by merging nodes. Characters with higher frequencies end up closer to the root (shorter paths), while rare characters get longer paths.
Approach
- Create leaf nodes for each character with their frequencies
- Use a priority queue (min-heap) to always get the two nodes with lowest frequencies
- Repeatedly extract two minimum nodes and create a new internal node with them as children
- Continue until only one node remains (the root)
- Traverse the tree to generate binary codes for each character
- Return the character-to-code mapping
Code Implementation
C++
class Solution {
public:
map<char, string> huffmanEncoding(map<char, int>& freq) {
if (freq.size() == 1) {
map<char, string> ans;
ans[freq.begin()->first] = "0";
return ans;
}
priority_queue<Node*, vector<Node*>, Compare> pq;
for (auto& p : freq) {
pq.push(new Node(p.first, p.second));
}
while (pq.size() > 1) {
Node* right = pq.top(); pq.pop();
Node* left = pq.top(); pq.pop();
Node* merged = new Node('\0', left->freq + right->freq);
merged->left = left;
merged->right = right;
pq.push(merged);
}
Node* root = pq.top();
map<char, string> ans;
generateCodes(root, "", ans);
return ans;
}
private:
struct Node {
char ch;
int freq;
Node* left;
Node* right;
Node(char c, int f) : ch(c), freq(f), left(nullptr), right(nullptr) {}
};
struct Compare {
bool operator()(Node* a, Node* b) {
return a->freq > b->freq;
}
};
void generateCodes(Node* node, string code, map<char, string>& ans) {
if (!node) return;
if (!node->left && !node->right) {
ans[node->ch] = code;
return;
}
generateCodes(node->left, code + "0", ans);
generateCodes(node->right, code + "1", ans);
}
};
Go
import (
"container/heap"
)
type Node struct {
ch rune
freq int
left *Node
right *Node
}
type PriorityQueue []*Node
func (pq PriorityQueue) Len() int { return len(pq) }
func (pq PriorityQueue) Less(i, j int) bool {
return pq[i].freq < pq[j].freq
}
func (pq PriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
}
func (pq *PriorityQueue) Push(x interface{}) {
*pq = append(*pq, x.(*Node))
}
func (pq *PriorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
*pq = old[0 : n-1]
return item
}
func huffmanEncoding(freq map[rune]int) map[rune]string {
if len(freq) == 1 {
ans := make(map[rune]string)
for ch := range freq {
ans[ch] = "0"
}
return ans
}
pq := &PriorityQueue{}
heap.Init(pq)
for ch, f := range freq {
heap.Push(pq, &Node{ch: ch, freq: f})
}
for pq.Len() > 1 {
left := heap.Pop(pq).(*Node)
right := heap.Pop(pq).(*Node)
merged := &Node{
ch: 0,
freq: left.freq + right.freq,
left: left,
right: right,
}
heap.Push(pq, merged)
}
root := heap.Pop(pq).(*Node)
ans := make(map[rune]string)
generateCodes(root, "", ans)
return ans
}
func generateCodes(node *Node, code string, ans map[rune]string) {
if node == nil {
return
}
if node.left == nil && node.right == nil {
ans[node.ch] = code
return
}
generateCodes(node.left, code+"0", ans)
generateCodes(node.right, code+"1", ans)
}
Java
class Solution {
public Map<Character, String> huffmanEncoding(Map<Character, Integer> freq) {
if (freq.size() == 1) {
Map<Character, String> ans = new HashMap<>();
ans.put(freq.keySet().iterator().next(), "0");
return ans;
}
PriorityQueue<Node> pq = new PriorityQueue<>((a, b) -> a.freq - b.freq);
for (Map.Entry<Character, Integer> entry : freq.entrySet()) {
pq.offer(new Node(entry.getKey(), entry.getValue()));
}
while (pq.size() > 1) {
Node left = pq.poll();
Node right = pq.poll();
Node merged = new Node('\0', left.freq + right.freq);
merged.left = left;
merged.right = right;
pq.offer(merged);
}
Node root = pq.poll();
Map<Character, String> ans = new HashMap<>();
generateCodes(root, "", ans);
return ans;
}
class Node {
char ch;
int freq;
Node left, right;
Node(char c, int f) {
ch = c;
freq = f;
}
}
private void generateCodes(Node node, String code, Map<Character, String> ans) {
if (node == null) return;
if (node.left == null && node.right == null) {
ans.put(node.ch, code);
return;
}
generateCodes(node.left, code + "0", ans);
generateCodes(node.right, code + "1", ans);
}
}
Kotlin
class Solution {
fun huffmanEncoding(freq: Map<Char, Int>): Map<Char, String> {
if (freq.size == 1) {
return mapOf(freq.keys.first() to "0")
}
val pq = PriorityQueue<Node> { a, b -> a.freq - b.freq }
for ((ch, f) in freq) {
pq.offer(Node(ch, f))
}
while (pq.size > 1) {
val left = pq.poll()
val right = pq.poll()
val merged = Node('\u0000', left.freq + right.freq).apply {
this.left = left
this.right = right
}
pq.offer(merged)
}
val root = pq.poll()
val ans = mutableMapOf<Char, String>()
generateCodes(root, "", ans)
return ans
}
data class Node(
val ch: Char,
val freq: Int,
var left: Node? = null,
var right: Node? = null
)
private fun generateCodes(node: Node?, code: String, ans: MutableMap<Char, String>) {
if (node == null) return
if (node.left == null && node.right == null) {
ans[node.ch] = code
return
}
generateCodes(node.left, code + "0", ans)
generateCodes(node.right, code + "1", ans)
}
}
Python
class Solution:
def huffmanEncoding(self, freq: Dict[str, int]) -> Dict[str, str]:
if len(freq) == 1:
return {list(freq.keys())[0]: "0"}
heap = [Node(ch, f) for ch, f in freq.items()]
heapq.heapify(heap)
while len(heap) > 1:
left = heapq.heappop(heap)
right = heapq.heappop(heap)
merged = Node(None, left.freq + right.freq)
merged.left = left
merged.right = right
heapq.heappush(heap, merged)
root = heap[0]
ans = {}
self.generateCodes(root, "", ans)
return ans
def generateCodes(self, node: 'Node', code: str, ans: Dict[str, str]) -> None:
if not node:
return
if not node.left and not node.right:
ans[node.ch] = code
return
self.generateCodes(node.left, code + "0", ans)
self.generateCodes(node.right, code + "1", ans)
class Node:
def __init__(self, ch: str, freq: int):
self.ch = ch
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other: 'Node') -> bool:
return self.freq < other.freq
Rust
use std::collections::{HashMap, BinaryHeap};
use std::cmp::Reverse;
impl Solution {
pub fn huffman_encoding(freq: HashMap<char, i32>) -> HashMap<char, String> {
if freq.len() == 1 {
let mut ans = HashMap::new();
ans.insert(*freq.keys().next().unwrap(), "0".to_string());
return ans;
}
let mut heap = BinaryHeap::new();
for (ch, f) in freq {
heap.push(Reverse(Node::new(Some(ch), f)));
}
while heap.len() > 1 {
let Reverse(left) = heap.pop().unwrap();
let Reverse(right) = heap.pop().unwrap();
let merged = Node {
ch: None,
freq: left.freq + right.freq,
left: Some(Box::new(left)),
right: Some(Box::new(right)),
};
heap.push(Reverse(merged));
}
let Reverse(root) = heap.pop().unwrap();
let mut ans = HashMap::new();
Self::generate_codes(&root, String::new(), &mut ans);
ans
}
fn generate_codes(node: &Node, code: String, ans: &mut HashMap<char, String>) {
if node.left.is_none() && node.right.is_none() {
if let Some(ch) = node.ch {
ans.insert(ch, code);
}
return;
}
if let Some(ref left) = node.left {
Self::generate_codes(left, code.clone() + "0", ans);
}
if let Some(ref right) = node.right {
Self::generate_codes(right, code + "1", ans);
}
}
}
#[derive(Eq, PartialEq)]
struct Node {
ch: Option<char>,
freq: i32,
left: Option<Box<Node>>,
right: Option<Box<Node>>,
}
impl Node {
fn new(ch: Option<char>, freq: i32) -> Self {
Node {
ch,
freq,
left: None,
right: None,
}
}
}
impl Ord for Node {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.freq.cmp(&other.freq)
}
}
impl PartialOrd for Node {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
TypeScript
class Solution {
huffmanEncoding(freq: Map<string, number>): Map<string, string> {
if (freq.size === 1) {
const ans = new Map<string, string>();
ans.set(freq.keys().next().value, "0");
return ans;
}
const pq = new PriorityQueue<Node>((a, b) => a.freq - b.freq);
for (const [ch, f] of freq) {
pq.enqueue(new Node(ch, f));
}
while (pq.size() > 1) {
const left = pq.dequeue()!;
const right = pq.dequeue()!;
const merged = new Node("", left.freq + right.freq);
merged.left = left;
merged.right = right;
pq.enqueue(merged);
}
const root = pq.dequeue()!;
const ans = new Map<string, string>();
this.generateCodes(root, "", ans);
return ans;
}
private generateCodes(node: Node | null, code: string, ans: Map<string, string>): void {
if (!node) return;
if (!node.left && !node.right) {
ans.set(node.ch, code);
return;
}
this.generateCodes(node.left, code + "0", ans);
this.generateCodes(node.right, code + "1", ans);
}
}
class Node {
ch: string;
freq: number;
left: Node | null = null;
right: Node | null = null;
constructor(ch: string, freq: number) {
this.ch = ch;
this.freq = freq;
}
}
class PriorityQueue<T> {
private items: T[] = [];
private compare: (a: T, b: T) => number;
constructor(compareFn: (a: T, b: T) => number) {
this.compare = compareFn;
}
enqueue(item: T): void {
this.items.push(item);
this.bubbleUp();
}
dequeue(): T | undefined {
if (this.items.length === 0) return undefined;
if (this.items.length === 1) return this.items.pop();
const result = this.items[0];
this.items[0] = this.items.pop()!;
this.bubbleDown();
return result;
}
size(): number {
return this.items.length;
}
private bubbleUp(): void {
let index = this.items.length - 1;
while (index > 0) {
const parentIndex = Math.floor((index - 1) / 2);
if (this.compare(this.items[index], this.items[parentIndex]) >= 0) break;
[this.items[index], this.items[parentIndex]] = [this.items[parentIndex], this.items[index]];
index = parentIndex;
}
}
private bubbleDown(): void {
let index = 0;
while (true) {
const leftChildIndex = 2 * index + 1;
const rightChildIndex = 2 * index + 2;
let smallest = index;
if (leftChildIndex < this.items.length &&
this.compare(this.items[leftChildIndex], this.items[smallest]) < 0) {
smallest = leftChildIndex;
}
if (rightChildIndex < this.items.length &&
this.compare(this.items[rightChildIndex], this.items[smallest]) < 0) {
smallest = rightChildIndex;
}
if (smallest === index) break;
[this.items[index], this.items[smallest]] = [this.items[smallest], this.items[index]];
index = smallest;
}
}
}
Complexity
- ⏰ Time complexity:
O(n log n), where n is the number of unique characters. Building the heap takes O(n), and we perform O(n) extract-min and insert operations, each taking O(log n) - 🧺 Space complexity:
O(n)for storing the tree nodes and the priority queue, plus O(h) for recursion depth where h is the height of the tree (up to O(n) in worst case)