Problem

Huffman coding is a method of encoding characters based on their frequency. Each letter is assigned a variable-length binary string, such as 0101 or 111110, where shorter lengths correspond to more common letters. To accomplish this, a binary tree is built such that the path from the root to any leaf uniquely maps to a character. When traversing the path, descending to a left child corresponds to a 0 in the prefix, while descending right corresponds to 1.

Here is an example tree (note that only the leaf nodes have letters):

1
2
3
4
5
6
7
        *
      /   \
    *       *
   / \     / \
  *   a   t   *
 /             \
c               s

With this encoding, cats would be represented as 0000110111.

Given a dictionary of character frequencies, build a Huffman tree, and use it to determine a mapping between characters and their encoded binary strings.

Examples

Example 1

1
2
3
Input: {'c': 1, 'a': 1, 't': 1, 's': 1}
Output: {'c': '000', 'a': '01', 't': '10', 's': '11'}
Explanation: All characters have equal frequency, so the tree can be built in various ways. One valid encoding assigns 'c' the longest path and others shorter paths.

Example 2

1
2
3
Input: {'a': 5, 'b': 2, 'c': 1, 'd': 1}
Output: {'a': '0', 'b': '10', 'c': '110', 'd': '111'}
Explanation: 'a' is most frequent so gets shortest code '0'. Less frequent characters get longer codes.

Example 3

1
2
3
Input: {'x': 10, 'y': 5, 'z': 3}
Output: {'x': '0', 'y': '10', 'z': '11'}
Explanation: With three characters, 'x' gets the shortest code due to highest frequency.

Example 4

1
2
3
Input: {'e': 8, 't': 6, 'a': 4, 'o': 2, 'i': 2, 'n': 1, 's': 1}
Output: {'e': '00', 't': '01', 'a': '10', 'o': '110', 'i': '1110', 'n': '11110', 's': '11111'}
Explanation: More realistic example showing how frequency determines code length - most frequent characters get shorter codes.

Method 1 - Priority Queue with Tree Construction

Intuition

The Huffman algorithm works by repeatedly combining the two nodes with the lowest frequencies. We start with each character as a leaf node, then build the tree bottom-up by merging nodes. Characters with higher frequencies end up closer to the root (shorter paths), while rare characters get longer paths.

Approach

  1. Create leaf nodes for each character with their frequencies
  2. Use a priority queue (min-heap) to always get the two nodes with lowest frequencies
  3. Repeatedly extract two minimum nodes and create a new internal node with them as children
  4. Continue until only one node remains (the root)
  5. Traverse the tree to generate binary codes for each character
  6. Return the character-to-code mapping

Code Implementation

C++

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class Solution {
public:
    map<char, string> huffmanEncoding(map<char, int>& freq) {
        if (freq.size() == 1) {
            map<char, string> ans;
            ans[freq.begin()->first] = "0";
            return ans;
        }
        
        priority_queue<Node*, vector<Node*>, Compare> pq;
        
        for (auto& p : freq) {
            pq.push(new Node(p.first, p.second));
        }
        
        while (pq.size() > 1) {
            Node* right = pq.top(); pq.pop();
            Node* left = pq.top(); pq.pop();
            
            Node* merged = new Node('\0', left->freq + right->freq);
            merged->left = left;
            merged->right = right;
            pq.push(merged);
        }
        
        Node* root = pq.top();
        map<char, string> ans;
        generateCodes(root, "", ans);
        return ans;
    }
    
private:
    struct Node {
        char ch;
        int freq;
        Node* left;
        Node* right;
        
        Node(char c, int f) : ch(c), freq(f), left(nullptr), right(nullptr) {}
    };
    
    struct Compare {
        bool operator()(Node* a, Node* b) {
            return a->freq > b->freq;
        }
    };
    
    void generateCodes(Node* node, string code, map<char, string>& ans) {
        if (!node) return;
        
        if (!node->left && !node->right) {
            ans[node->ch] = code;
            return;
        }
        
        generateCodes(node->left, code + "0", ans);
        generateCodes(node->right, code + "1", ans);
    }
};

Go

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import (
    "container/heap"
)

type Node struct {
    ch    rune
    freq  int
    left  *Node
    right *Node
}

type PriorityQueue []*Node

func (pq PriorityQueue) Len() int { return len(pq) }

func (pq PriorityQueue) Less(i, j int) bool {
    return pq[i].freq < pq[j].freq
}

func (pq PriorityQueue) Swap(i, j int) {
    pq[i], pq[j] = pq[j], pq[i]
}

func (pq *PriorityQueue) Push(x interface{}) {
    *pq = append(*pq, x.(*Node))
}

func (pq *PriorityQueue) Pop() interface{} {
    old := *pq
    n := len(old)
    item := old[n-1]
    *pq = old[0 : n-1]
    return item
}

func huffmanEncoding(freq map[rune]int) map[rune]string {
    if len(freq) == 1 {
        ans := make(map[rune]string)
        for ch := range freq {
            ans[ch] = "0"
        }
        return ans
    }
    
    pq := &PriorityQueue{}
    heap.Init(pq)
    
    for ch, f := range freq {
        heap.Push(pq, &Node{ch: ch, freq: f})
    }
    
    for pq.Len() > 1 {
        left := heap.Pop(pq).(*Node)
        right := heap.Pop(pq).(*Node)
        
        merged := &Node{
            ch:   0,
            freq: left.freq + right.freq,
            left: left,
            right: right,
        }
        heap.Push(pq, merged)
    }
    
    root := heap.Pop(pq).(*Node)
    ans := make(map[rune]string)
    generateCodes(root, "", ans)
    return ans
}

func generateCodes(node *Node, code string, ans map[rune]string) {
    if node == nil {
        return
    }
    
    if node.left == nil && node.right == nil {
        ans[node.ch] = code
        return
    }
    
    generateCodes(node.left, code+"0", ans)
    generateCodes(node.right, code+"1", ans)
}

Java

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class Solution {
    public Map<Character, String> huffmanEncoding(Map<Character, Integer> freq) {
        if (freq.size() == 1) {
            Map<Character, String> ans = new HashMap<>();
            ans.put(freq.keySet().iterator().next(), "0");
            return ans;
        }
        
        PriorityQueue<Node> pq = new PriorityQueue<>((a, b) -> a.freq - b.freq);
        
        for (Map.Entry<Character, Integer> entry : freq.entrySet()) {
            pq.offer(new Node(entry.getKey(), entry.getValue()));
        }
        
        while (pq.size() > 1) {
            Node left = pq.poll();
            Node right = pq.poll();
            
            Node merged = new Node('\0', left.freq + right.freq);
            merged.left = left;
            merged.right = right;
            pq.offer(merged);
        }
        
        Node root = pq.poll();
        Map<Character, String> ans = new HashMap<>();
        generateCodes(root, "", ans);
        return ans;
    }
    
    class Node {
        char ch;
        int freq;
        Node left, right;
        
        Node(char c, int f) {
            ch = c;
            freq = f;
        }
    }
    
    private void generateCodes(Node node, String code, Map<Character, String> ans) {
        if (node == null) return;
        
        if (node.left == null && node.right == null) {
            ans.put(node.ch, code);
            return;
        }
        
        generateCodes(node.left, code + "0", ans);
        generateCodes(node.right, code + "1", ans);
    }
}

Kotlin

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class Solution {
    fun huffmanEncoding(freq: Map<Char, Int>): Map<Char, String> {
        if (freq.size == 1) {
            return mapOf(freq.keys.first() to "0")
        }
        
        val pq = PriorityQueue<Node> { a, b -> a.freq - b.freq }
        
        for ((ch, f) in freq) {
            pq.offer(Node(ch, f))
        }
        
        while (pq.size > 1) {
            val left = pq.poll()
            val right = pq.poll()
            
            val merged = Node('\u0000', left.freq + right.freq).apply {
                this.left = left
                this.right = right
            }
            pq.offer(merged)
        }
        
        val root = pq.poll()
        val ans = mutableMapOf<Char, String>()
        generateCodes(root, "", ans)
        return ans
    }
    
    data class Node(
        val ch: Char,
        val freq: Int,
        var left: Node? = null,
        var right: Node? = null
    )
    
    private fun generateCodes(node: Node?, code: String, ans: MutableMap<Char, String>) {
        if (node == null) return
        
        if (node.left == null && node.right == null) {
            ans[node.ch] = code
            return
        }
        
        generateCodes(node.left, code + "0", ans)
        generateCodes(node.right, code + "1", ans)
    }
}

Python

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class Solution:
    def huffmanEncoding(self, freq: Dict[str, int]) -> Dict[str, str]:
        if len(freq) == 1:
            return {list(freq.keys())[0]: "0"}
        
        heap = [Node(ch, f) for ch, f in freq.items()]
        heapq.heapify(heap)
        
        while len(heap) > 1:
            left = heapq.heappop(heap)
            right = heapq.heappop(heap)
            
            merged = Node(None, left.freq + right.freq)
            merged.left = left
            merged.right = right
            heapq.heappush(heap, merged)
        
        root = heap[0]
        ans = {}
        self.generateCodes(root, "", ans)
        return ans
    
    def generateCodes(self, node: 'Node', code: str, ans: Dict[str, str]) -> None:
        if not node:
            return
        
        if not node.left and not node.right:
            ans[node.ch] = code
            return
        
        self.generateCodes(node.left, code + "0", ans)
        self.generateCodes(node.right, code + "1", ans)

class Node:
    def __init__(self, ch: str, freq: int):
        self.ch = ch
        self.freq = freq
        self.left = None
        self.right = None
    
    def __lt__(self, other: 'Node') -> bool:
        return self.freq < other.freq

Rust

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
use std::collections::{HashMap, BinaryHeap};
use std::cmp::Reverse;

impl Solution {
    pub fn huffman_encoding(freq: HashMap<char, i32>) -> HashMap<char, String> {
        if freq.len() == 1 {
            let mut ans = HashMap::new();
            ans.insert(*freq.keys().next().unwrap(), "0".to_string());
            return ans;
        }
        
        let mut heap = BinaryHeap::new();
        
        for (ch, f) in freq {
            heap.push(Reverse(Node::new(Some(ch), f)));
        }
        
        while heap.len() > 1 {
            let Reverse(left) = heap.pop().unwrap();
            let Reverse(right) = heap.pop().unwrap();
            
            let merged = Node {
                ch: None,
                freq: left.freq + right.freq,
                left: Some(Box::new(left)),
                right: Some(Box::new(right)),
            };
            heap.push(Reverse(merged));
        }
        
        let Reverse(root) = heap.pop().unwrap();
        let mut ans = HashMap::new();
        Self::generate_codes(&root, String::new(), &mut ans);
        ans
    }
    
    fn generate_codes(node: &Node, code: String, ans: &mut HashMap<char, String>) {
        if node.left.is_none() && node.right.is_none() {
            if let Some(ch) = node.ch {
                ans.insert(ch, code);
            }
            return;
        }
        
        if let Some(ref left) = node.left {
            Self::generate_codes(left, code.clone() + "0", ans);
        }
        if let Some(ref right) = node.right {
            Self::generate_codes(right, code + "1", ans);
        }
    }
}

#[derive(Eq, PartialEq)]
struct Node {
    ch: Option<char>,
    freq: i32,
    left: Option<Box<Node>>,
    right: Option<Box<Node>>,
}

impl Node {
    fn new(ch: Option<char>, freq: i32) -> Self {
        Node {
            ch,
            freq,
            left: None,
            right: None,
        }
    }
}

impl Ord for Node {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.freq.cmp(&other.freq)
    }
}

impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

TypeScript

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class Solution {
    huffmanEncoding(freq: Map<string, number>): Map<string, string> {
        if (freq.size === 1) {
            const ans = new Map<string, string>();
            ans.set(freq.keys().next().value, "0");
            return ans;
        }
        
        const pq = new PriorityQueue<Node>((a, b) => a.freq - b.freq);
        
        for (const [ch, f] of freq) {
            pq.enqueue(new Node(ch, f));
        }
        
        while (pq.size() > 1) {
            const left = pq.dequeue()!;
            const right = pq.dequeue()!;
            
            const merged = new Node("", left.freq + right.freq);
            merged.left = left;
            merged.right = right;
            pq.enqueue(merged);
        }
        
        const root = pq.dequeue()!;
        const ans = new Map<string, string>();
        this.generateCodes(root, "", ans);
        return ans;
    }
    
    private generateCodes(node: Node | null, code: string, ans: Map<string, string>): void {
        if (!node) return;
        
        if (!node.left && !node.right) {
            ans.set(node.ch, code);
            return;
        }
        
        this.generateCodes(node.left, code + "0", ans);
        this.generateCodes(node.right, code + "1", ans);
    }
}

class Node {
    ch: string;
    freq: number;
    left: Node | null = null;
    right: Node | null = null;
    
    constructor(ch: string, freq: number) {
        this.ch = ch;
        this.freq = freq;
    }
}

class PriorityQueue<T> {
    private items: T[] = [];
    private compare: (a: T, b: T) => number;
    
    constructor(compareFn: (a: T, b: T) => number) {
        this.compare = compareFn;
    }
    
    enqueue(item: T): void {
        this.items.push(item);
        this.bubbleUp();
    }
    
    dequeue(): T | undefined {
        if (this.items.length === 0) return undefined;
        if (this.items.length === 1) return this.items.pop();
        
        const result = this.items[0];
        this.items[0] = this.items.pop()!;
        this.bubbleDown();
        return result;
    }
    
    size(): number {
        return this.items.length;
    }
    
    private bubbleUp(): void {
        let index = this.items.length - 1;
        while (index > 0) {
            const parentIndex = Math.floor((index - 1) / 2);
            if (this.compare(this.items[index], this.items[parentIndex]) >= 0) break;
            
            [this.items[index], this.items[parentIndex]] = [this.items[parentIndex], this.items[index]];
            index = parentIndex;
        }
    }
    
    private bubbleDown(): void {
        let index = 0;
        while (true) {
            const leftChildIndex = 2 * index + 1;
            const rightChildIndex = 2 * index + 2;
            let smallest = index;
            
            if (leftChildIndex < this.items.length && 
                this.compare(this.items[leftChildIndex], this.items[smallest]) < 0) {
                smallest = leftChildIndex;
            }
            
            if (rightChildIndex < this.items.length && 
                this.compare(this.items[rightChildIndex], this.items[smallest]) < 0) {
                smallest = rightChildIndex;
            }
            
            if (smallest === index) break;
            
            [this.items[index], this.items[smallest]] = [this.items[smallest], this.items[index]];
            index = smallest;
        }
    }
}

Complexity

  • ⏰ Time complexity: O(n log n), where n is the number of unique characters. Building the heap takes O(n), and we perform O(n) extract-min and insert operations, each taking O(log n)
  • 🧺 Space complexity: O(n) for storing the tree nodes and the priority queue, plus O(h) for recursion depth where h is the height of the tree (up to O(n) in worst case)