Huffman Coding
1. Algorithm & Complexity
Huffman coding is a greedy algorithm to build a minimum-redundancy prefix code for a set of symbols with given frequencies.
- Count frequencies of each symbol; create a leaf node for each symbol.
- Insert all nodes into a min-heap keyed by frequency.
- While heap size > 1:
- Extract the two nodes of smallest frequency.
- Create a new internal node with these two as children; its frequency = sum of theirs.
- Insert the new node back into the heap.
- The remaining node is the root of the Huffman tree. Assign codes by traversing: left edge = ‘0’, right edge = ‘1’. Each leaf’s path is its code.
Time Complexity
- Building the initial heap: O(n)
- Each of the n−1 merges involves two
pop
+ onepush
: O(log n) each → O(n log n) total.
Total: O(n log n), where n = number of distinct symbols.
Memory Complexity
- Storing frequencies & heap: O(n)
- Storing the tree itself: O(n)
Total: O(n).
2. Easy Problems
2.1 Codeforces 700D – Alice and Huffman
Given k symbols with frequencies, compute the minimum total cost of the Huffman merge process (i.e. sum of all internal node frequencies).
Input: 5 5 3 8 2 6 Output: 58
#include <iostream>
#include <queue>
using namespace std;
int main() {
int k;
cin >> k;
priority_queue, greater> pq;
for (int i = 0; i < k; i++) {
long long f; cin >> f;
pq.push(f);
}
long long cost = 0;
while (pq.size() > 1) {
long long a = pq.top(); pq.pop();
long long b = pq.top(); pq.pop();
cost += a + b;
pq.push(a + b);
}
cout << cost << "\n";
return 0;
}
2.2 gym.acm.sgu 203 – Encoded Length
Given frequencies of symbols, and given code lengths for each symbol, compute the total encoded message length and verify it matches the Huffman optimum.
Input: 4 10 15 30 45 2 2 1 1 Output: 140
#include <iostream>
using namespace std;
int main() {
int n;
cin >> n;
vector freq(n), len(n);
for (int i = 0; i < n; i++) cin >> freq[i];
for (int i = 0; i < n; i++) cin >> len[i];
long long total = 0;
for (int i = 0; i < n; i++)
total += freq[i] * len[i];
cout << total << "\n";
return 0;
}
3. Intermediate Problems
3.1 Codeforces 92577 – Optimal Merge Pattern
Given n sorted files with sizes, find the minimum total cost to merge them into one file (each merge cost = sum of sizes merged).
Input: 6 2 3 4 5 6 7 Output: 68
#include <iostream>
#include <queue>
using namespace std;
int main() {
int n;
cin >> n;
priority_queue, greater> pq;
for (int i = 0; i < n; i++) {
long long s; cin >> s;
pq.push(s);
}
long long cost = 0;
while (pq.size() > 1) {
long long x = pq.top(); pq.pop();
long long y = pq.top(); pq.pop();
cost += x + y;
pq.push(x + y);
}
cout << cost << "\n";
return 0;
}
3.2 Codeforces Education Round 31 – D
Divide a sequence into groups and merge adjacent groups with minimal total “merge cost” following Huffman-style merges.
Input: 4 10 1 10 1 Output: 22
#include <iostream>
#include <queue>
using namespace std;
int main() {
int n;
cin >> n;
priority_queue, greater> pq;
for (int i = 0; i < n; i++) {
long long v; cin >> v;
pq.push(v);
}
long long ans = 0;
while (pq.size() > 1) {
long long a = pq.top(); pq.pop();
long long b = pq.top(); pq.pop();
ans += a + b;
pq.push(a + b);
}
cout << ans << "\n";
return 0;
}
4. Hard Problem
4.1 Codeforces 700D – Full Huffman Tree Reconstruction
Given k symbols with frequencies, reconstruct any optimal Huffman tree and output each symbol’s binary code.
Input: 3 5 9 12 a b c Output: a: 10 b: 11 c: 0
#include <iostream>
#include <queue>
#include <vector>>
#include <string>>
using namespace std;
struct Node {
long long freq;
int idx; // symbol index or -1
Node* left; // 0-child
Node* right; // 1-child
Node(long long f,int i):freq(f),idx(i),left(0),right(0){}
};
struct Cmp {
bool operator()(Node* a, Node* b) const {
return a->freq > b->freq;
}
};
void buildCodes(Node* root, vector& code, string s="") {
if (!root) return;
if (root->idx >= 0) {
code[root->idx] = s.size()? s : "0";
} else {
buildCodes(root->left, code, s + "0");
buildCodes(root->right, code, s + "1");
}
}
int main() {
int k;
cin >> k;
vector freq(k);
for (int i = 0; i < k; i++) cin >> freq[i];
vector sym(k);
for (int i = 0; i < k; i++) cin >> sym[i];
priority_queue, Cmp> pq;
for (int i = 0; i < k; i++)
pq.push(new Node(freq[i], i));
// build tree
while (pq.size() > 1) {
Node* a = pq.top(); pq.pop();
Node* b = pq.top(); pq.pop();
Node* w = new Node(a->freq + b->freq, -1);
w->left = b; // smaller freq on left
w->right = a;
pq.push(w);
}
Node* root = pq.top();
// generate codes
vector code(k);
buildCodes(root, code);
for (int i = 0; i < k; i++) {
cout << sym[i] << ": " << code[i] << "\n";
}
return 0;
}