Persistent Data Structures
Introduction
Persistent data structures maintain all previous versions of themselves when modified. This allows efficient querying of past states without altering the current state.
Types of Persistence
- Partial Persistence: Only the latest version can be modified, but all versions can be queried
- Full Persistence: Any version can be modified or queried
- Confluent Persistence: Versions can be merged
Mathematics Behind Persistence
The key mathematical concept is path copying:
- When modifying a node, we create a new copy of it and all nodes along the path to the root
- This creates a new version while preserving the old one
- The number of new nodes created per update is O(log n) for balanced trees
Version Tree: Each version forms a node in a version tree, where edges represent modifications.
Time and Space Complexity
- Persistent Segment Tree: O(log n) time per query/update, O(m log n) space for m versions
- Persistent Union-Find: O(α(n)) time per operation, O(n + mα(n)) space
- Persistent Trie: O(L) time per operation (L = key length), O(mL) space
Problem Examples
Easy Problems
1. Persistent Stack (Codeforces EDU)
Implement a stack that supports push, pop, and querying previous versions.
#include <iostream>
#include <vector>
using namespace std;
struct Node {
int value;
Node* prev;
Node(int v, Node* p) : value(v), prev(p) {}
};
vector<Node*> versions;
void push(int version, int x) {
Node* new_node = new Node(x, versions[version]);
versions.push_back(new_node);
}
int pop(int version) {
Node* top = versions[version];
versions.push_back(top->prev);
return top->value;
}
int top(int version) {
return versions[version]->value;
}
int main() {
versions.push_back(nullptr); // version 0: empty stack
push(0, 1); // version 1
push(1, 2); // version 2
push(2, 3); // version 3
cout << top(3) << endl; // 3
pop(3); // version 4
cout << top(4) << endl; // 2
push(2, 4); // version 5 (branch from version 2)
cout << top(5) << endl; // 4
return 0;
}
Input/Output Example
Output: 3 2 4
2. Persistent Array (Codeforces 707D)
Implement an array that supports updates and queries on previous versions.
#include <iostream>
#include <vector>
using namespace std;
struct Node {
int value;
Node* left;
Node* right;
Node(int v) : value(v), left(nullptr), right(nullptr) {}
};
Node* build(int l, int r, const vector<int>& arr) {
Node* node = new Node(0);
if (l == r) {
node->value = arr[l];
} else {
int mid = (l + r) / 2;
node->left = build(l, mid, arr);
node->right = build(mid+1, r, arr);
}
return node;
}
Node* update(Node* root, int l, int r, int idx, int value) {
Node* new_node = new Node(root->value);
if (l == r) {
new_node->value = value;
} else {
int mid = (l + r) / 2;
if (idx <= mid) {
new_node->left = update(root->left, l, mid, idx, value);
new_node->right = root->right;
} else {
new_node->left = root->left;
new_node->right = update(root->right, mid+1, r, idx, value);
}
}
return new_node;
}
int query(Node* root, int l, int r, int idx) {
if (l == r) return root->value;
int mid = (l + r) / 2;
if (idx <= mid) return query(root->left, l, mid, idx);
else return query(root->right, mid+1, r, idx);
}
int main() {
vector<int> arr = {1, 2, 3, 4};
vector<Node*> versions;
versions.push_back(build(0, 3, arr)); // version 0
Node* v1 = update(versions[0], 0, 3, 1, 10); // version 1
versions.push_back(v1);
Node* v2 = update(versions[1], 0, 3, 3, 20); // version 2
versions.push_back(v2);
cout << query(versions[0], 0, 3, 1) << endl; // 2
cout << query(versions[1], 0, 3, 1) << endl; // 10
cout << query(versions[2], 0, 3, 3) << endl; // 20
return 0;
}
Intermediate Problems
1. Persistent Segment Tree (Codeforces 840D)
Find the k-th smallest element in a range across different versions.
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
struct Node {
int count;
Node* left;
Node* right;
Node() : count(0), left(nullptr), right(nullptr) {}
};
vector<Node*> roots;
vector<int> sorted;
Node* build(int l, int r) {
Node* node = new Node();
if (l != r) {
int mid = (l + r) / 2;
node->left = build(l, mid);
node->right = build(mid+1, r);
}
return node;
}
Node* update(Node* prev, int l, int r, int idx) {
Node* node = new Node();
*node = *prev;
node->count++;
if (l != r) {
int mid = (l + r) / 2;
if (idx <= mid) node->left = update(prev->left, l, mid, idx);
else node->right = update(prev->right, mid+1, r, idx);
}
return node;
}
int query(Node* u, Node* v, int l, int r, int k) {
if (l == r) return l;
int mid = (l + r) / 2;
int left_count = v->left->count - u->left->count;
if (left_count >= k) return query(u->left, v->left, l, mid, k);
return query(u->right, v->right, mid+1, r, k - left_count);
}
int main() {
vector<int> arr = {1, 3, 2, 5, 4};
sorted = arr;
sort(sorted.begin(), sorted.end());
roots.push_back(build(0, 4)); // version 0
for (int x : arr) {
int idx = lower_bound(sorted.begin(), sorted.end(), x) - sorted.begin();
roots.push_back(update(roots.back(), 0, 4, idx));
}
// Query k-th smallest in [L, R] (1-based)
int L = 2, R = 4, k = 2;
cout << sorted[query(roots[L-1], roots[R], 0, 4, k)] << endl; // 4
return 0;
}
2. Persistent Trie (Codeforces 706D)
Implement a XOR trie that supports adding numbers and querying maximum XOR with any number in previous versions.
#include <iostream>
#include <vector>
using namespace std;
struct TrieNode {
TrieNode* children[2];
int count;
TrieNode() : children{nullptr, nullptr}, count(0) {}
};
vector<TrieNode*> roots;
void insert(TrieNode* prev, TrieNode* curr, int num, int bit) {
if (bit < 0) return;
int b = (num >> bit) & 1;
curr->children[b] = new TrieNode();
if (prev && prev->children[b]) {
*curr->children[b] = *prev->children[b];
}
curr->children[b]->count++;
insert(prev ? prev->children[b] : nullptr,
curr->children[b], num, bit-1);
}
int max_xor(TrieNode* node, int num, int bit) {
if (bit < 0) return 0;
int b = (num >> bit) & 1;
int flip = 1 - b;
if (node->children[flip] && node->children[flip]->count > 0) {
return (1 << bit) | max_xor(node->children[flip], num, bit-1);
}
return max_xor(node->children[b], num, bit-1);
}
int main() {
roots.push_back(new TrieNode()); // version 0
// Version 1: insert 3
roots.push_back(new TrieNode());
insert(roots[0], roots[1], 3, 30);
// Version 2: insert 5
roots.push_back(new TrieNode());
insert(roots[1], roots[2], 5, 30);
cout << max_xor(roots[1], 6, 30) << endl; // 5 (3^6=5)
cout << max_xor(roots[2], 6, 30) << endl; // 3 (5^6=3)
return 0;
}
Hard Problem
1. Persistent Union-Find (Codeforces 813F)
Implement a dynamic connectivity structure that supports adding/removing edges and querying connectivity across versions.
#include <iostream>
#include <vector>
#include <map>
using namespace std;
struct Node {
int parent, rank;
Node(int p = 0, int r = 0) : parent(p), rank(r) {}
};
struct Update {
int u, v;
bool united;
Update(int _u, int _v, bool _united) : u(_u), v(_v), united(_united) {}
};
vector<vector<Node>> snapshots;
vector<vector<Update>> updates;
vector<int> parent, rank;
void make_set(int n) {
parent.resize(n+1);
rank.resize(n+1);
for (int i = 1; i <= n; i++) {
parent[i] = i;
rank[i] = 0;
}
}
int find(int u) {
if (parent[u] != u) return find(parent[u]);
return u;
}
bool unite(int u, int v) {
u = find(u);
v = find(v);
if (u == v) return false;
if (rank[u] < rank[v]) swap(u, v);
updates.back().emplace_back(v, u, rank[u] == rank[v]);
parent[v] = u;
if (rank[u] == rank[v]) rank[u]++;
return true;
}
void persist() {
snapshots.push_back(vector<Node>(parent.size()));
for (int i = 1; i < parent.size(); i++) {
snapshots.back()[i] = Node(parent[i], rank[i]);
}
}
void rollback(int version) {
parent.resize(snapshots[version].size());
rank.resize(snapshots[version].size());
for (int i = 1; i < snapshots[version].size(); i++) {
parent[i] = snapshots[version][i].parent;
rank[i] = snapshots[version][i].rank;
}
}
int main() {
int n = 4;
make_set(n);
// Version 0: initial state
persist();
// Version 1: add edge 1-2
updates.emplace_back();
unite(1, 2);
persist();
// Version 2: add edge 3-4
updates.emplace_back();
unite(3, 4);
persist();
// Query connectivity in version 1
rollback(1);
cout << (find(1) == find(2)) << endl; // 1 (connected)
cout << (find(1) == find(3)) << endl; // 0 (not connected)
// Query connectivity in version 2
rollback(2);
cout << (find(1) == find(3)) << endl; // 0 (not connected)
cout << (find(3) == find(4)) << endl; // 1 (connected)
return 0;
}