Rolling Hashes
1. Algorithm Overview
A rolling hash is a hash function designed to enable efficient computation of hashes of all substrings of a given length in a text, by reusing the previous hash. For a string s of length n, define the hash of the prefix s[0..i) as:
H(i) = (s[0]·pi−1 + s[1]·pi−2 + … + s[i−1]·p0) mod M
Here, p is a chosen base (e.g. 31 or 257), and M is a large prime modulus (e.g. 109+7). To compute the hash of s[i..i+ℓ) in O(1) time, we precompute power[k] = pk mod M and prefix hashes H(i) for all i. Then:
hash(s[i..i+ℓ)) = ( H(i+ℓ) − H(i)·power[ℓ] % M + M ) % M
2. Time & Memory Complexity
- Preprocessing: O(n) to compute H(0..n) and power[0..n].
- Querying one substring hash: O(1).
- Total for scanning all substrings of length ℓ: O(n).
- Memory: O(n) for the H and power arrays.
3. Easy Problems
3.1. Problem 1: Rabin–Karp String Search
Find all occurrences of a pattern P (length m) in text T (length n) in average O(n+m) time.
#include <iostream>
#include <vector>
#include <string>
using namespace std;
using ull = unsigned long long;
vector rabinKarp(const string& T, const string& P) {
int n = T.size(), m = P.size();
const ull p = 1315423911ULL;
ull hashP = 0, hashT = 0, power = 1;
for (int i = 0; i < m; i++) {
hashP = hashP * p + P[i];
hashT = hashT * p + T[i];
if (i > 0) power *= p;
}
vector res;
if (hashP == hashT) res.push_back(0);
for (int i = m; i < n; i++) {
hashT = hashT - T[i-m]*power;
hashT = hashT * p + T[i];
if (hashT == hashP) res.push_back(i-m+1);
}
return res;
}
int main() {
string T = "abracadabra", P = "abra";
auto occ = rabinKarp(T, P);
for (int idx : occ) cout << idx << " ";
return 0;
}
3.2. Problem 2: Check for Duplicate Substrings of Length K
Given a string S and integer K, determine if any length-K substring appears at least twice.
#include <iostream>
#include <unordered_set>
#include <string>
using namespace std;
using ull = unsigned long long;
bool hasDuplicateK(const string& S, int K) {
int n = S.size();
if (K > n) return false;
const ull p = 1315423911ULL;
ull hash = 0, power = 1;
for (int i = 0; i < K; i++) {
hash = hash * p + S[i];
if (i > 0) power *= p;
}
unordered_set<ull> seen;
seen.insert(hash);
for (int i = K; i < n; i++) {
hash = hash - S[i-K]*power;
hash = hash * p + S[i];
if (seen.count(hash)) return true;
seen.insert(hash);
}
return false;
}
int main() {
cout << (hasDuplicateK("banana", 2) ? "Yes" : "No");
return 0;
}
4. Intermediate Problems
4.1. Problem 3: Longest Common Substring of Two Strings
Given strings A and B, find the length of their longest common substring. Use binary search on length + rolling hash to test existence in O((n+m)·log min(n,m)).
#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>>
using namespace std;
using ull = unsigned long long;
bool hasCommonLen(int L, const string& A, const string& B) {
if (L == 0) return true;
const ull p = 1315423911ULL;
ull hashA = 0, power = 1;
unordered_set<ull> seen;
for (int i = 0; i < L; i++) {
hashA = hashA * p + A[i];
if (i > 0) power *= p;
}
seen.insert(hashA);
for (int i = L; i < A.size(); i++) {
hashA = (hashA - A[i-L]*power) * p + A[i];
seen.insert(hashA);
}
ull hashB = 0;
for (int i = 0; i < L; i++) hashB = hashB * p + B[i];
if (seen.count(hashB)) return true;
for (int i = L; i < B.size(); i++) {
hashB = (hashB - B[i-L]*power) * p + B[i];
if (seen.count(hashB)) return true;
}
return false;
}
int longestCommonSubstring(const string& A, const string& B) {
int lo = 0, hi = min(A.size(), B.size()), ans = 0;
while (lo <= hi) {
int mid = (lo + hi) / 2;
if (hasCommonLen(mid, A, B)) {
ans = mid; lo = mid + 1;
} else hi = mid - 1;
}
return ans;
}
int main() {
cout << longestCommonSubstring("abcdxyz", "xyzabcd");
return 0;
}
4.2. Problem 4: Count Distinct Substrings
Count the number of distinct substrings of a string S in O(n²) time (or O(n) with suffix automaton). Here we use rolling hash + a hash‐set.
#include <iostream>
#include <unordered_set>
#include <string>
using namespace std;
using ull = unsigned long long;
long long countDistinct(const string& S) {
int n = S.size();
const ull p = 1315423911ULL;
vector<ull> H(n+1,0), P(n+1,1);
for (int i = 1; i <= n; i++) {
H[i] = H[i-1] * p + S[i-1];
P[i] = P[i-1] * p;
}
unordered_set<ull> seen;
for (int i = 0; i < n; i++)
for (int len = 1; i+len <= n; len++) {
ull h = (H[i+len] - H[i]*P[len]);
seen.insert(h);
}
return seen.size();
}
int main() {
cout << countDistinct("abab");
return 0;
}
5. Hard Problem
5.1. Problem 5: Longest Palindromic Substring
Find the longest palindromic substring of S. We use two rolling hashes (forward and backward) + binary search in O(n log n).
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using ull = unsigned long long;
string longestPalindrome(const string& S) {
int n = S.size();
const ull p = 91138233ULL;
vector<ull> Hf(n+1,0), Hr(n+2,0), P(n+1,1);
for (int i = 1; i <= n; i++) {
Hf[i] = Hf[i-1] * p + S[i-1];
P[i] = P[i-1] * p;
}
for (int i = n; i >= 1; i--) {
Hr[i] = Hr[i+1] * p + S[i-1];
}
auto getF = [&](int l, int r) {
return Hf[r] - Hf[l-1] * P[r-l+1];
};
auto getR = [&](int l, int r) {
return Hr[l] - Hr[r+1] * P[r-l+1];
};
int bestL = 1, bestLen = 1;
auto check = [&](int L) {
for (int i = 1; i + L - 1 <= n; i++) {
int j = i + L - 1;
if (getF(i,j) == getR(i,j)) {
bestL = i; bestLen = L;
return true;
}
}
return false;
};
int lo = 1, hi = n;
while (lo <= hi) {
int mid = (lo + hi) / 2;
if (check(mid)) lo = mid + 1;
else hi = mid - 1;
}
return S.substr(bestL-1, bestLen);
}
int main() {
cout << longestPalindrome("babad");
return 0;
}