Rolling Hashes

1. Algorithm Overview

A rolling hash is a hash function designed to enable efficient computation of hashes of all substrings of a given length in a text, by reusing the previous hash. For a string s of length n, define the hash of the prefix s[0..i) as:


H(i) = (s[0]·pi−1 + s[1]·pi−2 + … + s[i−1]·p0) mod M
  

Here, p is a chosen base (e.g. 31 or 257), and M is a large prime modulus (e.g. 109+7). To compute the hash of s[i..i+ℓ) in O(1) time, we precompute power[k] = pk mod M and prefix hashes H(i) for all i. Then:


hash(s[i..i+ℓ)) = ( H(i+ℓ) − H(i)·power[ℓ] % M + M ) % M
  

2. Time & Memory Complexity

  • Preprocessing: O(n) to compute H(0..n) and power[0..n].
  • Querying one substring hash: O(1).
  • Total for scanning all substrings of length ℓ: O(n).
  • Memory: O(n) for the H and power arrays.

3. Easy Problems

3.1. Problem 1: Rabin–Karp String Search

Find all occurrences of a pattern P (length m) in text T (length n) in average O(n+m) time.


#include <iostream>
#include <vector>
#include <string>
using namespace std;
using ull = unsigned long long;

vector rabinKarp(const string& T, const string& P) {
    int n = T.size(), m = P.size();
    const ull p = 1315423911ULL;
    ull hashP = 0, hashT = 0, power = 1;
    for (int i = 0; i < m; i++) {
        hashP = hashP * p + P[i];
        hashT = hashT * p + T[i];
        if (i > 0) power *= p;
    }
    vector res;
    if (hashP == hashT) res.push_back(0);
    for (int i = m; i < n; i++) {
        hashT = hashT - T[i-m]*power;
        hashT = hashT * p + T[i];
        if (hashT == hashP) res.push_back(i-m+1);
    }
    return res;
}

int main() {
    string T = "abracadabra", P = "abra";
    auto occ = rabinKarp(T, P);
    for (int idx : occ) cout << idx << " ";
    return 0;
}
  

3.2. Problem 2: Check for Duplicate Substrings of Length K

Given a string S and integer K, determine if any length-K substring appears at least twice.


#include <iostream>
#include <unordered_set>
#include <string>
using namespace std;
using ull = unsigned long long;

bool hasDuplicateK(const string& S, int K) {
    int n = S.size();
    if (K > n) return false;
    const ull p = 1315423911ULL;
    ull hash = 0, power = 1;
    for (int i = 0; i < K; i++) {
        hash = hash * p + S[i];
        if (i > 0) power *= p;
    }
    unordered_set<ull> seen;
    seen.insert(hash);
    for (int i = K; i < n; i++) {
        hash = hash - S[i-K]*power;
        hash = hash * p + S[i];
        if (seen.count(hash)) return true;
        seen.insert(hash);
    }
    return false;
}

int main() {
    cout << (hasDuplicateK("banana", 2) ? "Yes" : "No");
    return 0;
}
  

4. Intermediate Problems

4.1. Problem 3: Longest Common Substring of Two Strings

Given strings A and B, find the length of their longest common substring. Use binary search on length + rolling hash to test existence in O((n+m)·log min(n,m)).


#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>>
using namespace std;
using ull = unsigned long long;

bool hasCommonLen(int L, const string& A, const string& B) {
    if (L == 0) return true;
    const ull p = 1315423911ULL;
    ull hashA = 0, power = 1;
    unordered_set<ull> seen;
    for (int i = 0; i < L; i++) {
        hashA = hashA * p + A[i];
        if (i > 0) power *= p;
    }
    seen.insert(hashA);
    for (int i = L; i < A.size(); i++) {
        hashA = (hashA - A[i-L]*power) * p + A[i];
        seen.insert(hashA);
    }
    ull hashB = 0;
    for (int i = 0; i < L; i++) hashB = hashB * p + B[i];
    if (seen.count(hashB)) return true;
    for (int i = L; i < B.size(); i++) {
        hashB = (hashB - B[i-L]*power) * p + B[i];
        if (seen.count(hashB)) return true;
    }
    return false;
}

int longestCommonSubstring(const string& A, const string& B) {
    int lo = 0, hi = min(A.size(), B.size()), ans = 0;
    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        if (hasCommonLen(mid, A, B)) {
            ans = mid; lo = mid + 1;
        } else hi = mid - 1;
    }
    return ans;
}

int main() {
    cout << longestCommonSubstring("abcdxyz", "xyzabcd");
    return 0;
}
  

4.2. Problem 4: Count Distinct Substrings

Count the number of distinct substrings of a string S in O(n²) time (or O(n) with suffix automaton). Here we use rolling hash + a hash‐set.


#include <iostream>
#include <unordered_set>
#include <string>
using namespace std;
using ull = unsigned long long;

long long countDistinct(const string& S) {
    int n = S.size();
    const ull p = 1315423911ULL;
    vector<ull> H(n+1,0), P(n+1,1);
    for (int i = 1; i <= n; i++) {
        H[i] = H[i-1] * p + S[i-1];
        P[i] = P[i-1] * p;
    }
    unordered_set<ull> seen;
    for (int i = 0; i < n; i++)
      for (int len = 1; i+len <= n; len++) {
        ull h = (H[i+len] - H[i]*P[len]);
        seen.insert(h);
      }
    return seen.size();
}

int main() {
    cout << countDistinct("abab");
    return 0;
}
  

5. Hard Problem

5.1. Problem 5: Longest Palindromic Substring

Find the longest palindromic substring of S. We use two rolling hashes (forward and backward) + binary search in O(n log n).


#include <iostream>
#include <string>
#include <vector>
using namespace std;
using ull = unsigned long long;

string longestPalindrome(const string& S) {
    int n = S.size();
    const ull p = 91138233ULL;
    vector<ull> Hf(n+1,0), Hr(n+2,0), P(n+1,1);
    for (int i = 1; i <= n; i++) {
        Hf[i] = Hf[i-1] * p + S[i-1];
        P[i] = P[i-1] * p;
    }
    for (int i = n; i >= 1; i--) {
        Hr[i] = Hr[i+1] * p + S[i-1];
    }
    auto getF = [&](int l, int r) {
        return Hf[r] - Hf[l-1] * P[r-l+1];
    };
    auto getR = [&](int l, int r) {
        return Hr[l] - Hr[r+1] * P[r-l+1];
    };
    int bestL = 1, bestLen = 1;
    auto check = [&](int L) {
        for (int i = 1; i + L - 1 <= n; i++) {
            int j = i + L - 1;
            if (getF(i,j) == getR(i,j)) {
                bestL = i; bestLen = L;
                return true;
            }
        }
        return false;
    };
    int lo = 1, hi = n;
    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        if (check(mid)) lo = mid + 1;
        else hi = mid - 1;
    }
    return S.substr(bestL-1, bestLen);
}

int main() {
    cout << longestPalindrome("babad");
    return 0;
}