Suffix Arrays

1. Algorithm Overview

A suffix array for a string S of length n is an array SA[0…n−1] of integers giving the starting positions of all suffixes of S in lexicographical order. A common construction algorithm is the prefix-doubling method:

Initialize:
- Let rank[i] = character code of S[i] (e.g. ASCII).
- Let SA[i] = i for all 0 ≤ i < n.
Iterate with k = 1, 2, 4, 8, … while k < n:
- Sort SA by the key (rank[i], rank[i+k]), using a stable sort (radix sort or counting sort) in O(n).
- Recompute a temporary array tmp of new ranks: tmp[SA[0]] = 0, and for i>0, tmp[SA[i]] = tmp[SA[i−1]] + (key(SA[i]) ≠ key(SA[i−1])).
- Copy rank = tmp. If all ranks are distinct (max rank = n−1), break early.
- Double k = k × 2.

2. Time and Memory Complexity

Time:
- Each doubling step sorts in O(n) using radix/counting sort.
- Number of steps ~ ⌈log₂n⌉. → Total O(n log n).
Memory:
- Arrays SA, rank, tmp each size n. → O(n) extra memory besides the input string.

3. Easy Problems

Pattern Search (does pattern P appear in text S?)

Build SA for S, then binary-search for P in the suffixes in O(m log n), where m=|P|.

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;

vector<int> build_sa(const string &s) {
    int n = s.size(), k = 1;
    vector<int> sa(n), rank(n), tmp(n);
    for (int i = 0; i < n; ++i) {
        sa[i] = i;
        rank[i] = (unsigned char)s[i];
    }
    while (k < n) {
        auto cmp = [&](int i, int j) {
            if (rank[i] != rank[j]) return rank[i] < rank[j];
            int ri = i+k < n ? rank[i+k] : -1;
            int rj = j+k < n ? rank[j+k] : -1;
            return ri < rj;
        };
        sort(sa.begin(), sa.end(), cmp);
        tmp[sa[0]] = 0;
        for (int i = 1; i < n; ++i)
            tmp[sa[i]] = tmp[sa[i-1]] + (cmp(sa[i-1], sa[i]) ? 1 : 0);
        rank = tmp;
        k <<= 1;
    }
    return sa;
}

bool contains(const string &S, const string &P) {
    auto sa = build_sa(S);
    int n = S.size(), m = P.size();
    int l = 0, r = n;
    while (l < r) {
        int mid = (l + r) / 2;
        if (S.compare(sa[mid], m, P) < 0) l = mid + 1;
        else r = mid;
    }
    if (l < n && S.compare(sa[l], m, P) == 0) return true;
    return false;
}

int main() {
    string S = "banana", P = "ana";
    cout << (contains(S, P) ? "YES\n" : "NO\n");
    return 0;
}

Count Pattern Occurrences

After finding the lower bound for P, find the upper bound. Difference gives count in O(m log n).

// ... reuse build_sa from above ...

int count_occurrences(const string &S, const string &P) {
    auto sa = build_sa(S);
    int n = S.size(), m = P.size();
    auto cmp_prefix = [&](int i, const string &P) {
        return S.compare(i, m, P) < 0;
    };
    int lo = 0, hi = n;
    // lower bound
    while (lo < hi) {
        int mid = (lo + hi) / 2;
        if (cmp_prefix(sa[mid], P)) lo = mid + 1;
        else hi = mid;
    }
    int start = lo;
    // upper bound
    lo = 0; hi = n;
    while (lo < hi) {
        int mid = (lo + hi) / 2;
        if (S.compare(sa[mid], m, P) <= 0) lo = mid + 1;
        else hi = mid;
    }
    int end = lo;
    return end - start;
}

int main() {
    string S = "banana", P = "ana";
    cout << count_occurrences(S, P) << "\n";
    return 0;
}

4. Intermediate Problems

Count Distinct Substrings

The total number of substrings of S is n(n+1)/2. Subtract the sum of LCPs between adjacent suffixes in SA.

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;

// build_sa as above

vector<int> build_lcp(const string &s, const vector<int>& sa) {
    int n = s.size(), h = 0;
    vector<int> rank(n), lcp(n-1);
    for (int i = 0; i < n; ++i) rank[sa[i]] = i;
    for (int i = 0; i < n; ++i) if (rank[i] > 0) {
        int j = sa[rank[i]-1];
        while (i+h < n && j+h < n && s[i+h]==s[j+h]) h++;
        lcp[rank[i]-1] = h;
        if (h > 0) h--;
    }
    return lcp;
}

long long count_distinct(const string &S) {
    int n = S.size();
    auto sa = build_sa(S);
    auto lcp = build_lcp(S, sa);
    long long total = 1LL*n*(n+1)/2;
    for (int x : lcp) total -= x;
    return total;
}

int main() {
    cout << count_distinct("banana") << "\n";
    return 0;
}

Longest Common Substring of Two Strings

Concatenate S=T1+#+T2+$. Build SA and LCP; scan for max LCP where the two suffixes come from different parts.

#include <iostream>
#include <vector>
#include <string>>
#include <algorithm>>
using namespace std;
// build_sa, build_lcp as above

string longest_common_substring(const string &A, const string &B) {
    string S = A + char(#) + B + char($);
    int n = S.size(), nA = A.size();
    auto sa = build_sa(S);
    auto lcp = build_lcp(S, sa);
    int best = 0, idx = 0;
    for (int i = 0; i+1 < n; ++i) {
        bool inA = sa[i] < nA;
        bool inB = sa[i+1] < nA;
        if (inA ^ inB && lcp[i] > best) {
            best = lcp[i];
            idx = sa[i];
        }
    }
    return S.substr(idx, best);
}

int main() {
    cout << longest_common_substring("xabxac","abcabxabcd") << "\n";
    return 0;
}

5. Hard Problem

K-th Lexicographical Substring

Find the k-th smallest distinct substring of S. Use SA and LCP: each suffix SA[i] contributes (n − SA[i]) − LCP[i−1] new substrings.

#include <iostream>>
#include <vector>>
#include <string>>
#include <algorithm>>
using namespace std;
// build_sa, build_lcp as above

string kth_substring(const string &S, long long k) {
    int n = S.size();
    auto sa = build_sa(S);
    auto lcp = build_lcp(S, sa);
    for (int i = 0; i < n; ++i) {
        long long contrib = (n - sa[i]) - (i? lcp[i-1] : 0);
        if (k <= contrib) {
            int length = (i? lcp[i-1] : 0) + k;
            return S.substr(sa[i], length);
        }
        k -= contrib;
    }
    return ""; // k too large
}

int main() {
    string S = "banana";
    long long k = 5;
    cout << kth_substring(S, k) << "\n";
    return 0;
}

Ant In Loop

Sorting Algorithms

Searching Algorithms

Mathematical Algorithms

Graph Algorithms

Dynamic Programming

String Algorithms

Data Structure

Backtracking

Greedy Algorithms

Bit Manipulation

Geometry Algorithms

Advanced Techniques

Recurrence & Divide and Conquer