Z-Algorithm

1. Algorithm Description

The Z-algorithm computes for a string S of length n an array Z where Z[i] is the length of the longest substring starting at i that matches a prefix of S. We maintain a window [L, R] which is the interval with maximum R such that S[L..R] is a prefix of S. For each position i from 1 to n-1:

  1. If i > R, we naively match from i onward to compute Z[i], and set L = i, R = i + Z[i] - 1.
  2. Otherwise (i ≤ R), let k = i – L.
    • If Z[k] < R – i + 1, we set Z[i] = Z[k].
    • Else, we start matching from R+1 onward to extend the match and update Z[i] and L,R.

2. Time and Memory Complexity

  • Time: O(n) — each position is processed in amortized constant time.
  • Memory: O(n) — to store the string and the Z-array.

3. Easy Problems

3.1 Pattern Searching (Exact Match)

Given text T and pattern P, find all occurrences of P in T. Concatenate S = P + '#39; + T (where "quot; is a sentinel not in either string), compute Z on S, and report positions i where Z[i] == |P|.

#include <iostream>
#include <vector>
#include <string>
using namespace std;

vector<int> z_algorithm(const string &s) {
    int n = s.size();
    vector<int> Z(n);
    int L = 0, R = 0;
    for (int i = 1; i < n; i++) {
        if (i > R) {
            L = R = i;
            while (R < n && s[R-L] == s[R]) R++;
            Z[i] = R-L; R--;
        } else {
            int k = i-L;
            if (Z[k] < R-i+1) Z[i] = Z[k];
            else {
                L = i;
                while (R < n && s[R-L] == s[R]) R++;
                Z[i] = R-L; R--;
            }
        }
    }
    return Z;
}

int main() {
    string P, T;
    cin >> P >> T;
    string S = P + '#39; + T;
    auto Z = z_algorithm(S);
    int m = P.size();
    for (int i = m+1; i < (int)S.size(); i++) {
        if (Z[i] == m)
            cout << (i - m - 1) << '\n';
    }
    return 0;
}

3.2 Count Distinct Characters in Every Substring of Length k

Given string S and integer k, count how many substrings of length k have all distinct characters. Slide a window of length k, and check uniqueness by comparing prefix matches using Z on each window.

#include <iostream>
#include <vector>
#include <string>
#include <unordered_set>
using namespace std;

int main() {
    string S; int k;
    cin >> S >> k;
    int n = S.size(), cnt = 0;
    for (int i = 0; i + k <= n; i++) {
        unordered_set<char> st;
        bool ok = true;
        for (int j = i; j < i+k; j++) {
            if (st.count(S[j])) { ok = false; break; }
            st.insert(S[j]);
        }
        if (ok) cnt++;
    }
    cout << cnt << "\n";
    return 0;
}

4. Intermediate Problems

4.1 Smallest Period of a String

Find the smallest period p of a string S (smallest p such that S[i] = S[i+p] for all valid i). Compute Z-array on S, then the smallest i>0 with i + Z[i] == n is the period.

#include <iostream>
#include <vector>
#include <string>
using namespace std;

vector<int> z_algorithm(const string &s) {
    int n = s.size();
    vector<int> Z(n);
    int L=0,R=0;
    for(int i=1;i<n;i++){
        if(i>R){
            L=R=i;
            while(R<n && s[R-L]==s[R]) R++;
            Z[i]=R-L; R--;
        } else {
            int k=i-L;
            if(Z[k]<R-i+1) Z[i]=Z[k];
            else{
                L=i;
                while(R<n && s[R-L]==s[R]) R++;
                Z[i]=R-L; R--;
            }
        }
    }
    return Z;
}

int main(){
    string S;
    cin >> S;
    int n = S.size();
    auto Z = z_algorithm(S);
    int period = n;
    for(int i = 1; i < n; i++){
        if(i + Z[i] == n){
            period = i;
            break;
        }
    }
    cout << period << "\n";
    return 0;
}

4.2 Lexicographically Minimal Rotation

To find the lexicographically smallest rotation of S, consider T = S + S. Compute Z on T for each suffix i < n to compare with the current best rotation.

#include <iostream>
#include <vector>
#include <string>
using namespace std;

vector<int> z_algorithm(const string &s) {
    int n = s.size();
    vector<int> Z(n);
    int L=0,R=0;
    for(int i=1;i<n;i++){
        if(i>R){
            L=R=i;
            while(R<n && s[R-L]==s[R]) R++;
            Z[i]=R-L; R--;
        } else {
            int k=i-L;
            if(Z[k]<R-i+1) Z[i]=Z[k];
            else{
                L=i;
                while(R<n && s[R-L]==s[R]) R++;
                Z[i]=R-L; R--;
            }
        }
    }
    return Z;
}

int main(){
    string S;
    cin >> S;
    string T = S + S;
    int n = S.size(), best = 0;
    auto Z = z_algorithm(T);
    for(int i = 1; i < n; i++){
        int len = Z[i];
        if(i+len < 2*n && T[i+len] < T[best+len])
            best = i;
    }
    cout << T.substr(best, n) << "\n";
    return 0;
}

5. Hard Problem

5.1 Pattern Matching with ≤k Mismatches

Given text T, pattern P, and integer k, find all positions where Hamming distance between P and the substring of T is ≤ k. Compute Z on both P+'#39;+T and rev(P)+'#39;+rev(T) to get longest prefix and suffix matches, then check if prefix_len + suffix_len ≥ |P| - k.

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;

vector<int> z_algorithm(const string &s) {
    int n = s.size();
    vector<int> Z(n);
    int L=0,R=0;
    for(int i=1;i<n;i++){
        if(i>R){
            L=R=i;
            while(R<n && s[R-L]==s[R]) R++;
            Z[i]=R-L; R--;
        } else {
            int k=i-L;
            if(Z[k]<R-i+1) Z[i]=Z[k];
            else{
                L=i;
                while(R<n && s[R-L]==s[R]) R++;
                Z[i]=R-L; R--;
            }
        }
    }
    return Z;
}

int main(){
    string P, T;
    int k;
    cin >> P >> T >> k;
    int n = P.size(), m = T.size();
    string S1 = P + '#39; + T;
    auto Z1 = z_algorithm(S1);
    string Pr = P; reverse(Pr.begin(), Pr.end());
    string Tr = T; reverse(Tr.begin(), Tr.end());
    string S2 = Pr + '#39; + Tr;
    auto Z2 = z_algorithm(S2);

    for(int i = 0; i + n <= m; i++){
        int pre = Z1[n+1 + i];
        int suf = Z2[n+1 + (m - i - n)];
        if(pre + suf >= n - k)
            cout << i << "\n";
    }
    return 0;
}