Z-Algorithm
1. Algorithm Description
The Z-algorithm computes for a string S
of length n
an array Z
where
Z[i]
is the length of the longest substring starting at i
that matches a prefix of S
.
We maintain a window [L
, R
] which is the interval with maximum R
such that
S[L..R]
is a prefix of S
. For each position i
from 1 to n-1
:
- If
i > R
, we naively match fromi
onward to computeZ[i]
, and setL = i, R = i + Z[i] - 1
. - Otherwise (
i ≤ R
), letk = i – L
.- If
Z[k] < R – i + 1
, we setZ[i] = Z[k]
. - Else, we start matching from
R+1
onward to extend the match and updateZ[i]
andL,R
.
- If
2. Time and Memory Complexity
- Time:
O(n)
— each position is processed in amortized constant time. - Memory:
O(n)
— to store the string and the Z-array.
3. Easy Problems
3.1 Pattern Searching (Exact Match)
Given text T
and pattern P
, find all occurrences of P
in T
.
Concatenate S = P + '#39; + T
(where "quot; is a sentinel not in either string), compute Z on S
,
and report positions i
where Z[i] == |P|
.
#include <iostream>
#include <vector>
#include <string>
using namespace std;
vector<int> z_algorithm(const string &s) {
int n = s.size();
vector<int> Z(n);
int L = 0, R = 0;
for (int i = 1; i < n; i++) {
if (i > R) {
L = R = i;
while (R < n && s[R-L] == s[R]) R++;
Z[i] = R-L; R--;
} else {
int k = i-L;
if (Z[k] < R-i+1) Z[i] = Z[k];
else {
L = i;
while (R < n && s[R-L] == s[R]) R++;
Z[i] = R-L; R--;
}
}
}
return Z;
}
int main() {
string P, T;
cin >> P >> T;
string S = P + '#39; + T;
auto Z = z_algorithm(S);
int m = P.size();
for (int i = m+1; i < (int)S.size(); i++) {
if (Z[i] == m)
cout << (i - m - 1) << '\n';
}
return 0;
}
3.2 Count Distinct Characters in Every Substring of Length k
Given string S
and integer k
, count how many substrings of length k
have all distinct characters.
Slide a window of length k
, and check uniqueness by comparing prefix matches using Z on each window.
#include <iostream>
#include <vector>
#include <string>
#include <unordered_set>
using namespace std;
int main() {
string S; int k;
cin >> S >> k;
int n = S.size(), cnt = 0;
for (int i = 0; i + k <= n; i++) {
unordered_set<char> st;
bool ok = true;
for (int j = i; j < i+k; j++) {
if (st.count(S[j])) { ok = false; break; }
st.insert(S[j]);
}
if (ok) cnt++;
}
cout << cnt << "\n";
return 0;
}
4. Intermediate Problems
4.1 Smallest Period of a String
Find the smallest period p
of a string S
(smallest p
such that S[i] = S[i+p]
for all valid i
).
Compute Z-array on S
, then the smallest i>0
with i + Z[i] == n
is the period.
#include <iostream>
#include <vector>
#include <string>
using namespace std;
vector<int> z_algorithm(const string &s) {
int n = s.size();
vector<int> Z(n);
int L=0,R=0;
for(int i=1;i<n;i++){
if(i>R){
L=R=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
} else {
int k=i-L;
if(Z[k]<R-i+1) Z[i]=Z[k];
else{
L=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
}
}
}
return Z;
}
int main(){
string S;
cin >> S;
int n = S.size();
auto Z = z_algorithm(S);
int period = n;
for(int i = 1; i < n; i++){
if(i + Z[i] == n){
period = i;
break;
}
}
cout << period << "\n";
return 0;
}
4.2 Lexicographically Minimal Rotation
To find the lexicographically smallest rotation of S
, consider T = S + S
.
Compute Z on T
for each suffix i < n
to compare with the current best rotation.
#include <iostream>
#include <vector>
#include <string>
using namespace std;
vector<int> z_algorithm(const string &s) {
int n = s.size();
vector<int> Z(n);
int L=0,R=0;
for(int i=1;i<n;i++){
if(i>R){
L=R=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
} else {
int k=i-L;
if(Z[k]<R-i+1) Z[i]=Z[k];
else{
L=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
}
}
}
return Z;
}
int main(){
string S;
cin >> S;
string T = S + S;
int n = S.size(), best = 0;
auto Z = z_algorithm(T);
for(int i = 1; i < n; i++){
int len = Z[i];
if(i+len < 2*n && T[i+len] < T[best+len])
best = i;
}
cout << T.substr(best, n) << "\n";
return 0;
}
5. Hard Problem
5.1 Pattern Matching with ≤k Mismatches
Given text T
, pattern P
, and integer k
, find all positions where Hamming distance between P
and the substring of T
is ≤ k
.
Compute Z on both P+'#39;+T
and rev(P)+'#39;+rev(T)
to get longest prefix and suffix matches, then check if prefix_len + suffix_len ≥ |P| - k
.
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
vector<int> z_algorithm(const string &s) {
int n = s.size();
vector<int> Z(n);
int L=0,R=0;
for(int i=1;i<n;i++){
if(i>R){
L=R=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
} else {
int k=i-L;
if(Z[k]<R-i+1) Z[i]=Z[k];
else{
L=i;
while(R<n && s[R-L]==s[R]) R++;
Z[i]=R-L; R--;
}
}
}
return Z;
}
int main(){
string P, T;
int k;
cin >> P >> T >> k;
int n = P.size(), m = T.size();
string S1 = P + '#39; + T;
auto Z1 = z_algorithm(S1);
string Pr = P; reverse(Pr.begin(), Pr.end());
string Tr = T; reverse(Tr.begin(), Tr.end());
string S2 = Pr + '#39; + Tr;
auto Z2 = z_algorithm(S2);
for(int i = 0; i + n <= m; i++){
int pre = Z1[n+1 + i];
int suf = Z2[n+1 + (m - i - n)];
if(pre + suf >= n - k)
cout << i << "\n";
}
return 0;
}