KMP Algorithm (Knuth–Morris–Pratt)
1. Algorithm Overview
The KMP algorithm searches for occurrences of a “pattern” string P
of length m
within a “text” string T
of length n
.
It avoids re-examining characters by precomputing a longest-proper-prefix-which-is-also-suffix (LPS
) array for the pattern.
-
Build LPS array:
lps[i]
= length of the longest proper prefix ofP[0…i]
which is also a suffix.- Computed in
O(m)
by a two-pointer scan over the pattern.
-
Search:
- Scan text with two indices
i
(text) andj
(pattern). - On mismatch, slide the pattern by setting
j = lps[j-1]
without incrementingi
. - On match, advance both
i
andj
. - Whenever
j == m
, a match is found ati - m
.
- Scan text with two indices
2. Time and Memory Complexity
- Time:
O(m + n)
— building the LPS array costsO(m)
, searching costsO(n)
. - Memory:
O(m)
for the LPS array (in addition to storing the input strings).
3. Easy Problems
3.1. Substring Existence
Check if P
appears in T
at least once.
#include <bits/stdc++.h>
using namespace std;
vector buildLPS(const string &P) {
int m = P.size();
vector lps(m, 0);
int len = 0; // length of the previous longest prefix suffix
for (int i = 1; i < m; ) {
if (P[i] == P[len]) {
lps[i++] = ++len;
} else if (len) {
len = lps[len - 1];
} else {
lps[i++] = 0;
}
}
return lps;
}
bool contains(const string &T, const string &P) {
int n = T.size(), m = P.size();
auto lps = buildLPS(P);
for (int i = 0, j = 0; i < n; ) {
if (T[i] == P[j]) {
i++; j++;
if (j == m) return true;
} else if (j) {
j = lps[j - 1];
} else {
i++;
}
}
return false;
}
int main() {
string text = "hello world", pattern = "world";
cout << (contains(text, pattern) ? "Found\n" : "Not Found\n");
return 0;
}
3.2. Count Occurrences
Count how many times P
occurs in T
(including overlaps).
#include <bits/stdc++.h>
using namespace std;
vector buildLPS(const string &P) {
int m = P.size();
vector lps(m, 0);
int len = 0;
for (int i = 1; i < m; ) {
if (P[i] == P[len]) lps[i++] = ++len;
else if (len) len = lps[len - 1];
else lps[i++] = 0;
}
return lps;
}
int countOccurrences(const string &T, const string &P) {
int n = T.size(), m = P.size(), cnt = 0;
auto lps = buildLPS(P);
for (int i = 0, j = 0; i < n; ) {
if (T[i] == P[j]) {
i++; j++;
if (j == m) {
cnt++;
j = lps[j - 1];
}
} else if (j) {
j = lps[j - 1];
} else {
i++;
}
}
return cnt;
}
int main() {
cout << countOccurrences("abababa", "aba") << "\n"; // Outputs 3
return 0;
}
4. Intermediate Problems
4.1. Longest Prefix Which Is Also Suffix (Proper)
Given a string S
, find the length of the longest proper prefix which is also a suffix.
#include <bits/stdc++.h>
using namespace std;
vector buildLPS(const string &P) {
int m = P.size();
vector lps(m, 0);
int len = 0;
for (int i = 1; i < m; ) {
if (P[i] == P[len]) lps[i++] = ++len;
else if (len) len = lps[len - 1];
else lps[i++] = 0;
}
return lps;
}
int longestProperPrefSuffix(const string &S) {
auto lps = buildLPS(S);
return lps.back();
}
int main() {
cout << longestProperPrefSuffix("ababcabab") << "\n"; // Outputs 5 ("ababc")
return 0;
}
4.2. Pattern in a Circular String
Check if P
occurs in any rotation of T
.
#include <bits/stdc++.h>
using namespace std;
vector buildLPS(const string &P) {
int m = P.size();
vector lps(m, 0);
int len = 0;
for (int i = 1; i < m; ) {
if (P[i] == P[len]) lps[i++] = ++len;
else if (len) len = lps[len - 1];
else lps[i++] = 0;
}
return lps;
}
bool inCircular(const string &T, const string &P) {
// Concatenate T with itself and search P
string TT = T + T;
auto lps = buildLPS(P);
int n = TT.size(), m = P.size();
for (int i = 0, j = 0; i < n; ) {
if (TT[i] == P[j]) {
i++; j++;
if (j == m) return true;
} else if (j) {
j = lps[j - 1];
} else {
i++;
}
}
return false;
}
int main() {
cout << (inCircular("abcde", "deab") ? "Yes\n" : "No\n");
return 0;
}
5. Hard Problem
5.1. Shortest Palindrome by Adding Characters in Front
Given S
, find the shortest palindrome you can form by adding characters in front of it.
Approach: Build string A = S + "#" + reverse(S)
, compute its LPS, and use the last LPS value to know the longest palindromic prefix.
#include <bits/stdc++.h>
using namespace std;
vector buildLPS(const string &P) {
int m = P.size();
vector lps(m, 0);
int len = 0;
for (int i = 1; i < m; ) {
if (P[i] == P[len]) lps[i++] = ++len;
else if (len) len = lps[len - 1];
else lps[i++] = 0;
}
return lps;
}
string shortestPalindrome(string s) {
string rev = s;
reverse(rev.begin(), rev.end());
string A = s + "#" + rev;
auto lps = buildLPS(A);
int palLen = lps.back(); // length of longest palindromic prefix
string toAdd = rev.substr(0, s.size() - palLen);
return toAdd + s;
}
int main() {
cout << shortestPalindrome("aacecaaa") << "\n"; // Outputs "aaacecaaa"
cout << shortestPalindrome("abcd") << "\n"; // Outputs "dcbabcd"
return 0;
}