KMP Algorithm and Applications

KMP Algorithm (Knuth–Morris–Pratt)

1. Algorithm Overview

The KMP algorithm searches for occurrences of a “pattern” string P of length m within a “text” string T of length n. It avoids re-examining characters by precomputing a longest-proper-prefix-which-is-also-suffix (LPS) array for the pattern.

  1. Build LPS array:
    • lps[i] = length of the longest proper prefix of P[0…i] which is also a suffix.
    • Computed in O(m) by a two-pointer scan over the pattern.
  2. Search:
    • Scan text with two indices i (text) and j (pattern).
    • On mismatch, slide the pattern by setting j = lps[j-1] without incrementing i.
    • On match, advance both i and j.
    • Whenever j == m, a match is found at i - m.

2. Time and Memory Complexity

  • Time: O(m + n) — building the LPS array costs O(m), searching costs O(n).
  • Memory: O(m) for the LPS array (in addition to storing the input strings).

3. Easy Problems

3.1. Substring Existence

Check if P appears in T at least once.

#include <bits/stdc++.h>
using namespace std;

vector buildLPS(const string &P) {
    int m = P.size();
    vector lps(m, 0);
    int len = 0;  // length of the previous longest prefix suffix
    for (int i = 1; i < m; ) {
        if (P[i] == P[len]) {
            lps[i++] = ++len;
        } else if (len) {
            len = lps[len - 1];
        } else {
            lps[i++] = 0;
        }
    }
    return lps;
}

bool contains(const string &T, const string &P) {
    int n = T.size(), m = P.size();
    auto lps = buildLPS(P);
    for (int i = 0, j = 0; i < n; ) {
        if (T[i] == P[j]) {
            i++; j++;
            if (j == m) return true;
        } else if (j) {
            j = lps[j - 1];
        } else {
            i++;
        }
    }
    return false;
}

int main() {
    string text = "hello world", pattern = "world";
    cout << (contains(text, pattern) ? "Found\n" : "Not Found\n");
    return 0;
}

3.2. Count Occurrences

Count how many times P occurs in T (including overlaps).

#include <bits/stdc++.h>
using namespace std;

vector buildLPS(const string &P) {
    int m = P.size();
    vector lps(m, 0);
    int len = 0;
    for (int i = 1; i < m; ) {
        if (P[i] == P[len]) lps[i++] = ++len;
        else if (len) len = lps[len - 1];
        else lps[i++] = 0;
    }
    return lps;
}

int countOccurrences(const string &T, const string &P) {
    int n = T.size(), m = P.size(), cnt = 0;
    auto lps = buildLPS(P);
    for (int i = 0, j = 0; i < n; ) {
        if (T[i] == P[j]) {
            i++; j++;
            if (j == m) {
                cnt++;
                j = lps[j - 1];
            }
        } else if (j) {
            j = lps[j - 1];
        } else {
            i++;
        }
    }
    return cnt;
}

int main() {
    cout << countOccurrences("abababa", "aba") << "\n"; // Outputs 3
    return 0;
}

4. Intermediate Problems

4.1. Longest Prefix Which Is Also Suffix (Proper)

Given a string S, find the length of the longest proper prefix which is also a suffix.

#include <bits/stdc++.h>
using namespace std;

vector buildLPS(const string &P) {
    int m = P.size();
    vector lps(m, 0);
    int len = 0;
    for (int i = 1; i < m; ) {
        if (P[i] == P[len]) lps[i++] = ++len;
        else if (len) len = lps[len - 1];
        else lps[i++] = 0;
    }
    return lps;
}

int longestProperPrefSuffix(const string &S) {
    auto lps = buildLPS(S);
    return lps.back();
}

int main() {
    cout << longestProperPrefSuffix("ababcabab") << "\n"; // Outputs 5 ("ababc")
    return 0;
}

4.2. Pattern in a Circular String

Check if P occurs in any rotation of T.

#include <bits/stdc++.h>
using namespace std;

vector buildLPS(const string &P) {
    int m = P.size();
    vector lps(m, 0);
    int len = 0;
    for (int i = 1; i < m; ) {
        if (P[i] == P[len]) lps[i++] = ++len;
        else if (len) len = lps[len - 1];
        else lps[i++] = 0;
    }
    return lps;
}

bool inCircular(const string &T, const string &P) {
    // Concatenate T with itself and search P
    string TT = T + T;
    auto lps = buildLPS(P);
    int n = TT.size(), m = P.size();
    for (int i = 0, j = 0; i < n; ) {
        if (TT[i] == P[j]) {
            i++; j++;
            if (j == m) return true;
        } else if (j) {
            j = lps[j - 1];
        } else {
            i++;
        }
    }
    return false;
}

int main() {
    cout << (inCircular("abcde", "deab") ? "Yes\n" : "No\n");
    return 0;
}

5. Hard Problem

5.1. Shortest Palindrome by Adding Characters in Front

Given S, find the shortest palindrome you can form by adding characters in front of it.
Approach: Build string A = S + "#" + reverse(S), compute its LPS, and use the last LPS value to know the longest palindromic prefix.

#include <bits/stdc++.h>
using namespace std;

vector buildLPS(const string &P) {
    int m = P.size();
    vector lps(m, 0);
    int len = 0;
    for (int i = 1; i < m; ) {
        if (P[i] == P[len]) lps[i++] = ++len;
        else if (len) len = lps[len - 1];
        else lps[i++] = 0;
    }
    return lps;
}

string shortestPalindrome(string s) {
    string rev = s;
    reverse(rev.begin(), rev.end());
    string A = s + "#" + rev;
    auto lps = buildLPS(A);
    int palLen = lps.back();  // length of longest palindromic prefix
    string toAdd = rev.substr(0, s.size() - palLen);
    return toAdd + s;
}

int main() {
    cout << shortestPalindrome("aacecaaa") << "\n";  // Outputs "aaacecaaa"
    cout << shortestPalindrome("abcd") << "\n";      // Outputs "dcbabcd"
    return 0;
}