:first-child{margin-top:0!important}.markdown-body>:last-child{margin-bottom:0!important}.markdown-body a:not([href]){color:inherit;text-decoration:none}.markdown-body .absent{color:var(--color-danger-fg)}.markdown-body .anchor{float:left;margin-left:-20px;padding-right:4px;line-height:1}.markdown-body .anchor:focus{outline:none}.markdown-body p,.markdown-body blockquote,.markdown-body ul,.markdown-body ol,.markdown-body dl,.markdown-body table,.markdown-body pre,.markdown-body details{margin-top:0;margin-bottom:16px}.markdown-body hr{height:.25em;background-color:var(--color-border-default);border:0;margin:24px 0;padding:0}.markdown-body blockquote{color:var(--color-fg-muted);border-left:.25em solid var(--color-border-default);padding:0 1em}.markdown-body blockquote>:first-child{margin-top:0}.markdown-body blockquote>:last-child{margin-bottom:0}.markdown-body h1,.markdown-body h2,.markdown-body h3,.markdown-body h4,.markdown-body h5,.markdown-body h6{font-weight:var(--base-text-weight-semibold,600);margin-top:24px;margin-bottom:16px;line-height:1.25}.markdown-body h1 .octicon-link,.markdown-body h2 .octicon-link,.markdown-body h3 .octicon-link,.markdown-body h4 .octicon-link,.markdown-body h5 .octicon-link,.markdown-body h6 .octicon-link{color:var(--color-fg-default);vertical-align:middle;visibility:hidden}.markdown-body h1:hover .anchor,.markdown-body h2:hover .anchor,.markdown-body h3:hover .anchor,.markdown-body h4:hover .anchor,.markdown-body h5:hover .anchor,.markdown-body h6:hover .anchor{text-decoration:none}.markdown-body h1:hover .anchor .octicon-link,.markdown-body h2:hover .anchor .octicon-link,.markdown-body h3:hover .anchor .octicon-link,.markdown-body h4:hover .anchor .octicon-link,.markdown-body h5:hover .anchor .octicon-link,.markdown-body h6:hover .anchor .octicon-link{visibility:visible}.markdown-body h1 tt,.markdown-body h1 code,.markdown-body h2 tt,.markdown-body h2 code,.markdown-body h3 tt,.markdown-body h3 code,.markdown-body h4 
tt,.markdown-body h4 code,.markdown-body h5 tt,.markdown-body h5 code,.markdown-body h6 tt,.markdown-body h6 code{font-size:inherit;padding:0 .2em}.markdown-body h1{border-bottom:1px solid var(--color-border-muted);padding-bottom:.3em;font-size:2em}.markdown-body h2{border-bottom:1px solid var(--color-border-muted);padding-bottom:.3em;font-size:1.5em}.markdown-body h3{font-size:1.25em}.markdown-body h4{font-size:1em}.markdown-body h5{font-size:.875em}.markdown-body h6{color:var(--color-fg-muted);font-size:.85em}.markdown-body summary h1,.markdown-body summary h2,.markdown-body summary h3,.markdown-body summary h4,.markdown-body summary h5,.markdown-body summary h6{display:inline-block}.markdown-body summary h1 .anchor,.markdown-body summary h2 .anchor,.markdown-body summary h3 .anchor,.markdown-body summary h4 .anchor,.markdown-body summary h5 .anchor,.markdown-body summary h6 .anchor{margin-left:-40px}.markdown-body summary h1,.markdown-body summary h2{border-bottom:0;padding-bottom:0}.markdown-body ul,.markdown-body ol{padding-left:2em}.markdown-body ul.no-list,.markdown-body ol.no-list{padding:0;list-style-type:none}.markdown-body ol[type=a]{list-style-type:lower-alpha}.markdown-body ol[type=A]{list-style-type:upper-alpha}.markdown-body ol[type=i]{list-style-type:lower-roman}.markdown-body ol[type=I]{list-style-type:upper-roman}.markdown-body ol[type="1"]{list-style-type:decimal}.markdown-body div>ol:not([type]){list-style-type:decimal}.markdown-body ul ul,.markdown-body ul ol,.markdown-body ol ol,.markdown-body ol ul{margin-top:0;margin-bottom:0}.markdown-body li>p{margin-top:16px}.markdown-body li+li{margin-top:.25em}.markdown-body dl{padding:0}.markdown-body dl dt{font-size:1em;font-style:italic;font-weight:var(--base-text-weight-semibold,600);margin-top:16px;padding:0}.markdown-body dl dd{margin-bottom:16px;padding:0 16px}.markdown-body 
table{width:100%;width:-webkit-max-content;width:-webkit-max-content;width:max-content;max-width:100%;display:block;overflow:auto}.markdown-body table th{font-weight:var(--base-text-weight-semibold,600)}.markdown-body table th,.markdown-body table td{border:1px solid var(--color-border-default);padding:6px 13px}.markdown-body table td>:last-child{margin-bottom:0}.markdown-body table tr{background-color:var(--color-canvas-default);border-top:1px solid var(--color-border-muted)}.markdown-body table tr:nth-child(2n){background-color:var(--color-canvas-subtle)}.markdown-body table img{background-color:transparent}.markdown-body img{max-width:100%;box-sizing:content-box;background-color:var(--color-canvas-default)}.markdown-body img[align=right]{padding-left:20px}.markdown-body img[align=left]{padding-right:20px}.markdown-body .emoji{max-width:none;vertical-align:text-top;background-color:transparent}.markdown-body span.frame{display:block;overflow:hidden}.markdown-body span.frame>span{float:left;width:auto;border:1px solid var(--color-border-default);margin:13px 0 0;padding:7px;display:block;overflow:hidden}.markdown-body span.frame span img{float:left;display:block}.markdown-body span.frame span span{clear:both;color:var(--color-fg-default);padding:5px 0 0;display:block}.markdown-body span.align-center{clear:both;display:block;overflow:hidden}.markdown-body span.align-center>span{text-align:center;margin:13px auto 0;display:block;overflow:hidden}.markdown-body span.align-center span img{text-align:center;margin:0 auto}.markdown-body span.align-right{clear:both;display:block;overflow:hidden}.markdown-body span.align-right>span{text-align:right;margin:13px 0 0;display:block;overflow:hidden}.markdown-body span.align-right span img{text-align:right;margin:0}.markdown-body span.float-left{float:left;margin-right:13px;display:block;overflow:hidden}.markdown-body span.float-left span{margin:13px 0 0}.markdown-body 
span.float-right{float:right;margin-left:13px;display:block;overflow:hidden}.markdown-body span.float-right>span{text-align:right;margin:13px auto 0;display:block;overflow:hidden}.markdown-body code,.markdown-body tt{white-space:break-spaces;background-color:var(--color-neutral-muted);border-radius:6px;margin:0;padding:.2em .4em;font-size:85%}.markdown-body code br,.markdown-body tt br{display:none}.markdown-body del code{-webkit-text-decoration:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}.markdown-body samp{font-size:85%}.markdown-body pre{word-wrap:normal}.markdown-body pre code{font-size:100%}.markdown-body pre>code{word-break:normal;white-space:pre;background:0 0;border:0;margin:0;padding:0}.markdown-body .highlight{margin-bottom:16px}.markdown-body .highlight pre{word-break:normal;margin-bottom:0}.markdown-body .highlight pre,.markdown-body pre{background-color:var(--color-canvas-subtle);border-radius:6px;padding:16px;font-size:85%;line-height:1.45;overflow:auto}.markdown-body pre code,.markdown-body pre tt{max-width:auto;line-height:inherit;word-wrap:normal;background-color:transparent;border:0;margin:0;padding:0;display:inline;overflow:visible}.markdown-body .csv-data td,.markdown-body .csv-data th{text-align:left;white-space:nowrap;padding:5px;font-size:12px;line-height:1;overflow:hidden}.markdown-body .csv-data .blob-num{text-align:right;background:var(--color-canvas-default);border:0;padding:10px 8px 9px}.markdown-body .csv-data tr{border-top:0}.markdown-body .csv-data th{font-weight:var(--base-text-weight-semibold,600);background:var(--color-canvas-subtle);border-top:0}.markdown-body [data-footnote-ref]:before{content:"["}.markdown-body [data-footnote-ref]:after{content:"]"}.markdown-body .footnotes{color:var(--color-fg-muted);border-top:1px solid var(--color-border-default);font-size:12px}.markdown-body .footnotes ol{padding-left:16px}.markdown-body .footnotes ol 
ul{margin-top:16px;padding-left:16px;display:inline-block}.markdown-body .footnotes li{position:relative}.markdown-body .footnotes li:target:before{pointer-events:none;content:"";border:2px solid var(--color-accent-emphasis);border-radius:6px;position:absolute;top:-8px;bottom:-8px;left:-24px;right:-8px}.markdown-body .footnotes li:target{color:var(--color-fg-default)}.markdown-body .footnotes .data-footnote-backref g-emoji{font-family:monospace}.markdown-body{background-color:var(--color-canvas-default);color:var(--color-fg-default)}.markdown-body a{color:var(--color-accent-fg);text-decoration:none}.markdown-body a:hover{text-decoration:underline}.markdown-body img[align=center]{margin:0 auto}.markdown-body iframe{background-color:#fff;border:0;margin-bottom:16px}.markdown-body svg.octicon{fill:currentColor}.markdown-body .anchor>.octicon{display:inline}.markdown-body figcaption{text-align:center;padding-top:2px}.markdown-body .highlight .token.keyword,.gfm-highlight .token.keyword{color:var(--color-prettylights-syntax-keyword)}.markdown-body .highlight .token.tag .token.class-name,.markdown-body .highlight .token.tag .token.script .token.punctuation,.gfm-highlight .token.tag .token.class-name,.gfm-highlight .token.tag .token.script .token.punctuation{color:var(--color-prettylights-syntax-storage-modifier-import)}.markdown-body .highlight .token.operator,.markdown-body .highlight .token.number,.markdown-body .highlight .token.boolean,.markdown-body .highlight .token.tag .token.punctuation,.markdown-body .highlight .token.tag .token.script .token.script-punctuation,.markdown-body .highlight .token.tag .token.attr-name,.gfm-highlight .token.operator,.gfm-highlight .token.number,.gfm-highlight .token.boolean,.gfm-highlight .token.tag .token.punctuation,.gfm-highlight .token.tag .token.script .token.script-punctuation,.gfm-highlight .token.tag .token.attr-name{color:var(--color-prettylights-syntax-constant)}.markdown-body .highlight .token.function,.gfm-highlight 
.token.function{color:var(--color-prettylights-syntax-entity)}.markdown-body .highlight .token.string,.gfm-highlight .token.string{color:var(--color-prettylights-syntax-string)}.markdown-body .highlight .token.comment,.gfm-highlight .token.comment{color:var(--color-prettylights-syntax-comment)}.markdown-body .highlight .token.class-name,.gfm-highlight .token.class-name{color:var(--color-prettylights-syntax-variable)}.markdown-body .highlight .token.regex,.gfm-highlight .token.regex{color:var(--color-prettylights-syntax-string)}.markdown-body .highlight .token.regex .regex-delimiter,.gfm-highlight .token.regex .regex-delimiter{color:var(--color-prettylights-syntax-constant)}.markdown-body .highlight .token.tag .token.tag,.markdown-body .highlight .token.property,.gfm-highlight .token.tag .token.tag,.gfm-highlight .token.property{color:var(--color-prettylights-syntax-entity-tag)}.markdown-body .highlight .token.deleted,.gfm-highlight .token.deleted{color:var(--color-prettylights-syntax-markup-deleted-text);background-color:var(--color-prettylights-syntax-markup-deleted-bg)}.markdown-body .highlight .token.inserted,.gfm-highlight .token.inserted{color:var(--color-prettylights-syntax-markup-inserted-text);background-color:var(--color-prettylights-syntax-markup-inserted-bg)}

Finite Automata algorithm for Pattern Searching (original) (raw)

Last Updated : 23 Jul, 2025

Given a text txt[0..n-1] and a pattern pat[0..m-1], write a function search(char pat[], char txt[]) that prints all occurrences of pat[] in txt[]. You may assume that n > m.
Examples:

Input: txt[] = "THIS IS A TEST TEXT" pat[] = "TEST" Output: Pattern found at index 10

Input: txt[] = "AABAACAADAABAABA" pat[] = "AABA" Output: Pattern found at index 0 Pattern found at index 9 Pattern found at index 12

Pattern

Pattern searching is an important problem in computer science. When we do search for a string in notepad/word file or browser or database, pattern searching algorithms are used to show the search results.

The string-matching automaton is a very useful tool which is used in string matching algorithm.
String-matching algorithms of this kind build a finite automaton that scans the text string T for all occurrences of the pattern P.

FINITE AUTOMATA

Σ = Set of input symbols

q0 = Initial state

F = Final State

δ = Transition function

A finite automaton M is a 5-tuple (Q, q0, A, Σ, δ), where

Q is a finite set of states,
q0 ∈ Q is the start state,
A ⊆ Q is a distinguished set of accepting states,
Σ is a finite input alphabet,
δ is a function from Q × Σ into Q called the transition function of M.
The finite automaton starts in state q0 and reads the characters of its input string one at a time. If the automaton is in state q and reads input character a, it moves from state q to state δ(q, a). Whenever its current state q is a member of A, the machine M has accepted the string read so far. An input that is not accepted is rejected.

A finite automaton M induces a function φ, called the final-state function, from Σ* to Q such that φ(w) is the state M ends up in after scanning the string w. Thus, M accepts a string w if and only if φ(w) ∈ A.

Algorithm-

FINITE AUTOMATA (T, P)
    State <- 0
    for i <- 1 to n
        State <- δ(State, t_i)
        if State == m then
            Match Found
        end
    end

Why it is efficient?

These string-matching automata are very efficient because they examine each text character exactly once, taking constant time per text character. The matching time used is O(n), where n is the length of the text string.

But the preprocessing time, i.e. the time taken to build the finite automaton, can be large if Σ is large.

Before we discuss Finite Automaton construction, let us take a look at the following Finite Automaton for pattern ACACAGA.

Finite Automata algorithm for Pattern Searching 1

Finite Automata algorithm for Pattern Searching 2

The above diagrams represent graphical and tabular representations of pattern ACACAGA.

Number of states in Finite Automaton will be M+1 where M is length of the pattern. The main thing to construct Finite Automaton is to get the next state from the current state for every possible character.

Given a character x and a state k, we can get the next state by considering the string "pat[0..k-1]x" which is basically concatenation of pattern characters pat[0], pat[1] ...pat[k-1] and the character x. The idea is to get length of the longest prefix of the given pattern such that the prefix is also suffix of "pat[0..k-1]x". The value of length gives us the next state.

For example, let us see how to get the next state from current state 5 and character 'C' in the above diagram. We need to consider the string, "pat[0..4]C" which is "ACACAC". The length of the longest prefix of the pattern such that the prefix is suffix of "ACACAC"is 4 ("ACAC"). So the next state (from state 5) is 4 for character 'C'.

In the following code, computeTF() constructs the Finite Automaton. The time complexity of the computeTF() is O(m^3*NO_OF_CHARS) where m is length of the pattern and NO_OF_CHARS is size of alphabet (total number of possible characters in pattern and text). The implementation tries all possible prefixes starting from the longest possible that can be a suffix of "pat[0..k-1]x". There are better implementations to construct Finite Automaton in O(m*NO_OF_CHARS) (Hint: we can use something like lps array construction in KMP algorithm).

We have covered the better implementation in our next post on pattern searching.

C `

// C program for Finite Automata Pattern searching
// Algorithm
#include<stdio.h>
#include<string.h>

// Size of the input alphabet: the transition table has one column
// for every character code in 0..NO_OF_CHARS-1.
#define NO_OF_CHARS 256

/*
 * Returns the next automaton state when character code x is read in
 * state `state`, for the pattern pat of length M.
 *
 * The result is the length of the longest prefix of pat that is also a
 * suffix of "pat[0..state-1]x", or 0 when there is no such prefix.
 */
int getNextState(char *pat, int M, int state, int x)
{
    // ns stores the result which is next state
    int ns, i;

    // If the character x is same as next character in pattern,
    // then simply increment state.  Both sides are compared as unsigned
    // char so that bytes >= 0x80 match even where plain char is signed.
    if (state < M && (unsigned char)x == (unsigned char)pat[state])
        return state+1;

    // Start from the largest possible value and stop when you find
    // a prefix which is also suffix of "pat[0..state-1]x"
    for (ns = state; ns > 0; ns--)
    {
        // The candidate prefix must end in x ...
        if ((unsigned char)pat[ns-1] == (unsigned char)x)
        {
            // ... and its first ns-1 characters must equal the last
            // ns-1 characters of pat[0..state-1].
            for (i = 0; i < ns-1; i++)
                if (pat[i] != pat[state-ns+1+i])
                    break;
            if (i == ns-1)
                return ns;
        }
    }

    return 0;
}

/* This function builds the TF table which represents
   Finite Automata for a given pattern.

   pat : the pattern
   M   : length of pat
   TF  : output table; rows 0..M are filled, one column per
         character code 0..NO_OF_CHARS-1 */
void computeTF(char *pat, int M, int TF[][NO_OF_CHARS])
{
    int state, x;

    // One getNextState() call per (state, character) pair.
    for (state = 0; state <= M; ++state)
        for (x = 0; x < NO_OF_CHARS; ++x)
            TF[state][x] = getNextState(pat, M, state, x);
}

/* Prints all occurrences of pat in txt.
   For every match the 0-based start index in txt is printed. */
void search(char *pat, char *txt)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // Transition table: M+1 states, one column per character code.
    int TF[M+1][NO_OF_CHARS];

    computeTF(pat, M, TF);

    // Process txt over FA.
    int i, state=0;
    for (i = 0; i < N; i++)
    {
        // Index with the unsigned byte value: a plain char may be
        // negative for bytes >= 0x80, which would read outside TF.
        state = TF[state][(unsigned char)txt[i]];

        // State M means the last M characters read equal pat.
        if (state == M)
            printf ("\n Pattern found at index %d",
                                           i-M+1);
    }
}

// Driver program to test above function
int main()
{
    char *txt = "AABAACAADAABAAABAA";
    char *pat = "AABA";
    search(pat, txt);
    return 0;
}

CPP

// CPP program for Finite Automata Pattern searching
// Algorithm
#include <bits/stdc++.h>
using namespace std;

// Size of the input alphabet: the transition table has one column
// for every character code in 0..NO_OF_CHARS-1.
#define NO_OF_CHARS 256

/*
 * Returns the next automaton state when character code x is read in
 * state `state`, for the pattern pat of length M.
 *
 * The result is the length of the longest prefix of pat that is also a
 * suffix of "pat[0..state-1]x", or 0 when there is no such prefix.
 * pat is taken by const reference so it is not copied on every call.
 */
int getNextState(const std::string &pat, int M, int state, int x)
{
    // If the character x is same as next character in pattern,
    // then simply increment state.  Both sides are compared as unsigned
    // char so that bytes >= 0x80 match even where plain char is signed.
    if (state < M &&
        static_cast<unsigned char>(x) == static_cast<unsigned char>(pat[state]))
        return state+1;

    // ns stores the result which is next state
    int ns, i;

    // Start from the largest possible value and stop when you find
    // a prefix which is also suffix of "pat[0..state-1]x"
    for (ns = state; ns > 0; ns--)
    {
        // The candidate prefix must end in x ...
        if (static_cast<unsigned char>(pat[ns-1]) == static_cast<unsigned char>(x))
        {
            // ... and its first ns-1 characters must equal the last
            // ns-1 characters of pat[0..state-1].
            for (i = 0; i < ns-1; i++)
                if (pat[i] != pat[state-ns+1+i])
                    break;
            if (i == ns-1)
                return ns;
        }
    }

    return 0;
}

/* This function builds the TF table which represents4 Finite Automata for a given pattern */ void computeTF(string pat, int M, int TF[][NO_OF_CHARS]) { int state, x; for (state = 0; state <= M; ++state) for (x = 0; x < NO_OF_CHARS; ++x) TF[state][x] = getNextState(pat, M, state, x); }

/* Prints all occurrences of pat in txt */ void search(string pat, string txt) { int M = pat.size(); int N = txt.size();

int TF[M+1][NO_OF_CHARS]; 

computeTF(pat, M, TF); 

// Process txt over FA. 
int i, state=0; 
for (i = 0; i < N; i++) 
{ 
    state = TF[state][txt[i]]; 
    if (state == M) 
        cout<<" Pattern found at index "<< i-M+1<<endl; 
} 

}

// Driver program to test above function int main() { string txt = "AABAACAADAABAAABAA"; string pat = "AABA"; search(pat, txt); return 0; }

//This code is contributed by rathbhupendra

Java

// Java program for Finite Automata Pattern
// searching Algorithm
class GFG {

    // Size of the input alphabet: the transition table has one column
    // for every character code in 0..NO_OF_CHARS-1.
    static int NO_OF_CHARS = 256;

    /**
     * Returns the next automaton state when character code x is read in
     * the given state.
     *
     * @param pat   the pattern
     * @param M     length of pat
     * @param state current state (0..M)
     * @param x     character code being read
     * @return length of the longest prefix of pat that is also a suffix
     *         of "pat[0..state-1]x", or 0 if there is none
     */
    static int getNextState(char[] pat, int M,
                            int state, int x)
    {
        // If the character x is same as next
        // character in pattern, then simply
        // increment state
        if (state < M && x == pat[state])
            return state + 1;

        // ns stores the result which is next state
        int ns, i;

        // Start from the largest possible value
        // and stop when you find a prefix which
        // is also suffix of "pat[0..state-1]x"
        for (ns = state; ns > 0; ns--)
        {
            if (pat[ns-1] == x)
            {
                for (i = 0; i < ns-1; i++)
                    if (pat[i] != pat[state-ns+1+i])
                        break;
                if (i == ns-1)
                    return ns;
            }
        }

        return 0;
    }

    /* This function builds the TF table which
    represents Finite Automata for a given pattern.
    Rows 0..M of TF are filled, one column per character code. */
    static void computeTF(char[] pat, int M, int TF[][])
    {
        int state, x;
        for (state = 0; state <= M; ++state)
            for (x = 0; x < NO_OF_CHARS; ++x)
                TF[state][x] = getNextState(pat, M, state, x);
    }

    /* Prints all occurrences of pat in txt.
    Only character codes below NO_OF_CHARS have table columns; a
    pattern containing a larger code is never reported as found. */
    static void search(char[] pat, char[] txt)
    {
        int M = pat.length;
        int N = txt.length;

        int[][] TF = new int[M+1][NO_OF_CHARS];

        computeTF(pat, M, TF);

        // Process txt over FA.
        int i, state = 0;
        for (i = 0; i < N; i++)
        {
            // A text character without a table column cannot extend a
            // match, so go back to state 0 instead of indexing past
            // the end of the row.
            if (txt[i] < NO_OF_CHARS)
                state = TF[state][txt[i]];
            else
                state = 0;

            if (state == M)
                System.out.println("Pattern found "
                          + "at index " + (i-M+1));
        }
    }

    // Driver code
    public static void main(String[] args)
    {
        char[] txt = "AABAACAADAABAAABAA".toCharArray();
        char[] pat = "AABA".toCharArray();
        search(pat, txt);
    }
}

// This code is contributed by debjitdbb.

Python3

# Python program for Finite Automata
# Pattern searching Algorithm

# Size of the input alphabet: the transition table has one column
# for every character code in range(NO_OF_CHARS).
NO_OF_CHARS = 256

def getNextState(pat, M, state, x):
    '''
    calculate the next state

    pat   -- the pattern (a string)
    M     -- length of pat
    state -- current state (0..M)
    x     -- character code (an int) being read

    Returns the length of the longest prefix of pat that is also a
    suffix of "pat[0..state-1]x", or 0 if there is none.
    '''

    # If the character x is same as next character
    # in pattern, then simply increment state
    if state < M and x == ord(pat[state]):
        return state+1

    # Start from the largest possible value and
    # stop when you find a prefix which is also suffix
    for ns in range(state, 0, -1):
        if ord(pat[ns-1]) == x:
            # The comparison must restart at 0 for every candidate
            # length; reusing the index left by a longer candidate
            # would skip the check and accept a wrong prefix.
            i = 0
            while i < ns-1:
                if pat[i] != pat[state-ns+1+i]:
                    break
                i += 1
            if i == ns-1:
                return ns
    return 0

def computeTF(pat, M):
    '''
    This function builds the TF table which represents
    Finite Automata for a given pattern

    Returns a list of M+1 rows (one per state), each holding
    NO_OF_CHARS next-state entries indexed by character code.
    '''
    TF = [[0 for i in range(NO_OF_CHARS)]
          for _ in range(M+1)]

    # One getNextState() call per (state, character) pair.
    for state in range(M+1):
        for x in range(NO_OF_CHARS):
            TF[state][x] = getNextState(pat, M, state, x)

    return TF

def search(pat, txt):
    '''
    Prints all occurrences of pat in txt

    Only character codes below NO_OF_CHARS have table columns; a
    pattern containing a larger code is never reported as found.
    '''
    M = len(pat)
    N = len(txt)
    TF = computeTF(pat, M)

    # Process txt over FA.
    state = 0
    for i in range(N):
        code = ord(txt[i])
        # A text character without a table column cannot extend a
        # match, so go back to state 0 instead of raising IndexError.
        state = TF[state][code] if code < NO_OF_CHARS else 0
        if state == M:
            print("Pattern found at index: {}".
                  format(i-M+1))

# Driver program to test above function
def main():
    txt = "AABAACAADAABAAABAA"
    pat = "AABA"
    search(pat, txt)

if __name__ == '__main__':
    main()

# This code is contributed by Atul Kumar

C#

// C# program for Finite Automata Pattern
// searching Algorithm
using System;

class GFG
{
    // Size of the input alphabet: the transition table has one column
    // for every character code in 0..NO_OF_CHARS-1.
    public static int NO_OF_CHARS = 256;

    /// <summary>
    /// Returns the next automaton state when character code x is read
    /// in the given state: the length of the longest prefix of pat that
    /// is also a suffix of "pat[0..state-1]x", or 0 if there is none.
    /// </summary>
    public static int getNextState(char[] pat, int M,
                                   int state, int x)
    {
        // If the character x is same as next
        // character in pattern, then simply
        // increment state
        if (state < M && (char)x == pat[state])
        {
            return state + 1;
        }

        // ns stores the result
        // which is next state
        int ns, i;

        // Start from the largest possible
        // value and stop when you find a
        // prefix which is also suffix of
        // "pat[0..state-1]x"
        for (ns = state; ns > 0; ns--)
        {
            if (pat[ns - 1] == (char)x)
            {
                for (i = 0; i < ns - 1; i++)
                {
                    if (pat[i] != pat[state - ns + 1 + i])
                    {
                        break;
                    }
                }
                if (i == ns - 1)
                {
                    return ns;
                }
            }
        }

        return 0;
    }

    /* This function builds the TF table which
    represents Finite Automata for a given pattern.
    Rows 0..M of TF are filled, one column per character code. */
    public static void computeTF(char[] pat, int M, int[][] TF)
    {
        int state, x;
        for (state = 0; state <= M; ++state)
        {
            for (x = 0; x < NO_OF_CHARS; ++x)
            {
                TF[state][x] = getNextState(pat, M, state, x);
            }
        }
    }

    /* Prints all occurrences of pat in txt.
    Only character codes below NO_OF_CHARS have table columns; a
    pattern containing a larger code is never reported as found. */
    public static void search(char[] pat, char[] txt)
    {
        int M = pat.Length;
        int N = txt.Length;

        int[][] TF = RectangularArrays.ReturnRectangularIntArray(M + 1,
                                                          NO_OF_CHARS);

        computeTF(pat, M, TF);

        // Process txt over FA.
        int i, state = 0;
        for (i = 0; i < N; i++)
        {
            // A text character without a table column cannot extend a
            // match, so go back to state 0 instead of indexing past
            // the end of the row.
            if (txt[i] < NO_OF_CHARS)
            {
                state = TF[state][txt[i]];
            }
            else
            {
                state = 0;
            }

            if (state == M)
            {
                Console.WriteLine("Pattern found " +
                                  "at index " + (i - M + 1));
            }
        }
    }

    public static class RectangularArrays
    {
        // Allocates size1 rows of size2 ints each (all zero).
        public static int[][] ReturnRectangularIntArray(int size1,
                                                        int size2)
        {
            int[][] newArray = new int[size1][];
            for (int array1 = 0; array1 < size1; array1++)
            {
                newArray[array1] = new int[size2];
            }

            return newArray;
        }
    }

    // Driver code
    public static void Main(string[] args)
    {
        char[] txt = "AABAACAADAABAAABAA".ToCharArray();
        char[] pat = "AABA".ToCharArray();
        search(pat, txt);
    }
}

// This code is contributed by Shrikant13

JavaScript

`

Output:

Pattern found at index 0 Pattern found at index 9 Pattern found at index 13

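Time Complexity: O(m^3 * NO_OF_CHARS) to build the automaton with computeTF(), plus O(n) to scan the text, where m is the pattern length and n is the text length.
Auxiliary Space: O(m * NO_OF_CHARS) for the transition table.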

References:
Introduction to Algorithms by Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, Clifford Stein