Cse2012 Design and Analysis of Algorithms Lab Digital Assignment 2

Download as pdf or txt
Download as pdf or txt
You are on page 1of 18

CSE2012 DESIGN AND ANALYSIS OF

ALGORITHMS
LAB DIGITAL ASSIGNMENT 2

Name: Akshat Swaminath

Regd. No: 20BCE2231

Slot: L47+48
Submitted to: Mr. Sivanesan S Sir
Aim: To write an executable program for Naïve String-matching, with proper pseudocode,
code and flowchart.
Problem Analysis:
The Naïve String-matching Algorithm is basically the brute-force approach to string matching.
It is very easy to understand, as depicted below, but it is one of the least efficient
string-matching algorithms. Let us consider a text of length n. The main objective of this
searching algorithm is to find the positions of a given pattern, whose length is at most n,
in the text, ideally in the minimum number of searches/iterations.
Example of Naïve String-matching Algorithms
Flowchart:
Pseudocode:
/* Naive string matching: prints every index of txt[] at which pat[] occurs.
   pat and txt are NUL-terminated strings. */
void search(char* pat, char* txt)
{
    int M = strlen(pat);
    int N = strlen(txt);
    /* Slide pat[] over txt[] one position at a time; when pat is longer
       than txt, N - M is negative and the loop body never runs */
    for (int i = 0; i <= N - M; i++) {
        int j;
        /* For the current index i, compare pat[] against txt[i..i+M-1] */
        for (j = 0; j < M; j++)
            if (txt[i + j] != pat[j])
                break;
        if (j == M) // the inner loop ran to completion: pat[0...M-1] == txt[i...i+M-1]
            printf("Pattern found at index %d \n", i);
    }
}
Actual code:
#include <bits/stdc++.h>
using namespace std;

// Brute-force matcher: tries every alignment of pat against txt and reports,
// one line per hit, each start index at which the whole pattern matches.
void search(char* pat, char* txt)
{
    const int patLen = static_cast<int>(std::strlen(pat));
    const int txtLen = static_cast<int>(std::strlen(txt));
    const int lastStart = txtLen - patLen; // negative when the pattern is longer than the text

    for (int start = 0; start <= lastStart; ++start) {
        // Count how many leading pattern characters agree with the text here.
        int matched = 0;
        while (matched < patLen && txt[start + matched] == pat[matched])
            ++matched;

        // Every character agreed, so the pattern occurs at this alignment.
        if (matched == patLen)
            std::cout << "Pattern found at index " << start << std::endl;
    }
}

// Driver Code
// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and prints every index at which the pattern occurs.
int main()
{
    constexpr int kBufferSize = 100;

    // Zero-initialised so both buffers hold a valid empty string even when
    // extraction fails (for example on end of input).
    char txt[kBufferSize] = {};
    char pat[kBufferSize] = {};

    // setw caps each extraction at kBufferSize - 1 characters plus the
    // terminator; a plain `cin >> char[]` is unbounded in C++17 and would
    // overrun the buffer on a longer token.
    std::cin >> std::setw(kBufferSize) >> txt;
    std::cin >> std::setw(kBufferSize) >> pat;

    search(pat, txt);
    return 0;
}
Output screenshot:
Aim: Using KMP algorithm for pattern searching
Analysis:
The KMP matching algorithm exploits the fact that the same sub-patterns can appear
more than once within the pattern. The basic idea behind the KMP algorithm is:
whenever we detect a mismatch (after some matches), we already know some of the
characters in the text of the next window. We take advantage of this information to avoid
matching the characters that we know will anyway match.
Flowchart :
Pseudocode:
n ← length[Text]
m ← length[Pattern]
LPS ← COMPUTE-PREFIX-FUNCTION(Pattern)
q ← 0 // number of characters matched
for i ← 1 to n // scan Text from left to right
do while q > 0 and Pattern[q + 1] ≠ Text[i]
do q ← LPS[q] // next character does not match
if Pattern[q + 1] = Text[i]
then q ← q + 1 // next character matches
if q = m // is all of Pattern matched?
then print "Pattern occurs with shift" i - m
q ← LPS[q] // look for the next match
Code
#include <bits/stdc++.h>

void computeLPSArray(char* pat, int M, int* lps);

// Prints every index of txt[] at which pat[] occurs, using the
// Knuth-Morris-Pratt algorithm. pat and txt are NUL-terminated strings.
// Each hit is printed as "Found pattern at index <i> " on a single line.
void KMPSearch(char* pat, char* txt)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    // An empty pattern has no lps[] entries, so the matching loop below would
    // index lps[-1]; a pattern longer than the text can never match. Neither
    // case has anything to report.
    if (M == 0 || M > N)
        return;

    // lps[k] holds the longest-proper-prefix-that-is-also-a-suffix length for
    // pat[0..k]. std::vector replaces the non-standard variable-length array.
    std::vector<int> lps(M);

    // Preprocess the pattern (fill lps[])
    computeLPSArray(pat, M, lps.data());

    int i = 0; // index into txt[]
    int j = 0; // index into pat[] == number of pattern characters matched so far
    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }

        if (j == M) {
            // Whole pattern matched, ending just before txt[i]
            std::printf("Found pattern at index %d ", i - j);
            // Keep the longest border so overlapping occurrences are found
            j = lps[j - 1];
        }
        // mismatch after j matches
        else if (i < N && pat[j] != txt[i]) {
            // The first lps[j-1] characters are known to match already,
            // so only the pattern index falls back; i is not rewound
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
}

// Fills lps[] for the given pattern pat[0..M-1].
// On return, lps[i] is the length of the longest proper prefix of pat[0..i]
// that is also a suffix of pat[0..i]. lps must have room for M ints.
// Does nothing when M <= 0.
void computeLPSArray(char* pat, int M, int* lps)
{
    // Nothing to fill; writing lps[0] below would be out of bounds
    if (M <= 0)
        return;

    // length of the previous longest prefix suffix
    int len = 0;

    lps[0] = 0; // a single character has no proper prefix

    // the loop calculates lps[i] for i = 1 to M-1
    int i = 1;
    while (i < M) {
        if (pat[i] == pat[len]) {
            // The current border extends by one character
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry the same i
            len = lps[len - 1];
        }
        else {
            // No border ends at position i
            lps[i] = 0;
            i++;
        }
    }
}

// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and runs the KMP search on them.
int main()
{
    constexpr int kBufferSize = 100;

    // Zero-initialised so both buffers hold a valid empty string even when
    // extraction fails (for example on end of input).
    char txt[kBufferSize] = {};
    char pat[kBufferSize] = {};

    // cin is qualified because this program has no using-directive for std.
    // setw caps each extraction at kBufferSize - 1 characters plus the
    // terminator; a plain `cin >> char[]` is unbounded in C++17.
    std::cin >> std::setw(kBufferSize) >> txt;
    std::cin >> std::setw(kBufferSize) >> pat;

    KMPSearch(pat, txt);
    return 0;
}
Output screenshots
AIM:
Using Rabin-Karp algorithm for pattern searching
Problem analysis:
Rabin-Karp algorithm slides the pattern one by one. Rabin Karp algorithm matches the hash
value of the pattern with the hash value of current substring of text, and if the hash values
match then only it starts matching individual characters.
Flowchart:
Pseudocode :
Begin
patLen := pattern Length
strLen := string Length
patHash := 0 and strHash := 0, h := 1
maxChar := total number of characters in character set

// h becomes maxChar^(patLen - 1) mod prime, the weight of the leading character
for i := 1 to (patLen - 1), do
h := (h*maxChar) mod prime
done

for all character index i of pattern, do


patHash := (maxChar*patHash + pattern[i]) mod prime
strHash := (maxChar*strHash + text[i]) mod prime
done

for i := 0 to (strLen - patLen), do


if patHash = strHash, then
for charIndex := 0 to patLen -1, do
if text[i+charIndex] ≠ pattern[charIndex], then
break the loop
done

if charIndex = patLen, then


print the location i as pattern found at i position.
if i < (strLen - patLen), then
strHash := (maxChar*(strHash – text[i]*h) + text[i+patLen]) mod prime
if strHash < 0, then
strHash := strHash + prime
done
End
CODE :
#include <bits/stdc++.h>
using namespace std;

// Number of distinct character values; used as the radix of the rolling hash.
constexpr int d = 256;

// Rabin-Karp search: prints "Pattern found at index <i>" on its own line for
// every index i of txt[] at which pat[] occurs.
//
//   pat, txt  NUL-terminated pattern and text
//   q         modulus for the rolling hash (ideally a prime); must be positive
//
// Prints nothing when q is not positive, when the pattern is empty, or when
// the pattern is longer than the text.
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    // q <= 0 would divide by zero or give meaningless hashes; with M > N the
    // initial hash would read past the end of the text.
    if (q <= 0 || M == 0 || M > N)
        return;

    // Hash arithmetic is done in 64 bits: d * (t - c * h) exceeds the range of
    // a 32-bit int as soon as q is larger than a few tens of thousands.
    const long long mod = q;

    // Hash characters as 0..255 regardless of whether plain char is signed.
    auto code = [](char c) -> long long { return static_cast<unsigned char>(c); };

    // h = d^(M-1) % q, the weight of the leading character of a window
    long long h = 1 % mod;
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % mod;

    long long p = 0; // hash value of the pattern
    long long t = 0; // hash value of the current text window
    for (int i = 0; i < M; i++) {
        p = (d * p + code(pat[i])) % mod;
        t = (d * t + code(txt[i])) % mod;
    }

    for (int i = 0; i <= N - M; i++) {
        // Equal hashes only suggest a match; confirm it character by character
        if (p == t && std::memcmp(txt + i, pat, M) == 0)
            std::cout << "Pattern found at index " << i << '\n';

        if (i < N - M) {
            // Roll the window: drop txt[i], append txt[i + M]
            t = (d * (t - code(txt[i]) * h) + code(txt[i + M])) % mod;

            // The subtraction can leave a negative remainder
            if (t < 0)
                t += mod;
        }
    }
}

/* Driver code */
// Reads the text, the pattern and the hash modulus q from standard input (in
// that order) and runs the Rabin-Karp search. Returns 1 if q is missing or
// not a positive integer.
int main()
{
    constexpr int kBufferSize = 100;

    // Zero-initialised so both buffers hold a valid empty string even when
    // extraction fails (for example on end of input).
    char txt[kBufferSize] = {};
    char pat[kBufferSize] = {};

    // setw caps each extraction at kBufferSize - 1 characters plus the
    // terminator; a plain `cin >> char[]` is unbounded in C++17.
    std::cin >> std::setw(kBufferSize) >> txt;
    std::cin >> std::setw(kBufferSize) >> pat;

    // q is used as a divisor by the hash, so zero or negative input is rejected
    int q = 0;
    if (!(std::cin >> q) || q <= 0) {
        std::cerr << "The modulus q must be a positive integer\n";
        return 1;
    }

    search(pat, txt, q);
    return 0;
}

Output:
Result analysis:
Complexity analysis table
| Name of technique | Name of sample outcome | Time complexity (in Big O notation) | Space complexity | Ranking based on complexities* |
|---|---|---|---|---|
| Naive String matching Algorithm (Brute force) | Naive String matching Algorithm | O(m*n) | O(n) | 1 |
| KMP algorithm | KMP algorithm | O(n) | O(n) | 2 |

*one being the least complex


Optimised code
#include <bits/stdc++.h>
using namespace std;

// Naive search specialised for patterns whose characters are all different.
// Prints "Pattern found at index <i>" on its own line for each occurrence of
// pat in txt.
//
// Precondition: no character repeats within pat. After j characters have
// matched, none of them can start another occurrence, so the window jumps
// ahead by j instead of by 1. With repeated characters this jump can skip
// real matches.
//
// An empty pattern prints nothing.
void search(const std::string& pat, const std::string& txt)
{
    const int M = static_cast<int>(pat.size());
    const int N = static_cast<int>(txt.size());

    // With M == 0 every window "matches" and the jump by M never advances,
    // so the loop below would never terminate.
    if (M == 0)
        return;

    int i = 0;
    while (i <= N - M) {
        // Count how many leading pattern characters match at this window
        int j = 0;
        while (j < M && txt[i + j] == pat[j])
            ++j;

        if (j == M) { // pat[0...M-1] == txt[i...i+M-1]
            std::cout << "Pattern found at index " << i << '\n';
            i += M;
        } else {
            // Slide past the j matched characters, always by at least one
            i += (j == 0) ? 1 : j;
        }
    }
}

// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and runs the optimised search on them.
int main()
{
    // search() takes std::string, so the tokens are read straight into
    // strings; this removes the fixed 100-byte buffers that an unbounded
    // `cin >> char[]` could overrun.
    std::string txt;
    std::string pat;
    std::cin >> txt;
    std::cin >> pat;

    search(pat, txt);
    return 0;
}

Output screenshot

Conclusion:
We have written more optimised code for Naive String-matching Algorithm for a specific
case i.e., when all the characters of the pattern are different.
We have analysed and studied the KMP algorithm and Rabin-Karp algorithm.

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy