Start Typing !!! type the highlighted character. You can't scroll once you start typing.
filename

rabin_karp.py

# Numbers of alphabet which we call base

alphabet_size = 256

# Modulus to hash a string

modulus = 1000003

def rabin_karp(pattern, text):

"""

The Rabin-Karp Algorithm for finding a pattern within a piece of text

with complexity O(nm), most efficient when it is used with multiple patterns

as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes.

This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify

1) Calculate pattern hash

2) Step through the text one character at a time passing a window with the same length as the pattern

calculating the hash of the text within the window compare it with the hash of the pattern. Only testing

equality if the hashes match

"""

p_len = len(pattern)

t_len = len(text)

if p_len > t_len:

return False

p_hash = 0

text_hash = 0

modulus_power = 1

# Calculating the hash of pattern and substring of text

for i in range(p_len):

p_hash = (ord(pattern[i]) + p_hash * alphabet_size) % modulus

text_hash = (ord(text[i]) + text_hash * alphabet_size) % modulus

if i == p_len - 1:

continue

modulus_power = (modulus_power * alphabet_size) % modulus

for i in range(0, t_len - p_len + 1):

if text_hash == p_hash and text[i : i + p_len] == pattern:

return True

if i == t_len - p_len:

continue

# Calculate the https://en.wikipedia.org/wiki/Rolling_hash

text_hash = (

(text_hash - ord(text[i]) * modulus_power) * alphabet_size

+ ord(text[i + p_len])

) % modulus

return False

def test_rabin_karp():

"""

>>> test_rabin_karp()

Success.

"""

# Test 1)

pattern = "abc1abc12"

text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"

text2 = "alskfjaldsk23adsfabcabc"

assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2)

# Test 2)

pattern = "ABABX"

text = "ABABZABABYABABX"

assert rabin_karp(pattern, text)

# Test 3)

pattern = "AAAB"

text = "ABAAAAAB"

assert rabin_karp(pattern, text)

# Test 4)

pattern = "abcdabcy"

text = "abcxabcdabxabcdabcdabcy"

assert rabin_karp(pattern, text)

# Test 5)

pattern = "Lü"

text = "Lüsai"

assert rabin_karp(pattern, text)

pattern = "Lue"

assert not rabin_karp(pattern, text)

print("Success.")

if __name__ == "__main__":

test_rabin_karp()