from typing import TypeVar, Callable, List T = TypeVar("T") class KMP: def __init__(self, pattern: str) -> None: self.pattern = pattern self.pi = self.computeLPSArray() def computeLPSArray(self): length = 0 # length of the previous longest prefix suffix M = len(self.pattern) lps = [0] * M i = 1 # the loop calculates lps[i] for i = 1 to M-1 while i < M: if self.pattern[i] == self.pattern[length]: length += 1 lps[i] = length i += 1 else: # This is tricky. Consider the example. # AAACAAAA and i = 7. The idea is similar # to search step. if length != 0: length = lps[length - 1] # Also, note that we do not increment i here else: lps[i] = 0 i += 1 return lps # Python program for KMP Algorithm def KMPSearch(self, txt): pat = self.pattern M = len(pat) N = len(txt) lps = self.pi i = 0 # index for txt[] j = 0 # index for pat[] while i < N: if pat[j] == txt[i]: i += 1 j += 1 if j == M: return True # mismatch after j matches elif i < N and pat[j] != txt[i]: # Do not match lps[0..lps[j-1]] characters, # they will match anyway if j != 0: j = lps[j - 1] else: i += 1 return False def search(query: str, documents: List[T], get_title: Callable[[T], str], get_text: Callable[[T], str]): query = query.lower() qwords = query.split() qkmp = [KMP(word) for word in qwords] ranking = [] for doc in documents: score = 0 title = get_title(doc).lower() text = get_text(doc).lower() for kmp in qkmp: if kmp.KMPSearch(text): score += 1 if kmp.KMPSearch(title): score += 5 if title == query: score += 1000 if score > 0: ranking.append((score, len(ranking), doc)) ranking.sort() ranking.reverse() return [doc for (_, _, doc) in ranking]