Spaces:
Running
Running
File size: 2,406 Bytes
4051191 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
from typing import TypeVar, Callable, List
T = TypeVar("T")
class KMP:
def __init__(self, pattern: str) -> None:
self.pattern = pattern
self.pi = self.computeLPSArray()
def computeLPSArray(self):
length = 0 # length of the previous longest prefix suffix
M = len(self.pattern)
lps = [0] * M
i = 1
# the loop calculates lps[i] for i = 1 to M-1
while i < M:
if self.pattern[i] == self.pattern[length]:
length += 1
lps[i] = length
i += 1
else:
# This is tricky. Consider the example.
# AAACAAAA and i = 7. The idea is similar
# to search step.
if length != 0:
length = lps[length - 1]
# Also, note that we do not increment i here
else:
lps[i] = 0
i += 1
return lps
# Python program for KMP Algorithm
def KMPSearch(self, txt):
pat = self.pattern
M = len(pat)
N = len(txt)
lps = self.pi
i = 0 # index for txt[]
j = 0 # index for pat[]
while i < N:
if pat[j] == txt[i]:
i += 1
j += 1
if j == M:
return True
# mismatch after j matches
elif i < N and pat[j] != txt[i]:
# Do not match lps[0..lps[j-1]] characters,
# they will match anyway
if j != 0:
j = lps[j - 1]
else:
i += 1
return False
def search(query: str, documents: List[T], get_title: Callable[[T], str], get_text: Callable[[T], str]):
query = query.lower()
qwords = query.split()
qkmp = [KMP(word) for word in qwords]
ranking = []
for doc in documents:
score = 0
title = get_title(doc).lower()
text = get_text(doc).lower()
for kmp in qkmp:
if kmp.KMPSearch(text):
score += 1
if kmp.KMPSearch(title):
score += 5
if title == query:
score += 1000
if score > 0:
ranking.append((score, len(ranking), doc))
ranking.sort()
ranking.reverse()
return [doc for (_, _, doc) in ranking]
|