manga-reader / plugins /search_engine.py
taslim19
drag
4051191
from typing import TypeVar, Callable, List
T = TypeVar("T")
class KMP:
def __init__(self, pattern: str) -> None:
self.pattern = pattern
self.pi = self.computeLPSArray()
def computeLPSArray(self):
length = 0 # length of the previous longest prefix suffix
M = len(self.pattern)
lps = [0] * M
i = 1
# the loop calculates lps[i] for i = 1 to M-1
while i < M:
if self.pattern[i] == self.pattern[length]:
length += 1
lps[i] = length
i += 1
else:
# This is tricky. Consider the example.
# AAACAAAA and i = 7. The idea is similar
# to search step.
if length != 0:
length = lps[length - 1]
# Also, note that we do not increment i here
else:
lps[i] = 0
i += 1
return lps
# Python program for KMP Algorithm
def KMPSearch(self, txt):
pat = self.pattern
M = len(pat)
N = len(txt)
lps = self.pi
i = 0 # index for txt[]
j = 0 # index for pat[]
while i < N:
if pat[j] == txt[i]:
i += 1
j += 1
if j == M:
return True
# mismatch after j matches
elif i < N and pat[j] != txt[i]:
# Do not match lps[0..lps[j-1]] characters,
# they will match anyway
if j != 0:
j = lps[j - 1]
else:
i += 1
return False
def search(query: str, documents: List[T], get_title: Callable[[T], str], get_text: Callable[[T], str]):
query = query.lower()
qwords = query.split()
qkmp = [KMP(word) for word in qwords]
ranking = []
for doc in documents:
score = 0
title = get_title(doc).lower()
text = get_text(doc).lower()
for kmp in qkmp:
if kmp.KMPSearch(text):
score += 1
if kmp.KMPSearch(title):
score += 5
if title == query:
score += 1000
if score > 0:
ranking.append((score, len(ranking), doc))
ranking.sort()
ranking.reverse()
return [doc for (_, _, doc) in ranking]