Spaces:
Runtime error
Runtime error
| # external | |
| import pytest | |
| # project | |
| import textdistance | |
| import csv | |
| import pyarabic.araby as araby | |
| import numpy as np | |
| from statistics import mean | |
| import pickle | |
| import os | |
| import gradio as gr | |
def textdistance_lcsseq(A, B):
    """Mark the parts of *B* shared with *A* and score the match.

    ``textdistance.lcsseq(A, B)`` supplies the common subsequence; *B* is then
    walked character by character and every run of characters taken from that
    subsequence is wrapped in parentheses.

    Scoring:
      * each matched character adds the current run length (1, 2, 3, ... for
        consecutive matches; the counter resets to 1 after a mismatch);
      * each mismatch step, and each character of *B* left over once the
        subsequence is exhausted, subtracts 0.3;
      * every pair of identical space-separated words (one from *A*, one from
        *B*) adds 10.

    Returns:
        tuple: ``(annotated_B, grade)`` where ``annotated_B`` is *B* with the
        matched runs parenthesised and ``grade`` is an int or float.
    """
    # NOTE(review): the block nesting was reconstructed from a listing that
    # had lost its indentation -- confirm against the original file.
    common = textdistance.lcsseq(A, B)
    pieces = []
    pos_b = 0
    pos_c = 0
    in_group = False
    grade = 0
    run = 1

    while pos_b < len(B) and pos_c < len(common):
        if B[pos_b] == common[pos_c]:
            # Matched character: open a group if one is not already open.
            if not in_group:
                in_group = True
                pieces.append("(")
            pieces.append(B[pos_b])
            grade += run
            run += 1
            pos_b += 1
            pos_c += 1
            continue

        # Mismatch: the run is broken and a small penalty applies.
        run = 1
        grade -= 0.3
        if in_group:
            pieces.append(")")
            in_group = False
        elif common[pos_c] == " ":
            # The next shared character is a space: copy B verbatim up to its
            # next space and consume that space from the subsequence; the
            # space itself is emitted below, outside any group.
            while B[pos_b] != common[pos_c]:
                pieces.append(B[pos_b])
                pos_b += 1
            pos_c += 1
        pieces.append(B[pos_b])
        pos_b += 1

    if in_group:
        pieces.append(")")

    # Whatever remains of B after the subsequence is used up is unmatched.
    for leftover in B[pos_b:]:
        grade -= 0.3
        pieces.append(leftover)

    # Whole-word bonus, counted once per equal (word of A, word of B) pair.
    words_b = B.split(" ")
    for word_a in A.split(" "):
        for word_b in words_b:
            if word_a == word_b:
                grade += 10

    return "".join(pieces), grade
def load():
    """Return the list of verse texts, caching it in ``pickle.pkl``.

    If ``pickle.pkl`` exists in the working directory and can be unpickled,
    its content is returned. Otherwise ``quran.csv`` (UTF-8, comma separated)
    is read, the third column of every row is passed through
    ``araby.strip_diacritics``, and the resulting list is pickled to
    ``pickle.pkl`` before being returned.

    Returns:
        list: one entry per CSV row, in file order.

    Raises:
        FileNotFoundError: if no usable cache exists and ``quran.csv`` is
            missing.
        IndexError: if a CSV row has fewer than three columns.
    """
    filename = "pickle.pkl"

    if os.path.exists(filename):
        # NOTE: pickle must only be used on trusted files; this cache is
        # expected to be one this function wrote itself.
        try:
            with open(filename, 'rb') as picklefile:
                return pickle.load(picklefile)
        except (EOFError, pickle.UnpicklingError):
            # An empty or truncated cache (e.g. left by an interrupted earlier
            # run) is not fatal: fall through and rebuild it from the CSV.
            pass

    # Read the CSV completely *before* touching the cache file, so a missing
    # or malformed CSV never leaves an empty pickle behind.
    with open('quran.csv', encoding="utf-8", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        quran = [araby.strip_diacritics(row[2]) for row in csv_reader]

    # Write to a temporary name and rename, so the cache is either absent or
    # complete, never half written.
    tmp_name = filename + ".tmp"
    with open(tmp_name, 'wb') as picklefile:
        pickle.dump(quran, picklefile)
    os.replace(tmp_name, filename)
    return quran
def search(query,numberOfResults):
    """Score every loaded verse against *query* and format the best ones.

    Each verse returned by ``load()`` is scored with
    ``textdistance_lcsseq(query, verse)``. The verses are ranked by grade
    (highest first, ties keep corpus order). Of the top ``numberOfResults``
    verses, only those whose grade is strictly greater than the mean grade of
    the top ``3 * numberOfResults`` verses are reported.

    Args:
        query: text to look for.
        numberOfResults: maximum number of lines to return; converted with
            ``int()`` because a UI slider may deliver a float.

    Returns:
        str: one ``"<grade> : <annotated verse>"`` line per reported verse,
        each terminated by a newline; ``""`` when nothing qualifies, the
        corpus is empty, or ``numberOfResults`` is not positive. The grade is
        formatted with ``%d``, i.e. truncated to an integer.
    """
    limit = int(numberOfResults)
    quran = load()
    # statistics.mean raises StatisticsError on empty data, so there is
    # nothing meaningful to rank in either of these cases.
    if limit <= 0 or not quran:
        return ""

    # (annotated_text, grade) per verse, in corpus order.
    scored = [textdistance_lcsseq(query, verse) for verse in quran]

    # sorted() is stable, also with reverse=True, so equal grades keep their
    # corpus order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # The cut-off is the mean of a wider head (three times the requested
    # count); slicing clamps to the corpus size on its own.
    meanOfHead = mean([grade for _, grade in ranked[:limit * 3]])

    return "".join(
        "%d : %s" % (grade, annotated) + "\n"
        for annotated, grade in ranked[:limit]
        if grade > meanOfHead
    )
# Web UI: a free-text query box plus a slider (1-100, default 10, step 1)
# feeding search(); the formatted matches are shown as text.
result_count_slider = gr.Slider(1, 100, value=10, step=1)
demo = gr.Interface(
    fn=search,
    inputs=["text", result_count_slider],
    outputs=["text"],
)
demo.launch()