# alibidaran -- "Create app.py" (commit a897c22, verified)
from hazm import *
import gradio as gr
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
# Text helpers brought in by the `from hazm import *` star import; each is
# built once at import time and shared by the functions below.
normalizer = Normalizer()
lemmatizer = Lemmatizer()
stemmer = Stemmer()

# Word-level, unigram-only bag-of-words vectorizer.
# NOTE: the name `vectorzer` (sic) is referenced elsewhere in this file.
vectorzer = CountVectorizer(ngram_range=(1, 1), analyzer='word')

# Four-topic LDA model; the fixed random_state pins the seed passed to sklearn.
lda = LatentDirichletAllocation(random_state=101, n_components=4)
def compute_seo_score(normalized_text, keywords, top_n=10):
    """Score how well hyphen-separated keywords match the text's LDA topic terms.

    The text is vectorized with the module-level ``vectorzer``, the
    module-level ``lda`` model is fitted on the resulting single-document
    count matrix, and the ``top_n`` highest-weighted vocabulary terms of each
    topic are lemmatized.  Every (topic, keyword) pair whose keyword equals
    one of those lemmas adds one point to the raw score.

    Args:
        normalized_text: Text to analyse (the caller normalizes it first).
        keywords: Query terms separated by ``-``.  Whitespace around each
            term is ignored and empty terms are dropped.
        top_n: Positive number of top-weighted terms taken from each topic.

    Returns:
        A dict with ``'Estimated_number'`` (raw hit count / 100) and
        ``'score'`` (hit count divided by the number of topics, / 100).
        Both values are ``0.0`` when the text or the keyword list is empty,
        or when the text yields no vocabulary.
    """
    # Whitespace can never be part of a vectorizer token, so strip it from the
    # query terms; otherwise "a - b" would silently never match.
    query_terms = [term.strip() for term in keywords.split('-')]
    query_terms = [term for term in query_terms if term]
    if not normalized_text.strip() or not query_terms:
        return {'Estimated_number': 0.0, 'score': 0.0}

    try:
        x = vectorzer.fit_transform([normalized_text])
    except ValueError:
        # CountVectorizer raises ValueError when no token survives
        # tokenization (empty vocabulary); there is nothing to score then.
        return {'Estimated_number': 0.0, 'score': 0.0}

    lda.fit(x)
    feature_names = vectorzer.get_feature_names_out()

    score = 0
    # components_ has one row of term weights per topic.  Sort each row on its
    # own and keep the last `top_n` indices: slicing the 2-D argsort result
    # directly would select topics (rows) instead of terms.
    for topic_weights in lda.components_:
        top_indices = topic_weights.argsort()[-top_n:]
        lemmas = {lemmatizer.lemmatize(word) for word in feature_names[top_indices]}
        score += sum(1 for query in query_terms if query in lemmas)

    # Divide by the number of topics (4 with the module-level model).
    final_score = score / len(lda.components_)
    return {'Estimated_number': score / 100,
            'score': final_score / 100}
def Normalize_text(text, keywords):
    """Normalize ``text`` and score it against ``keywords``.

    Args:
        text: Raw input text; passed to the module-level ``normalizer``.
        keywords: Keyword string forwarded unchanged to ``compute_seo_score``.

    Returns:
        A ``(normalized_text, label)`` tuple, where ``label`` is whatever
        ``compute_seo_score`` returns for the normalized text.
    """
    cleaned = normalizer.normalize(text)
    return cleaned, compute_seo_score(cleaned, keywords)
# Gradio UI: two "text" inputs feed Normalize_text(text, keywords); its two
# return values are shown as a "text" output and a "label" output.
input_components = ["text", "text"]
output_components = ["text", "label"]
demo = gr.Interface(
    fn=Normalize_text,
    inputs=input_components,
    outputs=output_components,
)
demo.launch()