# Spaces: Sleeping  (hosting-page status text captured with the source; not part of the program)
from hazm import * | |
import gradio as gr | |
from sklearn.decomposition import LatentDirichletAllocation | |
from sklearn.feature_extraction.text import CountVectorizer | |
# Shared, module-level NLP tools; the functions below reuse these instances.

# hazm text-processing helpers.
normalizer = Normalizer()
lemmatizer = Lemmatizer()
stemmer = Stemmer()

# Bag-of-words vectorizer over single words (unigrams only).
vectorzer = CountVectorizer(analyzer='word', ngram_range=(1, 1))

# Topic model with 4 topics; the fixed seed keeps fits reproducible.
lda = LatentDirichletAllocation(n_components=4, random_state=101)
def compute_seo_score(normalized_text, keywords):
    """Score how well the given keywords match the text's top LDA topic terms.

    The text is vectorized with the module-level ``vectorzer``, the
    module-level ``lda`` model is fitted on it, and the (up to) 10
    highest-weighted vocabulary words of every topic are lemmatized.  Each
    (topic, keyword) pair where the keyword equals one of those lemmas adds
    one point to the raw score.

    Args:
        normalized_text: The already-normalized document text.
        keywords: Query terms separated by ``-`` (surrounding whitespace and
            empty terms are ignored).

    Returns:
        A dict with two entries:
        ``'Estimated_number'`` -- raw match count divided by 100;
        ``'score'`` -- raw match count averaged over the topics, divided by 100.
        Both are ``0.0`` when the text produces no vocabulary.
    """
    query_terms = [term.strip() for term in keywords.split('-') if term.strip()]

    try:
        doc_term_matrix = vectorzer.fit_transform([normalized_text])
    except ValueError:
        # CountVectorizer raises ValueError when the text yields an empty
        # vocabulary (e.g. empty or punctuation-only input): nothing to match.
        return {'Estimated_number': 0.0,
                'score': 0.0}

    lda.fit(doc_term_matrix)
    feature_names = vectorzer.get_feature_names_out()

    # components_ has one row per topic and one column per vocabulary word.
    # Sort along the word axis and keep the last 10 columns of each row, i.e.
    # the 10 highest-weighted words per topic (slicing rows would select
    # topics instead of words).
    top_word_indices = lda.components_.argsort(axis=1)[:, -10:]

    score = 0
    for topic_indices in top_word_indices:
        # Lemmatize each topic's words once, not once per query term.
        topic_lemmas = {lemmatizer.lemmatize(word)
                        for word in feature_names[topic_indices]}
        score += sum(1 for query in query_terms if query in topic_lemmas)

    # Average over the actual number of topics rather than a hard-coded 4.
    final_score = score / len(top_word_indices)
    return {'Estimated_number': score / 100,
            'score': final_score / 100}
def Normalize_text(text, keywords):
    """Normalize the text and score it against the keywords.

    Args:
        text: Raw input text.
        keywords: Keyword string forwarded unchanged to ``compute_seo_score``.

    Returns:
        A ``(normalized_text, label)`` tuple, where ``label`` is the dict
        returned by ``compute_seo_score`` for the normalized text.
    """
    cleaned = normalizer.normalize(text)
    return cleaned, compute_seo_score(cleaned, keywords)
# Web UI: two text inputs (document, keywords) feed Normalize_text; its two
# return values are shown as a text box and a label component.
demo = gr.Interface(fn=Normalize_text, inputs=["text", "text"], outputs=["text", "label"])

# Start the Gradio server for the interface.
demo.launch()