import gradio as gr
import wikipedia
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
import nltk
from nltk.stem import WordNetLemmatizer
from heapq import nlargest
import warnings

# Download the NLTK resources needed for tokenization and lemmatization.
nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)  # newer NLTK releases need this for word/sentence tokenization

warnings.filterwarnings("ignore")
def get_wiki_summary(inp):
    # Fetch the lead section of the requested Wikipedia article.
    text = wikipedia.summary(inp)
    print('\033[1m' + "Original Text Fetched from Wikipedia" + '\033[0m')
    print(text)

    stopwords = list(STOP_WORDS)
    lemmatizer = WordNetLemmatizer()

    # Lemmatize every token, dropping punctuation, stop words and single characters.
    tokens = [lemmatizer.lemmatize(str(token).lower())
              for token in nltk.word_tokenize(text)
              if str(token) not in punctuation
              and str(token).lower() not in stopwords
              and len(token) > 1]

    # Count how often each lemma occurs in the article.
    word_counts = {}
    for token in tokens:
        word_counts[token] = word_counts.get(token, 0) + 1

    # Score each sentence by summing the counts of the lemmas it contains.
    sentence_scores = {}
    for sentence in nltk.sent_tokenize(text):
        sentence_scores[sentence] = 0
        for wrd in nltk.word_tokenize(sentence):
            lemma = lemmatizer.lemmatize(str(wrd).lower())
            if lemma in word_counts:
                sentence_scores[sentence] += word_counts[lemma]

    # Keep the top 20% of sentences (at least one) and stitch them back
    # together in their original order.
    summary_length = max(1, int(len(sentence_scores) * 0.20))
    top_sentences = nlargest(summary_length, sentence_scores, key=sentence_scores.get)
    summary = ''
    for sentence in nltk.sent_tokenize(text):
        if sentence in top_sentences:
            summary += sentence.replace('\n', '') + ' '

    print('\033[1m' + "Summarized Text" + '\033[0m')
    print(summary)
    return summary
if __name__ == '__main__':
    gr.Interface(fn=get_wiki_summary,
                 inputs=gr.inputs.Textbox(label="Requested Topic from Wikipedia : "),
                 outputs="text").launch(inline=False, share=True)