import warnings
from collections import Counter
from heapq import nlargest
from string import punctuation

import gradio as gr
import nltk
import spacy
import wikipedia
from nltk.stem import WordNetLemmatizer
from spacy.lang.en.stop_words import STOP_WORDS

nltk.download('wordnet', quiet=True)
warnings.filterwarnings("ignore")

# Load the spaCy model and the WordNet lemmatizer once, at import time,
# rather than on every request.
nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()


def get_wiki_summary(rawtext):
    """Fetch the Wikipedia summary for a topic and return an extractive summary."""
    text = wikipedia.summary(rawtext)
    print('\033[1m' + "Original Text Fetched from Wikipedia" + '\033[0m')
    print(text)

    stopwords = set(STOP_WORDS)

    # Parse the text once and reuse the Doc for both tokenization and
    # sentence splitting.
    doc = nlp(text)

    # Lemmatized, lowercased content words: no punctuation, no stop words,
    # no single-character tokens.
    tokens = [
        lemmatizer.lemmatize(token.text.lower())
        for token in doc
        if token.text not in punctuation
        and token.text.lower() not in stopwords
        and len(token) > 1
    ]
    word_counts = Counter(tokens)

    # Score each sentence by summing the frequencies of its content words.
    sentence_scores = {}
    for sentence in doc.sents:
        sentence_scores[sentence] = sum(
            word_counts.get(lemmatizer.lemmatize(word.text.lower()), 0)
            for word in sentence
        )

    # Keep the top 20% of sentences (at least one) and emit them in their
    # original document order.
    summary_length = max(1, int(len(sentence_scores) * 0.20))
    top_sentences = set(nlargest(summary_length, sentence_scores,
                                 key=sentence_scores.get))
    summary = ' '.join(str(sentence) for sentence in doc.sents
                       if sentence in top_sentences)

    print('\033[1m' + "Summarized Text" + '\033[0m')
    print(summary)
    # Return (rather than print) the summary so Gradio can display it.
    return summary


# Gradio 3+ API: gr.Textbox replaces the deprecated gr.inputs.Textbox.
gr.Interface(
    fn=get_wiki_summary,
    inputs=gr.Textbox(lines=7, label="Input Text"),
    outputs="text",
).launch(inline=False)
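# A minimal way to try the summarizer without the web UI: call the function
# directly. "Alan Turing" is just an example topic, and the lookup needs
# network access for the Wikipedia request.
#
#     print(get_wiki_summary("Alan Turing"))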