Burcin committed on
Commit
7f35581
·
1 Parent(s): 92e0d6e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -10
app.py CHANGED
@@ -28,7 +28,7 @@ def get_wiki_summary_by_pegasus(inp):
28
  model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
29
  summary = model.generate(**tokens)
30
  return tokenizer.decode(summary)
31
- """
32
 
33
 
34
  def get_wiki_summary_by_lem(inp):
@@ -79,22 +79,53 @@ def get_wiki_summary_by_lem(inp):
79
  return summary
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
 
84
  desc = """This interface allows you to summarize Wikipedia explanations. Only requirement is to write the topic. For summarization this model uses extractive summarization method and the number of sentences in the output depends on the length of the original text."""
85
 
 
 
86
 
87
- sample = [['Europe'], ['Great Depression'], ['Crocodile Dundee']]
88
 
 
89
 
90
- iface = Parallel(gr.Interface(fn=get_wiki_original_text, inputs=gr.inputs.Textbox(label="Requested Topic from Wikipedia : "), outputs="text"),
91
- gr.Interface(fn=get_wiki_summary_by_lem, inputs=gr.inputs.Textbox(label="Requested Topic from Wikipedia : "), outputs="text"),
92
- #gr.Interface(fn=get_wiki_summary_by_pegasus, inputs=gr.inputs.Textbox(label="Requested Topic from Wikipedia : "), outputs="text"),
93
-
94
-
95
-
96
-
97
- # get_wiki_original_text,get_wiki_summary_by_lem, get_wiki_summary_by_pegasus,
98
  title= 'Text Summarizer',
99
  description = desc,
100
  examples=sample,
 
28
  model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
29
  summary = model.generate(**tokens)
30
  return tokenizer.decode(summary)
31
+ """
32
 
33
 
34
  def get_wiki_summary_by_lem(inp):
 
79
  return summary
80
 
81
 
82
def get_wiki_summary_by_tfidf(inp):
    """Extractively summarize the Wikipedia entry for *inp*.

    Each sentence is scored by the sum of its TF-IDF term weights
    (1- to 3-grams); the top-scoring ~20% of sentences (~50% for
    short texts) are returned joined in their original order.

    Parameters
    ----------
    inp : str
        Topic to look up via ``wikipedia.summary``.

    Returns
    -------
    str
        The selected sentences, each followed by a space.
    """
    text = wikipedia.summary(inp)

    all_sentences = [str(sent) for sent in nltk.sent_tokenize(text)]

    # Score every sentence as the sum of its TF-IDF weights.
    tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    sentence_vectors = tfidf_vectorizer.fit_transform(all_sentences)
    sentence_scores_vector = np.hstack(np.array(sentence_vectors.sum(axis=1)))
    sentence_scores = dict(zip(all_sentences, sentence_scores_vector))

    # Keep ~20% of the sentences for longer texts, ~50% for short ones.
    if len(sentence_scores) > 5:
        summary_length = int(len(sentence_scores) * 0.20)
    else:
        summary_length = int(len(sentence_scores) * 0.50)
    # Fix: for 1-sentence texts int(1 * 0.50) == 0, which made the old
    # code return an empty summary — always keep at least one sentence.
    summary_length = max(summary_length, 1)

    # Fix: the old code re-ran nlargest() inside a doubly-nested loop
    # (once per sentence per rank, O(n^2 log n)); compute the winning
    # sentences exactly once and test membership instead.
    top_sentences = set(
        nlargest(summary_length, sentence_scores, key=sentence_scores.get)
    )

    # Emit the selected sentences in their original document order.
    summary = ''
    for sentence in all_sentences:
        if sentence in top_sentences:
            summary += sentence.replace('\n', '') + ' '

    return summary
111
+
112
 
113
 
114
  desc = """This interface allows you to summarize Wikipedia explanations. Only requirement is to write the topic. For summarization this model uses extractive summarization method and the number of sentences in the output depends on the length of the original text."""
115
 
116
+
117
+ x = """ Europe """
118
 
119
+ y = ''' Great Depression '''
120
 
121
+ z = ''' Crocodile Dundee '''
122
 
123
+ sample = [[x],[y],[z]]
124
+
125
+
126
+ iface = Parallel(gr.Interface(fn=get_wiki_original_text, inputs=gr.inputs.Textbox(label="Text"), outputs="Original Text"),
127
+ gr.Interface(fn=get_wiki_summary_by_lem, inputs=gr.inputs.Textbox(label="Text"), outputs="Extractive Summarization v1"),
128
+ gr.Interface(fn=get_wiki_summary_by_tfidf, inputs=gr.inputs.Textbox(label="Text"), outputs="Extractive Summarization v2"),
 
 
129
  title= 'Text Summarizer',
130
  description = desc,
131
  examples=sample,