Spaces:

ml6team
/

distilbart-tos-summarizer-tosdr

Build error

App Files Files Community

sdhanabal1 committed on Jan 26, 2022

Commit

8d4dd5e

1 Parent(s): 90f2ef6

Add extractive summary information using LSA

Browse files

Files changed (2) hide show

app.py +75 -20
requirements.txt +4 -2

app.py CHANGED Viewed

@@ -2,13 +2,31 @@ from textwrap import wrap
 from transformers import pipeline
 import streamlit as st
 st.markdown('# Terms & conditions abstractive summarization model :pencil:')
-st.write('This app summarizes the provided terms & conditions.')
 st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
 st.markdown("""
 To use this:
-- Copy terms & conditions and hit 'Summarize':point_down:""")
 @st.cache(allow_output_mutation=True,
@@ -26,31 +44,68 @@ def load_model():
 tc_pipeline = load_model()
-if 'text' not in st.session_state:
-    st.session_state['text'] = ""
 st.header("Input")
-form = st.form(key='terms-and-conditions')
-placeholder = form.empty()
-placeholder.empty()
-tc_text = placeholder.text_area(
-    value=st.session_state.text,
-    label='Terms & conditions text:',
-    key='tc_text',
-    height=240
-)
-submit_button = form.form_submit_button(label='Summarize')
 st.header("Output")
-if submit_button:
-    base_text = st.session_state.tc_text
-    output_text = " ".join([result['summary_text'] for result in tc_pipeline(wrap(base_text, 2048))])
     st.markdown('#####')
     st.text_area(
-        value=output_text,
-        label="Summary",
         height=240
     )

 from transformers import pipeline
 import streamlit as st
+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.nlp.stemmers import Stemmer
+from sumy.summarizers.lsa import LsaSummarizer
+from sumy.utils import get_stop_words
+import nltk
+nltk.download('punkt')
+DEFAULT_LANGUAGE = "english"
+DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10
+stemmer = Stemmer(DEFAULT_LANGUAGE)
+lsa_summarizer = LsaSummarizer(stemmer)
+lsa_summarizer.stop_words = get_stop_words(language=DEFAULT_LANGUAGE)
 st.markdown('# Terms & conditions abstractive summarization model :pencil:')
+st.write('This app provides the abstract summary of the provided terms & conditions. '
+         'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization')
 st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
 st.markdown("""
 To use this:
+- Number of sentences to be extracted is configurable
+- Copy terms & conditions and hit 'Summarize'
+""")
 @st.cache(allow_output_mutation=True,
 tc_pipeline = load_model()
+if 'tc_text' not in st.session_state:
+    st.session_state['tc_text'] = ""
+if 'sentences_length' not in st.session_state:
+    st.session_state['sentences_length'] = DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH
 st.header("Input")
+with st.form(key='terms-and-conditions'):
+    sentences_length_input = st.number_input(
+        label='Number of sentences to be extracted:',
+        min_value=1,
+        value=st.session_state.sentences_length
+    )
+    tc_text_input = st.text_area(
+        value=st.session_state.tc_text,
+        label='Terms & conditions text:',
+        height=240
+    )
+    submit_button = st.form_submit_button(label='Summarize')
 st.header("Output")
+def generate_abstractive_summary(summary) -> str:
+    summary_text = " ".join([result['summary_text'] for result in tc_pipeline(wrap(summary, 2048))])
+    return summary_text
+def generate_extractive_summary(text, sentences_count: int) -> str:
+    parser = PlaintextParser.from_string(text, Tokenizer(DEFAULT_LANGUAGE))
+    summarized_sentences = lsa_summarizer(parser.document, sentences_count)
+    summarized_text = " ".join([sentence._text for sentence in summarized_sentences])
+    return summarized_text
+def display_abstractive_summary(summary) -> None:
+    st.subheader("Abstractive Summary")
+    st.markdown('#####')
+    st.text_area(
+        value=summary,
+        label='',
+        height=240
+    )
+def display_extractive_summary(summary) -> None:
+    st.subheader("Extractive Summary")
     st.markdown('#####')
     st.text_area(
+        value=summary,
+        label='',
         height=240
     )
+if submit_button:
+    tc_text = tc_text_input
+    sentences_length = sentences_length_input
+    extract_summary = generate_extractive_summary(tc_text, sentences_length)
+    abstract_summary = generate_abstractive_summary(extract_summary)
+    display_extractive_summary(extract_summary)
+    display_abstractive_summary(abstract_summary)

requirements.txt CHANGED Viewed

@@ -1,5 +1,7 @@
 nlpaug==1.1.7
-streamlit==1.0.0
 torch==1.9.1
 torchvision==0.10.1
-transformers==4.10.3

 nlpaug==1.1.7
+streamlit
 torch==1.9.1
 torchvision==0.10.1
+transformers
+sumy==0.9.0
+nltk