Text-Summarize / app.py
amirgame197's picture
Update app.py
5f02413 verified
raw
history blame
2.6 kB
import os
import tempfile

import streamlit as st
from deep_translator import GoogleTranslator
from PyPDF2 import PdfReader
from txtai.pipeline import Summary, Textractor
# Configure the page to use Streamlit's "wide" layout.
st.set_page_config(layout="wide")
# Character budget for a single translation request. Google Translate (via
# deep_translator) rejects overly long inputs (about 5000 characters), so the
# text is sent in pieces that stay well below that.
_TRANSLATE_CHUNK_CHARS = 2000


def _split_into_chunks(text, limit=_TRANSLATE_CHUNK_CHARS):
    """Split *text* into pieces of at most *limit* characters.

    Breaks fall on whitespace whenever possible so that words are not cut in
    half; a run without any whitespace is split hard at *limit*.
    """
    chunks = []
    remaining = text.strip()
    while len(remaining) > limit:
        # Look one character past the limit so a separator sitting exactly
        # at the boundary can still be used as the break point.
        window = remaining[: limit + 1]
        cut = max(window.rfind(sep) for sep in (" ", "\n", "\t"))
        if cut <= 0:
            cut = limit
        chunks.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks


@st.cache_resource
def _load_summarizer():
    """Build the txtai summarization pipeline once and reuse it afterwards."""
    return Summary()


@st.cache_data
def text_summary(text, translate_fa, maxlength=None):
    """Summarize *text*, optionally returning the summary in Persian.

    The input is first translated to English (source language auto-detected)
    in bounded chunks, summarized with the txtai ``Summary`` pipeline, and
    then translated to Persian when requested.

    Args:
        text: The text to summarize, in any language the translator detects.
        translate_fa: When true, translate the English summary to Persian.
        maxlength: Optional maximum summary length forwarded to the
            summarizer; ``None`` keeps the pipeline's default.

    Returns:
        The summary string, or ``""`` when there is nothing to summarize.
    """
    if not text or not text.strip():
        return ""

    to_english = GoogleTranslator(source='auto', target='en')
    translated_parts = [
        to_english.translate(chunk) for chunk in _split_into_chunks(text)
    ]
    # Drop empty results and rejoin with a space so chunk boundaries do not
    # glue two words together.
    english_text = " ".join(part for part in translated_parts if part)
    if not english_text:
        return ""

    result = _load_summarizer()(english_text, maxlength=maxlength)
    if translate_fa and result:
        result = GoogleTranslator(source='auto', target='fa').translate(result)
    return result
def extract_text_from_pdf(file_path, max_pages=1):
    """Return the text of the leading pages of a PDF file.

    Args:
        file_path: Path of the PDF file to read.
        max_pages: Number of pages to extract, counted from the start of the
            document. Defaults to 1 (first page only); pass ``None`` to read
            every page.

    Returns:
        The extracted text, with pages joined by newlines. An empty string is
        returned when the PDF has no pages or yields no text.
    """
    page_texts = []
    # Keep the file open while extracting: the reader pulls page data from
    # the underlying stream.
    with open(file_path, "rb") as f:
        reader = PdfReader(f)
        for index, page in enumerate(reader.pages):
            if max_pages is not None and index >= max_pages:
                break
            # Guard against a page that yields no text at all.
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
# Page body: choose a mode, then summarize pasted text or an uploaded PDF.
choice = st.selectbox("Select your choice", ["Summarize Text", "Summarize Document"])
translate_fa = st.toggle('Translate to Persian', value=True)

if choice == "Summarize Text":
    st.subheader("Summarize Text")
    input_text = st.text_area("Enter your text here")
    if st.button("Summarize Text"):
        # An untouched text area holds an empty string rather than None, so
        # check for actual content before running the summarizer.
        if input_text and input_text.strip():
            col1, _ = st.columns([1, 1])
            with col1:
                st.markdown("**Summary Result**")
                st.success(text_summary(input_text, translate_fa))
        else:
            st.warning("Please enter some text to summarize.")
elif choice == "Summarize Document":
    st.subheader("Summarize Document")
    input_file = st.file_uploader("Upload your document here", type=['pdf'])
    if input_file is not None and st.button("Summarize Document"):
        # Save the upload into a private temporary directory so concurrent
        # sessions cannot overwrite each other's file, and so the copy is
        # removed once the text has been extracted.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_path = os.path.join(tmp_dir, "doc_file.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(input_file.getbuffer())
            # Extract once and reuse the text for both columns.
            extracted_text = extract_text_from_pdf(pdf_path)

        col1, col2 = st.columns([1, 1])
        with col1:
            st.info("File uploaded successfully")
            st.markdown("**Extracted Text is Below:**")
            st.info(extracted_text)
        with col2:
            st.markdown("**Summary Result**")
            if extracted_text and extracted_text.strip():
                st.success(text_summary(extracted_text, translate_fa))
            else:
                st.warning("No extractable text was found in this document.")