Spaces:
Runtime error
Runtime error
File size: 3,262 Bytes
b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee 9433d83 61945ee b2b8748 61945ee 758f813 61945ee 758f813 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 61945ee b2b8748 758f813 37ff84b 9433d83 758f813 b2b8748 758f813 9433d83 61945ee b2b8748 61945ee b2b8748 37ff84b 6480c28 758f813 61945ee b2b8748 61945ee b2b8748 61945ee 758f813 61945ee 6480c28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import html

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
# Basic text summary
# Page configuration is issued before any other Streamlit call, as the
# Streamlit documentation asks for set_page_config.
st.set_page_config(
    page_title='Text Summarizer'
)


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached ``pipeline(...)`` call would rebuild the model each time.
    """
    return pipeline(task="summarization")


summarizer = _load_summarizer()

st.title('Text Summarization')
# Text summary function
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize. ``None``, empty and whitespace-only
            values are accepted.

    Returns:
        The ``summary_text`` field of the first pipeline result, or ``''``
        when there is nothing to summarize.
    """
    # Nothing to summarize (e.g. an untouched text area or a PDF page with
    # no extractable text): skip the model call entirely.
    if not text or not text.strip():
        return ''
    # truncation=True asks the tokenizer to clip input that exceeds the
    # model's maximum length instead of passing it through unchanged.
    result = summarizer(text, truncation=True)
    return result[0]['summary_text']
# Text summary section: summarize the text area's content on demand.
# Named input_text so the builtin input() is not shadowed.
input_text = st.text_area('Enter long text')

# Run the model only when the button is clicked; Streamlit re-executes the
# script on every interaction, so unconditional summarization would run on
# each keystroke-commit and on empty input.
if st.button('Summarize text'):
    if not input_text.strip():
        st.warning('Please enter some text to summarize')
    else:
        with st.spinner('Summarizing'):
            output = summarize_text(input_text)
        st.success('Summary done 👍🏾')
        # The summary is derived from user-supplied text and is rendered with
        # unsafe_allow_html, so escape it before interpolating into markup.
        # The HTML lines stay at column 0 so Markdown does not treat them as
        # an indented code block.
        st.markdown(f'''
<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
<h4>Results</h4>
<div>
{html.escape(output)}
</div>
</div>
''', unsafe_allow_html=True)
        st.success('Done')
#####
# PDF summary section
st.subheader('PDF summary')


# Extract PDF content
def extract_text(pdf_file):
    """Return the extracted text of every page in *pdf_file*.

    Args:
        pdf_file: A path or binary file-like object accepted by ``PdfReader``
            (here, the object returned by ``st.file_uploader``).

    Returns:
        A list with one string per page, in page order. Pages for which the
        extractor yields nothing are represented by ``''``.
    """
    reader = PdfReader(pdf_file)
    return [page.extract_text() or '' for page in reader.pages]


# Upload file
uploaded_pdf = st.file_uploader('Choose a pdf file', type=['pdf'])
if uploaded_pdf is not None:
    st.success('Succesfully uploaded')

# Prepare output
# TODO: offer the summary as a downloadable PDF (e.g. via st.download_button).
if st.button('Summarize pdf page'):
    if uploaded_pdf is None:
        # Clicking without an upload used to hit an undefined pdf_summary.
        st.error('Please select a valid file')
    else:
        try:
            with st.spinner('Extracting text from PDF...'):
                pdf_input = extract_text(uploaded_pdf)
            st.success('Text extracted')

            # Keep the summaries (not the raw page text), and show the
            # spinner around the slow model calls rather than the join.
            with st.spinner('Summarizing extracted text...'):
                pdf_output = [
                    summarize_text(page_text)
                    for page_text in pdf_input
                    if page_text.strip()
                ]
            pdf_summary = '\n'.join(pdf_output)
            st.success('Summary complete')
        except Exception:
            # "except Exception" rather than a bare except, so exceptions
            # that do not derive from Exception still propagate.
            st.error('Please select a valid file')
        else:
            if not pdf_summary:
                st.warning('No extractable text was found in this PDF')
            else:
                # Escape before rendering with unsafe_allow_html; the text
                # originates from an uploaded document. One summary per
                # page, separated by <br> so the line breaks survive HTML.
                summary_html = html.escape(pdf_summary).replace('\n', '<br>')
                st.markdown(f'''
<div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
<h4>Download the summary here </h4>
<p>
{summary_html}
</p>
</div>
''', unsafe_allow_html=True)
                st.success('PDF page summarized :)', icon="✅")
# Footer: two blank spacer rows, a dashed divider, then the credit links.
for _ in range(2):
    st.write('')

# Horizontal line
_divider_html = "<hr style='border: 1px dashed #ddd; margin: 2rem;'>"
st.markdown(_divider_html, unsafe_allow_html=True)

_credits_html = """
<div style="text-align: center; padding: 1rem;">
Project by <a href="https://github.com/ChibuzoKelechi" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
kelechi_tensor</a>
</div>
<div style="text-align: center; padding: 1rem;">
Data from <a href="https://kaggle.com" target="_blank" style="color: lightblue; font-weight: bold; text-decoration: none;">
Kaggle</a>
</div>
"""
st.markdown(_credits_html, unsafe_allow_html=True)
# Peace Out :)