File size: 3,262 Bytes
b2b8748
61945ee
 
 
 
 
 
b2b8748
 
61945ee
 
 
b2b8748
 
 
61945ee
 
 
 
 
 
b2b8748
61945ee
9433d83
 
 
61945ee
b2b8748
 
61945ee
 
758f813
61945ee
758f813
61945ee
 
b2b8748
61945ee
 
 
 
 
b2b8748
 
61945ee
 
b2b8748
 
61945ee
 
b2b8748
61945ee
b2b8748
61945ee
 
b2b8748
61945ee
b2b8748
 
61945ee
b2b8748
 
 
758f813
37ff84b
9433d83
 
 
758f813
 
b2b8748
758f813
 
 
9433d83
 
 
 
61945ee
b2b8748
 
61945ee
b2b8748
37ff84b
6480c28
758f813
61945ee
 
 
b2b8748
 
61945ee
b2b8748
61945ee
758f813
61945ee
 
 
6480c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import html

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Basic text summary
# Page configuration is issued before any other Streamlit call, since
# Streamlit requires set_page_config to be the first command on a page.
st.set_page_config(
    page_title='Text Summarizer'
)


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes this script on each widget interaction; caching
    the pipeline as a resource avoids reloading the model every time.
    """
    return pipeline(task="summarization")


# Module-level handle used by summarize_text().
summarizer = _load_summarizer()

st.title('Text Summarization')

# Text summary function

def summarize_text(text):
    """Return a summary of ``text`` produced by the module-level ``summarizer``.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` field of the first result returned by the
        pipeline, or an empty string when ``text`` is empty or contains only
        whitespace.
    """
    # Nothing to summarize: skip the model call entirely for blank input.
    if not text or not text.strip():
        return ''

    # truncation=True asks the pipeline to cut input that exceeds the model's
    # maximum length instead of failing on it.
    result = summarizer(text, truncation=True)
    return result[0]['summary_text']

# Text summary UI. The model is only invoked once the button is pressed, so
# reruns caused by other widgets do not trigger a new summarization.
user_text = st.text_area('Enter long text')

if st.button('Summarize text'):
    if not user_text.strip():
        st.warning('Please enter some text to summarize')
    else:
        with st.spinner('Summarizing'):
            output = summarize_text(user_text)
        st.success('Summary done 👍🏾')

        # The summary is derived from user-supplied text and is rendered with
        # unsafe_allow_html, so escape it before placing it in the markup.
        safe_output = html.escape(output)
        st.markdown(f'''
            <div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
            <h4>Results</h4>
                <div>
                    {safe_output}
                </div>
            </div>
                ''', unsafe_allow_html=True)
        st.success('Done')
    
#####

# PDF summary section

st.subheader('PDF summary')


def extract_text(pdf_file):
    """Return the text of every page of ``pdf_file`` as a list of strings.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts; in this script it is the
            object returned by ``st.file_uploader``.

    Returns:
        One string per page, in page order. A page for which
        ``extract_text()`` yields nothing is represented as ``''``.
    """
    reader = PdfReader(pdf_file)
    return [page.extract_text() or '' for page in reader.pages]


# Upload file
uploaded_pdf = st.file_uploader('Choose a pdf file', type=['pdf'])

if uploaded_pdf is not None:
    st.success('Succesfully uploaded')

# TODO: offer the summary as a downloadable PDF, e.g. pdfkit.from_string(...)
# combined with st.download_button(...).

if st.button('Summarize pdf page'):
    # Stays None unless extraction succeeds, so the later steps are skipped
    # both when no file was chosen and when the file could not be read.
    pdf_input = None

    if uploaded_pdf is None:
        st.error('Please select a valid file')
    else:
        try:
            with st.spinner('Extracting text from PDF...'):
                pdf_input = extract_text(uploaded_pdf)
        except Exception:
            # UI boundary: a file that cannot be parsed is reported to the
            # user instead of crashing the app.
            st.error('Please select a valid file')

    if pdf_input is not None:
        st.success('Text extracted')

        with st.spinner('Summarizing extracted text...'):
            # Keep the summary of each page (not the raw page text) and skip
            # pages that contain no text at all.
            pdf_output = [
                summarize_text(page_text)
                for page_text in pdf_input
                if page_text.strip()
            ]
        pdf_summary = '\n'.join(pdf_output)

        if not pdf_output:
            st.warning('No extractable text was found in this PDF')
        else:
            st.success('Summary complete')

            # PDF content is untrusted and is rendered with unsafe_allow_html,
            # so escape it. <br> separates the lines because raw newlines
            # collapse inside HTML.
            summary_html = '<br>'.join(
                html.escape(line) for line in pdf_summary.splitlines()
            )
            st.markdown(f'''
            <div style="background-color: black; color: white; font-weight: bold; padding: 1rem; border-radius: 10px;">
            <h4>Download the summary here </h4>
                <p>
                    {summary_html}
                </p>
            </div>
                ''', unsafe_allow_html=True)
            st.success('PDF page summarized :)', icon="✅")






# Two empty writes ahead of the footer (presumably for vertical spacing).
for _ in range(2):
    st.write('')

# Footer: a dashed horizontal rule followed by the credit links.
divider_html = "<hr style='border: 1px dashed #ddd; margin: 2rem;'>"
st.markdown(divider_html, unsafe_allow_html=True)

credits_html = """
    <div style="text-align: center; padding: 1rem;">
        Project by <a href="https://github.com/ChibuzoKelechi" target="_blank" style="color: white; font-weight: bold; text-decoration: none;">
         kelechi_tensor</a>
    </div>
    
    <div style="text-align: center; padding: 1rem;">
        Data from <a href="https://kaggle.com" target="_blank" style="color: lightblue; font-weight: bold; text-decoration: none;">
         Kaggle</a>
    </div>
"""
st.markdown(credits_html, unsafe_allow_html=True)

# Peace Out :)