Spaces:
Sleeping
Sleeping
File size: 1,508 Bytes
4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a 4751360 c42ba0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import streamlit as st
import io
import PyPDF2
from transformers import pipeline
from gtts import gTTS
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: A binary file-like object whose ``read()`` returns the raw
            PDF bytes (the app passes the Streamlit upload here).

    Returns:
        The page texts concatenated in page order, with no separator added
        between pages. A page that yields no text contributes an empty
        string.
    """
    # Copy the bytes into an in-memory stream before handing them to the reader.
    pdf_stream = io.BytesIO(pdf_file.read())
    pdf_reader = PyPDF2.PdfReader(pdf_stream)
    # `or ""` guards against pages where extract_text() gives back None
    # (e.g. pages without a text layer); join avoids repeated string copies.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to generate discussion points
def generate_discussion_points(text):
    """Summarize ``text`` into discussion points.

    A ``'summarization'`` pipeline is built on every call; no model name is
    given, so the library's default summarization model is used.

    Args:
        text: The source text to summarize.

    Returns:
        The ``'summary_text'`` string of the first result produced by the
        pipeline.
    """
    summarizer = pipeline('summarization')
    summary = summarizer(
        text,
        max_length=600,
        min_length=300,
        do_sample=False,
        # Whole-PDF text can exceed what the model accepts as input; ask the
        # tokenizer to cut it to the model limit instead of failing.
        truncation=True,
    )
    return summary[0]['summary_text']
# Function to convert text to speech
def text_to_speech(text, output_path="discussion_points.mp3"):
    """Convert ``text`` to English speech with gTTS and save it as an MP3.

    Args:
        text: The text to be spoken.
        output_path: Where to write the MP3. Defaults to
            ``"discussion_points.mp3"`` in the current working directory,
            which is the file the app reads back for playback.

    Returns:
        ``output_path``, so callers can open the file that was written.
    """
    tts = gTTS(text=text, lang='en')
    tts.save(output_path)
    return output_path
# Streamlit app
st.title("PDF Analysis and Discussion Generator")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Extract text from the uploaded PDF
    text = extract_text_from_pdf(uploaded_file)
    st.subheader("Extracted Text")
    st.write(text)

    if not text.strip():
        # Nothing was extracted (e.g. a scanned, image-only PDF): there is
        # nothing to summarize or speak, so report it instead of continuing.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Generate and display discussion points
        st.subheader("Generated Discussion Points")
        discussion_points = generate_discussion_points(text)
        st.write(discussion_points)

        # Convert discussion points to audio and play it
        text_to_speech(discussion_points)
        # Read the generated MP3 inside a context manager so the file handle
        # is closed on every script rerun instead of being leaked.
        with open("discussion_points.mp3", "rb") as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/mp3")