# textaudio / app.py
# tahirsher's picture
# Create app.py
# a30fe61 verified
import streamlit as st
import docx
import PyPDF2
from transformers import pipeline
import tempfile
# Load Hugging Face model
@st.cache_resource
def load_pipeline():
    """Build the extractive question-answering pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the constructed object instead of rebuilding it on every call.

    Returns:
        The ``transformers`` pipeline for the ``question-answering`` task,
        backed by the ``deepset/roberta-base-squad2`` checkpoint.
    """
    task_name = "question-answering"
    checkpoint = "deepset/roberta-base-squad2"
    qa_model = pipeline(task_name, model=checkpoint)
    return qa_model
# Build the QA pipeline when the module is loaded; main() reads this
# module-level name when answering a question.
qa_pipeline = load_pipeline()
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; it is passed through
            unchanged.

    Returns:
        The concatenated text of all pages, each page followed by a newline.
        A page that yields no text contributes just the newline.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Defensive: if a page yields None or "" (for example, a page with no
    # text layer), substitute "" so the concatenation cannot raise TypeError.
    return "".join(
        (page.extract_text() or "") + "\n" for page in pdf_reader.pages
    )
def read_word(file):
    """Collect the paragraph text of a Word document.

    Args:
        file: Passed straight to ``docx.Document``.

    Returns:
        The text of every paragraph in document order, each terminated by a
        newline.
    """
    document = docx.Document(file)
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)
def extract_text(uploaded_file):
    """Read the text of an uploaded document, picking the reader by extension.

    Args:
        uploaded_file: Object exposing a ``name`` attribute; it is handed
            unchanged to ``read_pdf`` or ``read_word``.

    Returns:
        The extracted text, or ``None`` (after showing a Streamlit error)
        when the extension is neither ``pdf`` nor ``docx``.
    """
    # Everything after the last dot, case-insensitively, is the extension.
    extension = uploaded_file.name.rsplit('.', 1)[-1].lower()
    if extension == 'pdf':
        return read_pdf(uploaded_file)
    if extension == 'docx':
        return read_word(uploaded_file)
    st.error("Unsupported file type. Please upload a PDF or Word file.")
    return None
# Streamlit interface
def main():
    """Render the Streamlit page: upload a document, preview it, answer questions.

    Flow:
        1. Ask the user for a PDF or Word file.
        2. Extract its text with ``extract_text`` and show a preview of at
           most 1000 characters.
        3. On "Get Answer", run the module-level ``qa_pipeline`` with the
           user's question against the full extracted text and show the
           answer (or the error message if the pipeline raises).

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title("📄 File Reader & Hugging Face Q&A Application")
    st.write("Upload a PDF or Word file and ask questions based on its content.")

    # File upload
    uploaded_file = st.file_uploader("Choose a PDF or Word file", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    # Hand the upload object itself to extract_text(): it reads ``.name`` to
    # choose a reader, so a temp-file path string would fail there (and the
    # temp file, created with delete=False, was never removed).
    # Rewind first in case the buffer was consumed on an earlier rerun.
    uploaded_file.seek(0)
    file_text = extract_text(uploaded_file)
    if not file_text:
        return

    # Show a bounded preview; only mention truncation when it really happened.
    preview_limit = 1000
    preview = file_text[:preview_limit]
    if len(file_text) > preview_limit:
        preview += "... (truncated for display)"
    st.text_area("File Content", preview)

    # Question-answering
    question = st.text_input("Ask a question based on the file content:")
    if not st.button("Get Answer"):
        return
    if not question.strip():
        st.warning("Please enter a question.")
        return
    try:
        result = qa_pipeline(question=question, context=file_text)
        st.success(f"Answer: {result['answer']}")
    except Exception as e:
        # UI boundary: surface any model/pipeline failure to the user
        # instead of crashing the page.
        st.error(f"Error generating answer: {str(e)}")
# Launch the UI only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()