File size: 2,767 Bytes
b21d556 2dbdb3a b21d556 2dbdb3a b21d556 2dbdb3a b21d556 2dbdb3a b21d556 2dbdb3a b21d556 2dbdb3a b21d556 2dbdb3a af64ea8 2dbdb3a b21d556 2dbdb3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import streamlit as st
import os
from PyPDF2 import PdfReader
import openpyxl
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
# SECURITY: a Google API key used to be hard-coded on this line. Credentials
# must never be committed to source control -- treat the previously committed
# key as leaked and revoke/rotate it.
# Nothing visible in this module reads GOOGLE_API_KEY, so no value is set here.
# If it is needed, supply it through the process environment or the hosting
# platform's secret store instead of editing this file.
def get_pdf_text(pdf_docs) -> str:
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (the
            caller in this file passes Streamlit uploaded files). ``None`` or
            an empty iterable is treated as "no documents".

    Returns:
        The text of all pages, in document then page order, joined without a
        separator. An empty string when there is nothing to read.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        reader = PdfReader(pdf)
        for page in reader.pages:
            # A page without extractable text must not break the join; the
            # ``or ""`` also covers extract_text() returning None.
            page_texts.append(page.extract_text() or "")
    # Join once instead of repeated ``+=`` to avoid quadratic string building.
    return "".join(page_texts)
def get_excel_text(excel_docs) -> str:
    """Flatten the cell values of the given Excel workbooks into one string.

    Every sheet of every workbook is walked row by row and the cell values
    are converted with ``str`` and separated by single spaces.

    Args:
        excel_docs: Iterable of workbook sources accepted by
            ``openpyxl.load_workbook`` (the caller in this file passes
            Streamlit uploaded files). ``None`` or an empty iterable is
            treated as "no documents".

    Returns:
        The space-separated cell values with surrounding whitespace
        stripped. An empty string when there is nothing to read.
    """
    cell_texts = []
    for excel_doc in excel_docs or ():
        workbook = openpyxl.load_workbook(filename=excel_doc)
        for sheet in workbook:
            for row in sheet:
                # Empty cells have value None; skip them so the literal word
                # "None" does not end up in the question-answering context.
                cell_texts.extend(
                    str(cell.value) for cell in row if cell.value is not None
                )
    return " ".join(cell_texts).strip()
def get_user_input(user_question, qa_pipeline):
    """Answer a question from the processed document text and render it.

    The context is read from ``st.session_state`` under the key
    ``"raw_text"``. The answer is written to the Streamlit page; nothing is
    returned.

    Args:
        user_question: The question text entered by the user.
        qa_pipeline: Callable invoked as
            ``qa_pipeline(question=..., context=...)`` whose result is
            indexed with ``"answer"``.
    """
    context = st.session_state.get("raw_text")
    with st.container():
        if not context:
            # The question-answering pipeline rejects a missing or empty
            # context, so tell the user what to do instead of crashing.
            st.warning(
                "No document text available. "
                "Please upload and process a document first."
            )
            return
        response = qa_pipeline(question=user_question, context=context)
        st.write("Answer:", response["answer"])
def _get_qa_pipeline():
    """Return the question-answering pipeline, loading it once per session."""
    # Streamlit re-runs the whole script on every interaction, so a local
    # variable would force a full model reload for each question. Keeping the
    # pipeline in session state makes the load happen only once.
    if "qa_pipeline" not in st.session_state:
        # NOTE(review): confirm this checkpoint actually ships a
        # question-answering head; verify against the model card.
        model_name = "HanNayeoniee/LHK_DPO_v1"
        with st.spinner("Loading model..."):
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForQuestionAnswering.from_pretrained(model_name)
            st.session_state.qa_pipeline = pipeline(
                "question-answering", model=model, tokenizer=tokenizer
            )
    return st.session_state.qa_pipeline


def _store_extracted_text(docs, extract_text, spinner_message, success_message):
    """Run *extract_text* on *docs* and keep the result in session state."""
    if not docs:
        st.warning("Please upload at least one file before processing.")
        return
    with st.spinner(spinner_message):
        st.session_state.raw_text = extract_text(docs)
    if st.session_state.raw_text:
        st.success(success_message)
    else:
        st.warning("No text could be extracted from the uploaded files.")


def main():
    """Build the DocChat page: upload controls in the sidebar, Q&A in the body.

    Processing PDFs or Excel files stores their extracted text in
    ``st.session_state.raw_text`` (the most recent processing run wins).
    A question typed into the input box is answered from that text.
    """
    st.set_page_config("DocChat")
    st.header("DocChat - Chat with multiple documents")
    st.write("---")

    with st.container():
        with st.sidebar:
            st.title("Settings")
            st.subheader("Upload Documents")

            st.markdown("**PDF files:**")
            pdf_docs = st.file_uploader(
                "Upload PDF Files",
                type=["pdf"],
                accept_multiple_files=True,
            )
            if st.button("Process PDF file"):
                _store_extracted_text(
                    pdf_docs,
                    get_pdf_text,
                    "Processing PDFs...",
                    "PDF processed successfully!",
                )

            st.markdown("**Excel files:**")
            excel_docs = st.file_uploader(
                "Upload Excel Files",
                # Restricted to the workbook formats openpyxl can open.
                type=["xlsx", "xlsm", "xltx", "xltm"],
                accept_multiple_files=True,
            )
            if st.button("Process Excel file"):
                _store_extracted_text(
                    excel_docs,
                    get_excel_text,
                    "Processing Excel files...",
                    "Excel file processed successfully!",
                )

    with st.container():
        st.subheader("Document Q&A")
        st.write('Ask a question : ')
        user_question = st.text_input("Ask a Question from the document")
        if user_question:
            if st.session_state.get("raw_text"):
                get_user_input(user_question, _get_qa_pipeline())
            else:
                # Previously a question asked before any document was
                # processed was silently ignored.
                st.info("Upload and process a document before asking a question.")
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|