Spaces:
Runtime error
Runtime error
File size: 2,650 Bytes
4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 6514ba2 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf 822040d 4cfa7cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import html
import os

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

from htmlTemplates import css, bot_template, user_template
def extract_text_from_pdfs(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
            (in this app, the files returned by the uploader). ``None``
            is treated like an empty collection.

    Returns:
        One string holding the text of all pages, in document and page
        order. Empty when there are no documents or no extractable text.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page without extractable text may yield a falsy value
            # (presumably None on some PyPDF2 versions); substitute "" so
            # one such page cannot abort the whole extraction.
            page_texts.append(page.extract_text() or "")
    # Join once instead of repeated `+=` on a growing string.
    return "".join(page_texts)
def split_text_into_chunks(text):
    """Split *text* into overlapping chunks using ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, a chunk size of
    1000 and an overlap of 200, both measured with ``len``.

    Args:
        text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for *text*.
    """
    splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separator="\n",
        length_function=len,
    )
    chunks = splitter.split_text(text)
    return chunks
def create_vector_store_from_text_chunks(text_chunks):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    Args:
        text_chunks: Non-empty sequence of strings to embed.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty or ``None``.
    """
    # Fail early with a clear message: there is nothing to index, and no
    # reason to construct an embeddings client or call the API.
    if not text_chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")
    # Prefer the app-specific variable, but fall back to the conventional
    # OPENAI_API_KEY name so either configuration works.
    key = os.getenv('OPENAI_KEY') or os.getenv('OPENAI_API_KEY')
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
def create_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over the given vector store.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``,
        wired to a ``ChatOpenAI`` model, the store's retriever and a
        ``ConversationBufferMemory`` stored under ``'chat_history'``.
    """
    # The embeddings code in this file reads the key from OPENAI_KEY; use
    # the same variable here so both OpenAI clients share one credential.
    # With no key set, keep the library's default behaviour.
    key = os.getenv('OPENAI_KEY') or os.getenv('OPENAI_API_KEY')
    llm = ChatOpenAI(openai_api_key=key) if key else ChatOpenAI()
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
def process_user_input(user_question):
    """Ask the conversation chain a question and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with
    the question; the returned ``'chat_history'`` is saved to the session
    state and each message is written to the page, alternating between
    the user template (even positions) and the bot template (odd ones).

    Args:
        user_question: The question text entered by the user.

    Returns:
        None. If no conversation chain is available yet, a warning is
        shown and nothing else happens.
    """
    # The chain only exists after documents were processed; a question
    # asked earlier must not crash the app.
    conversation = st.session_state.get("conversation")
    if conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']
    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # Message text is user- or model-provided and is rendered as raw
        # HTML below, so escape it to prevent markup injection.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
def main():
    """Run the Streamlit "Chat with multiple PDFs" application.

    Loads environment variables, sets up the page, lets the user ask a
    question about the processed documents, and offers a sidebar where
    PDFs are uploaded and turned into a conversation chain stored in
    ``st.session_state.conversation``.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Make sure the keys exist before anything reads them, so a question
    # asked before any document was processed does not raise.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        if st.session_state.conversation is None:
            st.warning("Please upload and process your documents before asking a question.")
        else:
            process_user_input(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                # Nothing uploaded: skip the pipeline instead of failing
                # while building an empty vector store.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    raw_text = extract_text_from_pdfs(pdf_docs)
                    text_chunks = split_text_into_chunks(raw_text)
                    if not text_chunks:
                        # e.g. PDFs with no extractable text at all.
                        st.error("No text could be extracted from the uploaded PDFs.")
                    else:
                        vectorstore = create_vector_store_from_text_chunks(text_chunks)
                        st.session_state.conversation = create_conversation_chain(vectorstore)
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|