Test-CHATBOT / app.py
annas4421's picture
Update app.py
41d137b verified
raw
history blame
4.83 kB
import os
from dotenv import load_dotenv
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, CSVLoader
import tempfile
# Load environment variables
# (presumably from a local .env file via python-dotenv -- confirm deployment setup).
load_dotenv()
# NOTE(review): api_key is read here but not referenced again in the visible
# source; the OpenAI-backed classes below are constructed without it, so they
# presumably pick OPENAI_API_KEY up from the environment themselves -- confirm.
api_key = os.getenv("OPENAI_API_KEY")
# Custom Prompt Template
# The template exposes two placeholders, {chat_history} and {question}.
# It is handed to the chain as `condense_question_prompt` in
# get_conversationchain().
custom_template = """
<s>[INST] You are an Expert PDF and document assistant. Follow these instructions:
1. Greet the user and introduce yourself as a professional document assistant.
2. Answer user queries based on the document content. If a question is out of scope, politely end the conversation.
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""
# Compiled prompt object built from the template string above.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# Function to extract text from documents
def get_document_text(uploaded_files):
    """Load every supported uploaded file into LangChain documents.

    The loaders take a filesystem path, so each upload is spooled to a
    temporary file, parsed with the loader matching its extension, and the
    temporary file is removed again once loading has finished (or failed).

    Args:
        uploaded_files: Iterable of upload objects exposing ``name`` and
            ``read()`` (the value ``main`` gets from ``st.file_uploader``).

    Returns:
        A flat list of the documents produced by all loaders, in upload
        order. Files with an unrecognised extension contribute nothing.
    """
    documents = []
    for uploaded_file in uploaded_files:
        # Match extensions case-insensitively so "REPORT.PDF" is not skipped.
        lowered_name = uploaded_file.name.lower()
        if lowered_name.endswith(".pdf"):
            loader_cls = PyPDFLoader
        elif lowered_name.endswith((".docx", ".doc")):
            loader_cls = Docx2txtLoader
        elif lowered_name.endswith(".txt"):
            loader_cls = TextLoader
        elif lowered_name.endswith(".csv"):
            loader_cls = CSVLoader
        else:
            # Unsupported type: skip it without creating a temp file at all.
            continue

        suffix = os.path.splitext(uploaded_file.name)[-1]
        # delete=False so the file survives the `with` block; it has to be
        # closed (flushed) before a loader reopens it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(uploaded_file.read())
            temp_file_path = temp_file.name

        try:
            documents.extend(loader_cls(temp_file_path).load())
        finally:
            # Always clean up, otherwise every processed upload leaks a file
            # in the system temp directory.
            os.remove(temp_file_path)
    return documents
# Split text into chunks
def get_chunks(documents):
    """Cut the text of each document into overlapping character chunks.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single flat list of text chunks (up to 1000 characters each with a
        200-character overlap, split on newlines), in document order.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    pieces = []
    for document in documents:
        pieces.extend(splitter.split_text(document.page_content))
    return pieces
# Create vectorstore
def get_vectorstore(chunks):
    """Embed the text chunks with OpenAI embeddings and index them in FAISS.

    Args:
        chunks: List of text strings to embed.

    Returns:
        The FAISS vector store built from ``chunks``.
    """
    embedder = OpenAIEmbeddings()
    store = FAISS.from_texts(embedding=embedder, texts=chunks)
    return store
# Create a conversational chain
def get_conversationchain(vectorstore):
    """Build the retrieval chat chain on top of a vector store.

    Args:
        vectorstore: Store whose ``as_retriever()`` supplies the retriever.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a ``gpt-4o-mini`` chat
        model, a buffer memory stored under ``chat_history``, and the
        module-level ``CUSTOM_QUESTION_PROMPT`` as condense-question prompt.
    """
    chat_model = ChatOpenAI(temperature=0.4, model_name='gpt-4o-mini')
    history_buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    chain_options = {
        "llm": chat_model,
        "retriever": vectorstore.as_retriever(),
        "condense_question_prompt": CUSTOM_QUESTION_PROMPT,
        "memory": history_buffer,
    }
    return ConversationalRetrievalChain.from_llm(**chain_options)
# Handle user questions and update chat history
def handle_question(question):
    """Send a question to the stored chain and render the whole transcript.

    Shows a warning and returns early when no chain is stored in the session
    yet. Otherwise the chain's ``chat_history`` is saved to the session and
    every message is printed, labelling even positions as the user and odd
    positions as the bot.

    Args:
        question: The text the user typed.
    """
    chain = st.session_state.conversation
    if not chain:
        st.warning("Please process your documents first.")
        return

    result = chain({'question': question})
    transcript = result['chat_history']
    st.session_state.chat_history = transcript

    for position, message in enumerate(transcript):
        from_user = position % 2 == 0
        rendered = (
            f"**You:** {message.content}"
            if from_user
            else f"**Bot:** {message.content}"
        )
        st.markdown(rendered)
# Main Streamlit app
def main():
    """Run the Streamlit page: upload files, index them, answer questions.

    The sidebar takes the uploads and a "Process Documents" button. Pressing
    the button extracts and chunks the text, builds the vector store and
    stores a fresh conversation chain in ``st.session_state``. The main area
    offers a text box whose content is passed to ``handle_question``.
    """
    # The icon/title previously contained the mis-decoded UTF-8 bytes of the
    # books emoji; a non-emoji string is not a valid page icon.
    st.set_page_config(page_title="Chat with Documents", page_icon="📚")
    st.title("📚 Chat with Your Documents")
    st.sidebar.title("Upload Your Files")

    # Session slots survive Streamlit's script reruns.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    # File uploader
    uploaded_files = st.sidebar.file_uploader(
        "Upload your files (PDF, DOCX, TXT, CSV):",
        accept_multiple_files=True,
    )

    # Process button
    if st.sidebar.button("Process Documents"):
        if uploaded_files:
            with st.spinner("Processing documents..."):
                # Extract text and create conversation chain
                raw_documents = get_document_text(uploaded_files)
                text_chunks = get_chunks(raw_documents)
                # An empty chunk list cannot be indexed (the FAISS builder
                # needs at least one embedding), so only build when there
                # is text to work with.
                if text_chunks:
                    vectorstore = get_vectorstore(text_chunks)
                    st.session_state.conversation = get_conversationchain(vectorstore)
            if text_chunks:
                st.success("Documents processed successfully!")
            else:
                st.warning("No readable text was found in the uploaded documents.")
        else:
            st.warning("Please upload at least one document.")

    # User input
    question = st.text_input("Ask a question about the uploaded documents:")
    if question:
        handle_question(question)


if __name__ == '__main__':
    main()