# Spaces: annas4421 / Test-CHATBOT (Sleeping)
# File size: 4,828 Bytes

import os
from dotenv import load_dotenv
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, CSVLoader
import tempfile

# Load variables via python-dotenv, then read the OpenAI key from the
# process environment (None when the variable is not set).
# NOTE(review): api_key is not referenced anywhere else in the visible code.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Custom prompt template.
# The {chat_history} and {question} placeholders are the template's input
# variables; CUSTOM_QUESTION_PROMPT is handed to the retrieval chain as its
# `condense_question_prompt` in get_conversationchain().
# NOTE(review): the closing "</s>[INST]" marker looks like it was meant to be
# "[/INST]" -- confirm the intended instruction format before changing it,
# since every character of this string is sent to the model.
custom_template = """
<s>[INST] You are an Expert PDF and document assistant. Follow these instructions:
1. Greet the user and introduce yourself as a professional document assistant.
2. Answer user queries based on the document content. If a question is out of scope, politely end the conversation.
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

# Function to extract text from documents
def get_document_text(uploaded_files):
    """Load every supported uploaded file into a flat list of documents.

    Each supported upload is copied to a temporary file on disk (the loaders
    are constructed from a file path), parsed with the loader that matches its
    extension, and the temporary file is removed again afterwards.

    Args:
        uploaded_files: Iterable of file-like objects exposing a ``name``
            attribute and a ``read()`` method returning bytes.

    Returns:
        A list containing the documents produced by the loaders, in upload
        order. Files whose name does not end in .pdf, .docx, .doc, .txt or
        .csv (compared case-insensitively) are skipped, so the list may be
        empty.
    """
    supported_suffixes = (".pdf", ".docx", ".doc", ".txt", ".csv")
    documents = []
    for uploaded_file in uploaded_files:
        # Compare on a lowered name so "REPORT.PDF" is treated like "report.pdf".
        lowered_name = uploaded_file.name.lower()
        if not lowered_name.endswith(supported_suffixes):
            # No loader can parse this file, so never spill it to disk.
            continue

        suffix = os.path.splitext(uploaded_file.name)[-1]
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            # Close the handle before loading so the loader can reopen the path.
            with temp_file:
                temp_file.write(uploaded_file.read())

            if lowered_name.endswith(".pdf"):
                loader = PyPDFLoader(temp_file.name)
            elif lowered_name.endswith((".docx", ".doc")):
                loader = Docx2txtLoader(temp_file.name)
            elif lowered_name.endswith(".txt"):
                loader = TextLoader(temp_file.name)
            else:
                loader = CSVLoader(temp_file.name)
            documents.extend(loader.load())
        finally:
            # delete=False means nothing else removes the file; clean up here.
            # Best effort only: a failed removal must not mask a loader error.
            try:
                os.unlink(temp_file.name)
            except OSError:
                pass
    return documents

# Split text into chunks
def get_chunks(documents):
    """Split the text of each document into overlapping chunks.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A flat list of text chunks, in document order. Only the text is kept;
        nothing else from the document objects is carried over.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    pieces = []
    for document in documents:
        pieces.extend(splitter.split_text(document.page_content))
    return pieces

# Create vectorstore
def get_vectorstore(chunks):
    """Embed text chunks with OpenAI embeddings and index them in FAISS.

    Args:
        chunks: Text strings to index.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` is empty or None. Failing here gives a
            clear message instead of an opaque error from index construction,
            and avoids creating an embeddings client for nothing.
    """
    texts = list(chunks) if chunks is not None else []
    if not texts:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts=texts, embedding=embeddings)

# Create a conversational chain
def get_conversationchain(vectorstore):
    """Build a conversational retrieval chain over the given vector store.

    The chain uses a ``gpt-4o-mini`` chat model at temperature 0.4, a buffer
    memory stored under the ``chat_history`` key (returned as message
    objects), and the vector store's default retriever.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatOpenAI(temperature=0.4, model_name='gpt-4o-mini')
    history_buffer = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    document_retriever = vectorstore.as_retriever()
    # NOTE(review): CUSTOM_QUESTION_PROMPT reads like an answering prompt, but
    # it is supplied as the question-condensing prompt -- confirm that is the
    # intended slot for it.
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=document_retriever,
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=history_buffer,
    )

# Handle user questions and update chat history
def handle_question(question):
    """Send a question to the stored chain and render the whole conversation.

    Shows a warning and returns early when no chain has been stored in the
    session yet. Otherwise the chain's ``chat_history`` is saved to the
    session and every message is rendered, labelling even positions as the
    user and odd positions as the bot.

    Args:
        question: The text entered by the user.
    """
    state = st.session_state
    chain = state.conversation
    if not chain:
        st.warning("Please process your documents first.")
        return

    result = chain({'question': question})
    state.chat_history = result['chat_history']

    for position, message in enumerate(state.chat_history):
        is_user_turn = position % 2 == 0
        line = f"**You:** {message.content}" if is_user_turn else f"**Bot:** {message.content}"
        st.markdown(line)

# Main Streamlit app
def main():
    """Render the Streamlit page: upload files, process them, ask questions.

    Session state keeps two entries across reruns: ``conversation`` (the
    retrieval chain, or None until documents are processed) and
    ``chat_history`` (the messages returned by the chain, or None).
    """
    st.set_page_config(page_title="Chat with Documents", page_icon="📚")
    st.title("📚 Chat with Your Documents")
    st.sidebar.title("Upload Your Files")

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    # File uploader
    uploaded_files = st.sidebar.file_uploader("Upload your files (PDF, DOCX, TXT, CSV):", accept_multiple_files=True)

    # Process button
    if st.sidebar.button("Process Documents"):
        if uploaded_files:
            with st.spinner("Processing documents..."):
                # Extract text and create conversation chain
                raw_documents = get_document_text(uploaded_files)
                text_chunks = get_chunks(raw_documents)
                if text_chunks:
                    vectorstore = get_vectorstore(text_chunks)
                    st.session_state.conversation = get_conversationchain(vectorstore)
                    st.success("Documents processed successfully!")
                else:
                    # Unsupported file types or files without extractable text
                    # yield no chunks; indexing nothing would crash, so stop
                    # here and keep any previously built conversation intact.
                    st.warning("No readable text was found in the uploaded files.")
        else:
            st.warning("Please upload at least one document.")

    # User input
    question = st.text_input("Ask a question about the uploaded documents:")
    if question:
        handle_question(question)

# Start the app only when this file is run as the entry script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()