Spaces:

annas4421
/

Test-CHATBOT

Sleeping

App Files Community

Test-CHATBOT / app.py

annas4421

Update app.py

41d137b verified 8 months ago

raw

history blame

4.83 kB

	import os
	from dotenv import load_dotenv
	import streamlit as st
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.prompts import PromptTemplate
	from langchain.memory import ConversationBufferMemory
	from langchain.chains import ConversationalRetrievalChain
	from langchain.chat_models import ChatOpenAI
	from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, CSVLoader
	import tempfile

	# Read a local .env file (when one exists) into the process environment
	load_dotenv()
	# OpenAI key looked up from the environment; None when the variable is unset.
	# NOTE(review): nothing in the visible code reads `api_key` afterwards —
	# presumably the LangChain OpenAI classes pick the key up from the
	# environment themselves; confirm before removing.
	api_key = os.environ.get("OPENAI_API_KEY")

	# Custom prompt template.
	# The text exposes two placeholders, {chat_history} and {question}, which
	# are filled in when the prompt is formatted.
	# NOTE(review): the <s>[INST] ... </s>[INST] markers look like
	# Llama/Mistral-style instruction tags and the closing marker is "[INST]"
	# rather than "[/INST]" — confirm they are intended for the OpenAI chat
	# model this app configures.
	custom_template = """
	<s>[INST] You are an Expert PDF and document assistant. Follow these instructions:
	1. Greet the user and introduce yourself as a professional document assistant.
	2. Answer user queries based on the document content. If a question is out of scope, politely end the conversation.
	CHAT HISTORY: {chat_history}
	QUESTION: {question}
	ANSWER:
	</s>[INST]
	"""
	# Prompt object built from the template above; get_conversationchain passes
	# it to the retrieval chain as `condense_question_prompt`.
	# NOTE(review): the template asks for an ANSWER, while a condense-question
	# prompt is presumably meant to produce a rephrased question — verify this
	# is the intended slot for it.
	CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

	# Function to extract text from documents
	def get_document_text(uploaded_files):
	    """Load every supported uploaded file into a flat list of documents.

	    Each upload is copied to a temporary file on disk (the loaders take a
	    file path), parsed with the loader matching its extension, and the
	    temporary file is removed again once loading has finished or failed.

	    Args:
	        uploaded_files: Iterable of upload objects exposing a ``name``
	            attribute and a ``read()`` method returning the file bytes.

	    Returns:
	        A list containing everything the loaders' ``load()`` calls
	        returned, in upload order. Files whose extension is not one of
	        .pdf, .docx, .doc, .txt or .csv are skipped.
	    """
	    supported_suffixes = (".pdf", ".docx", ".doc", ".txt", ".csv")
	    documents = []
	    for uploaded_file in uploaded_files:
	        # Match extensions case-insensitively so "REPORT.PDF" is not ignored.
	        lowered_name = uploaded_file.name.lower()
	        if not lowered_name.endswith(supported_suffixes):
	            # Nothing can parse this file; skip it before touching the disk.
	            continue

	        suffix = os.path.splitext(uploaded_file.name)[-1]
	        # delete=False: the loader reopens the file by path after this
	        # handle is closed, so removal is done by hand in the finally below.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
	            temp_file.write(uploaded_file.read())
	            temp_file_path = temp_file.name

	        try:
	            # Pick the loader from the file type.
	            if lowered_name.endswith(".pdf"):
	                loader = PyPDFLoader(temp_file_path)
	            elif lowered_name.endswith((".docx", ".doc")):
	                # NOTE(review): legacy .doc files are routed to the docx
	                # loader, as before — confirm it can actually read them.
	                loader = Docx2txtLoader(temp_file_path)
	            elif lowered_name.endswith(".txt"):
	                loader = TextLoader(temp_file_path)
	            else:
	                loader = CSVLoader(temp_file_path)
	            documents.extend(loader.load())
	        finally:
	            # Always clean up, even when a loader raises, so uploads do not
	            # accumulate in the temp directory.
	            os.remove(temp_file_path)
	    return documents

	# Split text into chunks
	def get_chunks(documents):
	    """Cut the text of each document into overlapping chunks.

	    The splitter breaks on newlines and is configured for chunks of up to
	    1000 units with an overlap of 200, measured with ``len``.

	    Args:
	        documents: Iterable of objects exposing a ``page_content`` string.

	    Returns:
	        One flat list holding the chunks of all documents, in order.
	    """
	    splitter = CharacterTextSplitter(
	        separator="\n",
	        chunk_size=1000,
	        chunk_overlap=200,
	        length_function=len,
	    )
	    pieces = []
	    for document in documents:
	        pieces.extend(splitter.split_text(document.page_content))
	    return pieces

	# Create vectorstore
	def get_vectorstore(chunks):
	    """Build a FAISS vector store from text chunks.

	    Args:
	        chunks: The texts to index.

	    Returns:
	        The store produced by ``FAISS.from_texts`` using an
	        ``OpenAIEmbeddings`` instance created with default settings.
	    """
	    embedding_model = OpenAIEmbeddings()
	    store = FAISS.from_texts(texts=chunks, embedding=embedding_model)
	    return store

	# Create a conversational chain
	def get_conversationchain(vectorstore):
	    """Assemble the conversational retrieval chain for a vector store.

	    Args:
	        vectorstore: Store whose ``as_retriever()`` supplies the retriever.

	    Returns:
	        A ``ConversationalRetrievalChain`` wired to a 'gpt-4o-mini' chat
	        model (temperature 0.4), a buffer memory stored under the
	        'chat_history' key, and the module's custom question prompt.
	    """
	    chat_model = ChatOpenAI(temperature=0.4, model_name='gpt-4o-mini')
	    # return_messages=True keeps the history as message objects, which is
	    # what handle_question relies on when it reads ``.content``.
	    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
	    doc_retriever = vectorstore.as_retriever()
	    chain = ConversationalRetrievalChain.from_llm(
	        llm=chat_model,
	        retriever=doc_retriever,
	        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
	        memory=history,
	    )
	    return chain

	# Handle user questions and update chat history
	def handle_question(question):
	    """Send a question to the stored chain and render the whole dialogue.

	    Shows a warning and returns early when no conversation chain is held in
	    the session state. Otherwise the chain's 'chat_history' is saved to the
	    session state and every message is written out with st.markdown.

	    Args:
	        question: The text the user typed.
	    """
	    chain = st.session_state.conversation
	    if not chain:
	        st.warning("Please process your documents first.")
	        return

	    result = chain({'question': question})
	    st.session_state.chat_history = result['chat_history']

	    for position, message in enumerate(st.session_state.chat_history):
	        # Even positions are labelled as the user, odd ones as the bot.
	        from_user = position % 2 == 0
	        line = f"You: {message.content}" if from_user else f"Bot: {message.content}"
	        st.markdown(line)

	# Main Streamlit app
	def main():
	    """Run the Streamlit page: upload documents, index them, answer questions.

	    Sets up the page and sidebar, makes sure the ``conversation`` and
	    ``chat_history`` session keys exist, and builds a new conversation
	    chain from the uploaded files when "Process Documents" is pressed.
	    Any text typed into the question box is passed to handle_question.
	    """
	    st.set_page_config(page_title="Chat with Documents", page_icon="📚")
	    st.title("📚 Chat with Your Documents")
	    st.sidebar.title("Upload Your Files")

	    if "conversation" not in st.session_state:
	        st.session_state.conversation = None
	    if "chat_history" not in st.session_state:
	        st.session_state.chat_history = None

	    # File uploader, restricted to the extensions get_document_text can
	    # load so unsupported files are rejected up front, not silently dropped.
	    uploaded_files = st.sidebar.file_uploader(
	        "Upload your files (PDF, DOCX, TXT, CSV):",
	        type=["pdf", "docx", "doc", "txt", "csv"],
	        accept_multiple_files=True,
	    )

	    # Process button
	    if st.sidebar.button("Process Documents"):
	        if uploaded_files:
	            text_chunks = []
	            with st.spinner("Processing documents..."):
	                # Extract text and create conversation chain
	                raw_documents = get_document_text(uploaded_files)
	                text_chunks = get_chunks(raw_documents)
	                # An empty chunk list (e.g. image-only PDFs) gives the
	                # vector store nothing to index, so only build when there
	                # is text.
	                if text_chunks:
	                    vectorstore = get_vectorstore(text_chunks)
	                    st.session_state.conversation = get_conversationchain(vectorstore)
	                    # A fresh chain starts with empty memory; drop the old
	                    # transcript so the two stay in step.
	                    st.session_state.chat_history = None
	            if text_chunks:
	                st.success("Documents processed successfully!")
	            else:
	                st.error("No readable text was found in the uploaded documents.")
	        else:
	            st.warning("Please upload at least one document.")

	    # User input
	    question = st.text_input("Ask a question about the uploaded documents:")
	    if question:
	        handle_question(question)

	# Start the app only when this file is executed as the entry script
	if "__main__" == __name__:
	    main()