Spaces:

todap
/

Context-Aware-RAG-Based-ChatBot

Sleeping

App Files Files Community

Context-Aware-RAG-Based-ChatBot / pages /management.py

todap

Upload 6 files

ada8cbb verified 12 months ago

raw

history blame contribute delete

2.74 kB

	import os
	import streamlit as st
	from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
	from langchain.text_splitter import CharacterTextSplitter
	from app import vectorstore


	st.title("Document Management")

	# File uploader
	uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])

	if uploaded_file is not None:
	# Create a temporary directory to store the uploaded file
	temp_dir = "temp_uploads"
	os.makedirs(temp_dir, exist_ok=True)
	file_path = os.path.join(temp_dir, uploaded_file.name)

	# Save the uploaded file temporarily
	with open(file_path, "wb") as f:
	f.write(uploaded_file.getbuffer())

	st.success(f"File {uploaded_file.name} successfully uploaded!")

	# Process the uploaded file
	if st.button("Process Document"):
	with st.spinner("Processing document..."):
	try:
	# Load the document based on file type
	if uploaded_file.type == "application/pdf":
	loader = PyPDFLoader(file_path)
	elif uploaded_file.type == "text/plain":
	loader = TextLoader(file_path)
	else:
	st.error("Unsupported file type.")
	st.stop()

	documents = loader.load()

	# Split the document into chunks
	text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	texts = text_splitter.split_documents(documents)

	# Add the chunks to the vectorstore
	vectorstore.add_documents(texts)

	st.success(f"Document processed and added to the knowledge base!")
	except Exception as e:
	st.error(f"An error occurred: {e}")

	# Clean up: remove the temporary file
	os.remove(file_path)

	# Display current documents in the knowledge base
	# st.subheader("Current Documents in Knowledge Base")
	# # This is a placeholder. You'll need to implement a method to retrieve and display
	# # the list of documents currently in your Chroma database.
	# st.write("Placeholder for document list")

	# # Option to clear the entire knowledge base
	# if st.button("Clear Knowledge Base"):
	# if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
	# try:
	# # Clear the Chroma database
	# vectorstore.delete()
	# st.success("Knowledge base cleared!")
	# except Exception as e:
	# st.error(f"An error occurred: {e}")