Spaces:

todap
/

Context-Aware-RAG-Based-ChatBot

Sleeping

File size: 2,735 Bytes

ada8cbb

import os
import streamlit as st
from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from app import vectorstore


st.title("Document Management")

# File uploader
uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])

if uploaded_file is not None:
    # Create a temporary directory to store the uploaded file
    temp_dir = "temp_uploads"
    os.makedirs(temp_dir, exist_ok=True)
    file_path = os.path.join(temp_dir, uploaded_file.name)
    
    # Save the uploaded file temporarily
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    
    st.success(f"File {uploaded_file.name} successfully uploaded!")

    # Process the uploaded file
    if st.button("Process Document"):
        with st.spinner("Processing document..."):
            try:
                # Load the document based on file type
                if uploaded_file.type == "application/pdf":
                    loader = PyPDFLoader(file_path)
                elif uploaded_file.type == "text/plain":
                    loader = TextLoader(file_path)
                else:
                    st.error("Unsupported file type.")
                    st.stop()
                
                documents = loader.load()
                
                # Split the document into chunks
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                texts = text_splitter.split_documents(documents)
                
                # Add the chunks to the vectorstore
                vectorstore.add_documents(texts)
                
                st.success(f"Document processed and added to the knowledge base!")
            except Exception as e:
                st.error(f"An error occurred: {e}")
        
        # Clean up: remove the temporary file
        os.remove(file_path)

# Display current documents in the knowledge base
# st.subheader("Current Documents in Knowledge Base")
# # This is a placeholder. You'll need to implement a method to retrieve and display
# # the list of documents currently in your Chroma database.
# st.write("Placeholder for document list")

# # Option to clear the entire knowledge base
# if st.button("Clear Knowledge Base"):
#     if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
#         try:
#             # Clear the Chroma database
#             vectorstore.delete()
#             st.success("Knowledge base cleared!")
#         except Exception as e:
#             st.error(f"An error occurred: {e}")