todap's picture
Upload 6 files
ada8cbb verified
import os
import streamlit as st
from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from app import vectorstore
# Streamlit page: upload a text or PDF file, split it into chunks and add the
# chunks to the shared `vectorstore` imported from `app`.
st.title("Document Management")

# Only offer the formats the processing step below has a loader for; any other
# extension could never get past the "Unsupported file type." check.
uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf'])

if uploaded_file is not None:
    st.success(f"File {uploaded_file.name} successfully uploaded!")

    # Process the uploaded file
    if st.button("Process Document"):
        # Pick the loader from the reported MIME type *before* anything is
        # written to disk, so an unsupported upload leaves no file behind.
        loader_classes = {
            "application/pdf": PyPDFLoader,
            "text/plain": TextLoader,
        }
        loader_class = loader_classes.get(uploaded_file.type)
        if loader_class is None:
            st.error("Unsupported file type.")
            st.stop()

        # The loaders read from a path, so the upload is written to a
        # temporary location for the duration of the processing step only.
        temp_dir = "temp_uploads"
        os.makedirs(temp_dir, exist_ok=True)
        # The file name is supplied by the client: keep just its final path
        # component so it cannot point outside `temp_dir`.
        file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))

        try:
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            with st.spinner("Processing document..."):
                try:
                    documents = loader_class(file_path).load()

                    # Split the document into chunks
                    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                    texts = text_splitter.split_documents(documents)

                    # Add the chunks to the vectorstore
                    vectorstore.add_documents(texts)
                    st.success("Document processed and added to the knowledge base!")
                except Exception as e:
                    # UI boundary: report the failure on the page instead of
                    # letting the traceback end the script run.
                    st.error(f"An error occurred: {e}")
        finally:
            # Clean up: remove the temporary file whether or not processing
            # succeeded.
            if os.path.exists(file_path):
                os.remove(file_path)
# Display current documents in the knowledge base
# st.subheader("Current Documents in Knowledge Base")
# # This is a placeholder. You'll need to implement a method to retrieve and display
# # the list of documents currently in your Chroma database.
# st.write("Placeholder for document list")
# # Option to clear the entire knowledge base
# if st.button("Clear Knowledge Base"):
# if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
# try:
# # Clear the Chroma database
# vectorstore.delete()
# st.success("Knowledge base cleared!")
# except Exception as e:
# st.error(f"An error occurred: {e}")