|
import os
|
|
import streamlit as st
|
|
from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
from app import vectorstore
|
|
|
|
|
|
# Streamlit page: upload a document, split it into chunks, and add the chunks
# to the vector store imported from ``app``.

# Directory (relative to the working directory) that holds an upload only for
# the duration of a single processing run.
TEMP_DIR = "temp_uploads"

# Loader class for each accepted file extension. The uploader's allowed types
# are derived from this table so the widget never offers a format that has no
# loader behind it.
LOADERS_BY_EXTENSION = {
    ".txt": TextLoader,
    ".pdf": PyPDFLoader,
}

st.title("Document Management")

uploaded_file = st.file_uploader(
    "Choose a file",
    type=[ext.lstrip(".") for ext in LOADERS_BY_EXTENSION],
)

if uploaded_file is not None:
    # The file name is supplied by the client; keep only its final path
    # component so the temporary file cannot be written outside TEMP_DIR.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(TEMP_DIR, safe_name)

    # Pick the loader from the extension (the same criterion the uploader
    # filters on) instead of the browser-reported MIME type.
    extension = os.path.splitext(safe_name)[1].lower()
    loader_cls = LOADERS_BY_EXTENSION.get(extension)
    if loader_cls is None:
        # Validate before anything is written to disk: st.stop() ends the
        # script run, so no cleanup code placed after it would execute.
        st.error("Unsupported file type.")
        st.stop()

    st.success(f"File {uploaded_file.name} successfully uploaded!")

    if st.button("Process Document"):
        with st.spinner("Processing document..."):
            try:
                # The loaders read from a path, so the upload is written to
                # disk only for the duration of this processing run.
                os.makedirs(TEMP_DIR, exist_ok=True)
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())

                documents = loader_cls(file_path).load()

                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                texts = text_splitter.split_documents(documents)

                if texts:
                    vectorstore.add_documents(texts)
                    st.success("Document processed and added to the knowledge base!")
                else:
                    # Nothing to index (e.g. the loader produced no text).
                    st.warning("No text could be extracted from the document.")
            except Exception as e:
                # Top-level UI boundary: report the failure to the user
                # instead of letting the page crash.
                st.error(f"An error occurred: {e}")
            finally:
                # Always remove the temporary copy, whether processing
                # succeeded or failed.
                if os.path.exists(file_path):
                    os.remove(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|