ngcanh committed on
Commit
1b8c222
·
verified ·
1 Parent(s): af9cd0d

Update pages/management.py

Browse files
Files changed (1) hide show
  1. pages/management.py +54 -54
pages/management.py CHANGED
@@ -1,68 +1,68 @@
1
- # import os
2
- # import streamlit as st
3
- # from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
4
- # from langchain.text_splitter import CharacterTextSplitter
5
- # from app import vectorstore
6
 
7
 
8
- # st.title("Document Management")
9
 
10
- # # File uploader
11
- # uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])
12
 
13
- # if uploaded_file is not None:
14
- # # Create a temporary directory to store the uploaded file
15
- # temp_dir = "temp_uploads"
16
- # os.makedirs(temp_dir, exist_ok=True)
17
- # file_path = os.path.join(temp_dir, uploaded_file.name)
18
 
19
- # # Save the uploaded file temporarily
20
- # with open(file_path, "wb") as f:
21
- # f.write(uploaded_file.getbuffer())
22
 
23
- # st.success(f"File {uploaded_file.name} successfully uploaded!")
24
 
25
- # # Process the uploaded file
26
- # if st.button("Process Document"):
27
- # with st.spinner("Processing document..."):
28
- # try:
29
- # # Load the document based on file type
30
- # if uploaded_file.type == "application/pdf":
31
- # loader = PyPDFLoader(file_path)
32
- # elif uploaded_file.type == "text/plain":
33
- # loader = TextLoader(file_path)
34
- # else:
35
- # st.error("Unsupported file type.")
36
- # st.stop()
37
 
38
- # documents = loader.load()
39
 
40
- # # Split the document into chunks
41
- # text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=150)
42
- # texts = text_splitter.split_documents(documents)
43
 
44
- # # Add the chunks to the vectorstore
45
- # vectorstore.add_documents(texts)
46
 
47
- # st.success(f"Document processed and added to the knowledge base!")
48
- # except Exception as e:
49
- # st.error(f"An error occurred: {e}")
50
 
51
- # # Clean up: remove the temporary file
52
- # os.remove(file_path)
53
 
54
- # # Display current documents in the knowledge base
55
- # # st.subheader("Current Documents in Knowledge Base")
56
- # # # This is a placeholder. You'll need to implement a method to retrieve and display
57
- # # # the list of documents currently in your Chroma database.
58
- # # st.write("Placeholder for document list")
59
 
60
- # # # Option to clear the entire knowledge base
61
- # # if st.button("Clear Knowledge Base"):
62
- # # if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
63
- # # try:
64
- # # # Clear the Chroma database
65
- # # vectorstore.delete()
66
- # # st.success("Knowledge base cleared!")
67
- # # except Exception as e:
68
- # # st.error(f"An error occurred: {e}")
 
1
+ import os
2
+ import streamlit as st
3
+ from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from app import vectorstore
6
 
7
 
8
+ st.title("Document Management")
9
 
10
+ # File uploader
11
+ uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])
12
 
13
+ if uploaded_file is not None:
14
+ # Create a temporary directory to store the uploaded file
15
+ temp_dir = "temp_uploads"
16
+ os.makedirs(temp_dir, exist_ok=True)
17
+ file_path = os.path.join(temp_dir, uploaded_file.name)
18
 
19
+ # Save the uploaded file temporarily
20
+ with open(file_path, "wb") as f:
21
+ f.write(uploaded_file.getbuffer())
22
 
23
+ st.success(f"File {uploaded_file.name} successfully uploaded!")
24
 
25
+ # Process the uploaded file
26
+ if st.button("Process Document"):
27
+ with st.spinner("Processing document..."):
28
+ try:
29
+ # Load the document based on file type
30
+ if uploaded_file.type == "application/pdf":
31
+ loader = PyPDFLoader(file_path)
32
+ elif uploaded_file.type == "text/plain":
33
+ loader = TextLoader(file_path)
34
+ else:
35
+ st.error("Unsupported file type.")
36
+ st.stop()
37
 
38
+ documents = loader.load()
39
 
40
+ # Split the document into chunks
41
+ text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=150)
42
+ texts = text_splitter.split_documents(documents)
43
 
44
+ # Add the chunks to the vectorstore
45
+ vectorstore.add_documents(texts)
46
 
47
+ st.success(f"Document processed and added to the knowledge base!")
48
+ except Exception as e:
49
+ st.error(f"An error occurred: {e}")
50
 
51
+ # Clean up: remove the temporary file
52
+ os.remove(file_path)
53
 
54
+ # Display current documents in the knowledge base
55
+ # st.subheader("Current Documents in Knowledge Base")
56
+ # # This is a placeholder. You'll need to implement a method to retrieve and display
57
+ # # the list of documents currently in your Chroma database.
58
+ # st.write("Placeholder for document list")
59
 
60
+ # # Option to clear the entire knowledge base
61
+ # if st.button("Clear Knowledge Base"):
62
+ # if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
63
+ # try:
64
+ # # Clear the Chroma database
65
+ # vectorstore.delete()
66
+ # st.success("Knowledge base cleared!")
67
+ # except Exception as e:
68
+ # st.error(f"An error occurred: {e}")