Spaces:

annas4421
/

Test-CHATBOT

Sleeping

App Files Files Community

annas4421 committed on Dec 19, 2024

Commit

2651156

verified ·

1 Parent(s): 4c9d9d1

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -49

app.py CHANGED Viewed

@@ -55,78 +55,73 @@ def get_conversationchain(vectorstore):
     return conversation_chain
 # Extract text from various document types including PDFs, TXT, DOCX, and CSV.
 def get_document_text(uploaded_files):
     # Pre-commit version (removed side of the diff). Picks a loader from each
     # upload's lower-cased file extension and returns the concatenated results
     # of loader.load(); uploads with any other extension contribute nothing.
     # NOTE(review): every loader here is handed the upload object itself; the
     # replacement version writes the upload to a temp file and passes that path.
     documents = []
     for uploaded_file in uploaded_files:
-        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
-        if file_extension == ".pdf":
-            loader = PyPDFLoader(uploaded_file)
             documents.extend(loader.load())
-        elif file_extension in [".docx", ".doc"]:
-            loader = Docx2txtLoader(uploaded_file)
             documents.extend(loader.load())
-        elif file_extension == ".txt":
-            loader = TextLoader(uploaded_file)
             documents.extend(loader.load())
-        elif file_extension == ".csv":
-            loader = CSVLoader(uploaded_file)
             documents.extend(loader.load())
     return documents
 # Function to process and handle a user's query
 def handle_question(conversation_chain, question):
     # Calls the chain with the question wrapped in a {'question': ...} dict
     # and returns the value stored under the 'answer' key of the result.
     response = conversation_chain({'question': question})
     return response['answer']
-# Streamlit app
 def main():
     # Pre-commit Streamlit entry point (removed side of the diff): a sidebar
     # uploader, a "Process" button that stores the conversation chain in
     # st.session_state, and a question box whose answer is shown via st.markdown.
-    st.set_page_config(page_title="Chat with Documents", page_icon=":books:")
-    st.title("Chat with Your Documents :books:")
-    # Session state for conversation and chat history
-    if "conversation_chain" not in st.session_state:
-        st.session_state.conversation_chain = None
-    st.sidebar.header("Upload Your Documents")
-    uploaded_files = st.sidebar.file_uploader(
-        "Upload your documents here (PDF, TXT, DOCX, CSV):",
-        type=["pdf", "txt", "docx", "csv"],
-        accept_multiple_files=True
-    )
-    if st.sidebar.button("Process"):
         if uploaded_files:
-            with st.spinner("Processing your documents..."):
-                # Extract text from uploaded documents
                 raw_documents = get_document_text(uploaded_files)
-                if not raw_documents:
-                    st.error("No text could be extracted from the documents. Please check the files.")
-                    return
-                # Convert text to chunks
                 text_chunks = get_chunks(raw_documents)
                 # Create vectorstore
                 vectorstore = get_vectorstore(text_chunks)
                 # Create conversation chain
-                st.session_state.conversation_chain = get_conversationchain(vectorstore)
-                st.success("Documents processed successfully! You can now ask questions.")
         else:
-            st.error("Please upload at least one document.")
-    # Chat interface
-    if st.session_state.conversation_chain:
-        question = st.text_input("Ask a question about your documents:")
-        if question:
-            with st.spinner("Generating response..."):
-                answer = handle_question(st.session_state.conversation_chain, question)
-                st.markdown(f"**Answer:** {answer}")
 # Script entry point: call main() only when this file is run directly, not when imported.
 if __name__ == '__main__':
     main()

     return conversation_chain
 # Extract text from various document types including PDFs, TXT, DOCX, and CSV.
+import tempfile
 def get_document_text(uploaded_files):
     """Load every supported upload into one flat list of loader documents.

     Each supported upload is written to a temporary file so the loader can be
     given a filesystem path; that file is removed again once loading finishes.

     Args:
         uploaded_files: Iterable of file-like uploads exposing ``.name`` and
             ``.read()``.

     Returns:
         list: The concatenated ``loader.load()`` results for every PDF,
         DOCX/DOC, TXT and CSV upload, in upload order. Uploads with any
         other extension are skipped.
     """
     supported_suffixes = (".pdf", ".docx", ".doc", ".txt", ".csv")
     documents = []
     for uploaded_file in uploaded_files:
         # Lower-case the extension so "REPORT.PDF" is routed like "report.pdf".
         suffix = os.path.splitext(uploaded_file.name)[-1].lower()
         if suffix not in supported_suffixes:
             # No loader handles this type, so do not write it to disk at all.
             continue
         # delete=False keeps the path valid after the handle closes, so the
         # loader can reopen it by name.
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
             temp_file.write(uploaded_file.read())
             temp_file_path = temp_file.name
         try:
             if suffix == ".pdf":
                 loader = PyPDFLoader(temp_file_path)
             elif suffix in (".docx", ".doc"):
                 loader = Docx2txtLoader(temp_file_path)
             elif suffix == ".txt":
                 loader = TextLoader(temp_file_path)
             else:  # ".csv" is the only supported suffix left
                 loader = CSVLoader(temp_file_path)
             documents.extend(loader.load())
         finally:
             # Nothing else removes a delete=False temp file; clean up even
             # when the loader raises.
             os.remove(temp_file_path)
     print("Number of documents:", len(documents))
     return documents
 # Function to process and handle a user's query
 def handle_question(conversation_chain, question):
     """Send one question through the conversation chain and return its answer.

     Args:
         conversation_chain: Callable taking a ``{'question': ...}`` mapping
             and returning a mapping that contains an ``'answer'`` entry.
         question: The user's question.

     Returns:
         The value stored under ``'answer'`` in the chain's result.
     """
     payload = {'question': question}
     result = conversation_chain(payload)
     answer = result['answer']
     return answer
 def main():
     """Render the Streamlit page: upload documents, index them, answer questions.

     Clicking "Process" extracts the uploaded documents, chunks them, builds a
     vector store and stores the resulting conversation chain in
     ``st.session_state.conversation``. A question typed afterwards is sent
     through that chain and the answer is rendered on the page.
     """
     st.set_page_config(page_title="Chat with multiple documents", page_icon=":books:")
     st.header("Chat with your documents :books:")
     # Streamlit re-executes the script on every interaction, so the chain has
     # to live in session state to survive between reruns.
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     uploaded_files = st.file_uploader(
         "Upload your files (PDF, DOCX, TXT, CSV):",
         # Only offer the types get_document_text has a loader for.
         type=["pdf", "docx", "doc", "txt", "csv"],
         accept_multiple_files=True,
     )
     if st.button("Process"):
         if uploaded_files:
             with st.spinner("Processing documents..."):
                 # Extract text from the uploaded documents
                 raw_documents = get_document_text(uploaded_files)
                 if raw_documents:
                     # Convert text into chunks
                     text_chunks = get_chunks(raw_documents)
                     # Create vectorstore
                     vectorstore = get_vectorstore(text_chunks)
                     # Create conversation chain
                     st.session_state.conversation = get_conversationchain(vectorstore)
                     st.success("Documents processed successfully!")
                 else:
                     # Do not build an index from nothing; tell the user instead.
                     st.error("No text could be extracted from the documents. Please check the files.")
         else:
             st.warning("Please upload at least one document.")
     question = st.text_input("Ask a question about the uploaded documents:")
     if question and st.session_state.conversation:
         with st.spinner("Generating response..."):
             answer = handle_question(st.session_state.conversation, question)
         # The answer must be rendered explicitly; discarding the return value
         # leaves the page blank after a question.
         st.markdown(f"**Answer:** {answer}")
     elif question:
         st.warning("Please process your documents first.")
 # Script entry point: call main() only when this file is run directly, not when imported.
 if __name__ == '__main__':
     main()