annas4421 committed on
Commit
2651156
·
verified ·
1 Parent(s): 4c9d9d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -49
app.py CHANGED
@@ -55,78 +55,73 @@ def get_conversationchain(vectorstore):
55
  return conversation_chain
56
 
57
  # Extract text from various document types including PDFs, TXT, DOCX, and CSV.
 
 
58
  def get_document_text(uploaded_files):
59
  documents = []
60
-
61
  for uploaded_file in uploaded_files:
62
- file_extension = os.path.splitext(uploaded_file.name)[1].lower()
63
-
64
- if file_extension == ".pdf":
65
- loader = PyPDFLoader(uploaded_file)
 
 
 
 
66
  documents.extend(loader.load())
67
- elif file_extension in [".docx", ".doc"]:
68
- loader = Docx2txtLoader(uploaded_file)
69
  documents.extend(loader.load())
70
- elif file_extension == ".txt":
71
- loader = TextLoader(uploaded_file)
72
  documents.extend(loader.load())
73
- elif file_extension == ".csv":
74
- loader = CSVLoader(uploaded_file)
75
  documents.extend(loader.load())
76
-
 
77
  return documents
78
 
 
79
  # Function to process and handle a user's query
80
  def handle_question(conversation_chain, question):
81
  response = conversation_chain({'question': question})
82
  return response['answer']
83
 
84
- # Streamlit app
85
  def main():
86
- st.set_page_config(page_title="Chat with Documents", page_icon=":books:")
87
- st.title("Chat with Your Documents :books:")
88
-
89
- # Session state for conversation and chat history
90
- if "conversation_chain" not in st.session_state:
91
- st.session_state.conversation_chain = None
92
-
93
- st.sidebar.header("Upload Your Documents")
94
- uploaded_files = st.sidebar.file_uploader(
95
- "Upload your documents here (PDF, TXT, DOCX, CSV):",
96
- type=["pdf", "txt", "docx", "csv"],
97
- accept_multiple_files=True
98
- )
99
-
100
- if st.sidebar.button("Process"):
101
  if uploaded_files:
102
- with st.spinner("Processing your documents..."):
103
- # Extract text from uploaded documents
104
  raw_documents = get_document_text(uploaded_files)
105
-
106
- if not raw_documents:
107
- st.error("No text could be extracted from the documents. Please check the files.")
108
- return
109
-
110
- # Convert text to chunks
111
  text_chunks = get_chunks(raw_documents)
112
-
113
  # Create vectorstore
114
  vectorstore = get_vectorstore(text_chunks)
115
-
116
  # Create conversation chain
117
- st.session_state.conversation_chain = get_conversationchain(vectorstore)
118
-
119
- st.success("Documents processed successfully! You can now ask questions.")
120
  else:
121
- st.error("Please upload at least one document.")
 
 
 
 
 
 
122
 
123
- # Chat interface
124
- if st.session_state.conversation_chain:
125
- question = st.text_input("Ask a question about your documents:")
126
- if question:
127
- with st.spinner("Generating response..."):
128
- answer = handle_question(st.session_state.conversation_chain, question)
129
- st.markdown(f"**Answer:** {answer}")
130
 
131
  if __name__ == '__main__':
132
  main()
 
55
  return conversation_chain
56
 
57
  # Extract text from various document types including PDFs, TXT, DOCX, and CSV.
58
+ import tempfile
59
+
60
  def get_document_text(uploaded_files):
61
  documents = []
62
+
63
  for uploaded_file in uploaded_files:
64
+ # Create a temporary file to save the uploaded file
65
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[-1]) as temp_file:
66
+ temp_file.write(uploaded_file.read())
67
+ temp_file_path = temp_file.name
68
+
69
+ # Check the file extension and load accordingly
70
+ if uploaded_file.name.endswith(".pdf"):
71
+ loader = PyPDFLoader(temp_file_path)
72
  documents.extend(loader.load())
73
+ elif uploaded_file.name.endswith(".docx") or uploaded_file.name.endswith(".doc"):
74
+ loader = Docx2txtLoader(temp_file_path)
75
  documents.extend(loader.load())
76
+ elif uploaded_file.name.endswith(".txt"):
77
+ loader = TextLoader(temp_file_path)
78
  documents.extend(loader.load())
79
+ elif uploaded_file.name.endswith(".csv"):
80
+ loader = CSVLoader(temp_file_path)
81
  documents.extend(loader.load())
82
+
83
+ print("Number of documents:", len(documents))
84
  return documents
85
 
86
+
87
  # Function to process and handle a user's query
88
  def handle_question(conversation_chain, question):
89
  response = conversation_chain({'question': question})
90
  return response['answer']
91
 
 
92
  def main():
93
+ st.set_page_config(page_title="Chat with multiple documents", page_icon=":books:")
94
+ st.header("Chat with your documents :books:")
95
+
96
+ if "conversation" not in st.session_state:
97
+ st.session_state.conversation = None
98
+
99
+ uploaded_files = st.file_uploader("Upload your files (PDF, DOCX, TXT, CSV):", accept_multiple_files=True)
100
+
101
+ if st.button("Process"):
 
 
 
 
 
 
102
  if uploaded_files:
103
+ with st.spinner("Processing documents..."):
104
+ # Extract text from the uploaded documents
105
  raw_documents = get_document_text(uploaded_files)
106
+
107
+ # Convert text into chunks
 
 
 
 
108
  text_chunks = get_chunks(raw_documents)
109
+
110
  # Create vectorstore
111
  vectorstore = get_vectorstore(text_chunks)
112
+
113
  # Create conversation chain
114
+ st.session_state.conversation = get_conversationchain(vectorstore)
115
+ st.success("Documents processed successfully!")
 
116
  else:
117
+ st.warning("Please upload at least one document.")
118
+
119
+ question = st.text_input("Ask a question about the uploaded documents:")
120
+ if question and st.session_state.conversation:
121
+ handle_question(st.session_state.conversation, question)
122
+ elif question:
123
+ st.warning("Please process your documents first.")
124
 
 
 
 
 
 
 
 
125
 
126
  if __name__ == '__main__':
127
  main()