Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -55,78 +55,73 @@ def get_conversationchain(vectorstore):
|
|
55 |
return conversation_chain
|
56 |
|
57 |
# Extract text from various document types including PDFs, TXT, DOCX, and CSV.
|
|
|
|
|
58 |
def get_document_text(uploaded_files):
|
59 |
documents = []
|
60 |
-
|
61 |
for uploaded_file in uploaded_files:
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
documents.extend(loader.load())
|
67 |
-
elif
|
68 |
-
loader = Docx2txtLoader(
|
69 |
documents.extend(loader.load())
|
70 |
-
elif
|
71 |
-
loader = TextLoader(
|
72 |
documents.extend(loader.load())
|
73 |
-
elif
|
74 |
-
loader = CSVLoader(
|
75 |
documents.extend(loader.load())
|
76 |
-
|
|
|
77 |
return documents
|
78 |
|
|
|
79 |
# Function to process and handle a user's query
|
80 |
def handle_question(conversation_chain, question):
|
81 |
response = conversation_chain({'question': question})
|
82 |
return response['answer']
|
83 |
|
84 |
-
# Streamlit app
|
85 |
def main():
|
86 |
-
st.set_page_config(page_title="Chat with
|
87 |
-
st.
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
"Upload your documents here (PDF, TXT, DOCX, CSV):",
|
96 |
-
type=["pdf", "txt", "docx", "csv"],
|
97 |
-
accept_multiple_files=True
|
98 |
-
)
|
99 |
-
|
100 |
-
if st.sidebar.button("Process"):
|
101 |
if uploaded_files:
|
102 |
-
with st.spinner("Processing
|
103 |
-
# Extract text from uploaded documents
|
104 |
raw_documents = get_document_text(uploaded_files)
|
105 |
-
|
106 |
-
|
107 |
-
st.error("No text could be extracted from the documents. Please check the files.")
|
108 |
-
return
|
109 |
-
|
110 |
-
# Convert text to chunks
|
111 |
text_chunks = get_chunks(raw_documents)
|
112 |
-
|
113 |
# Create vectorstore
|
114 |
vectorstore = get_vectorstore(text_chunks)
|
115 |
-
|
116 |
# Create conversation chain
|
117 |
-
st.session_state.
|
118 |
-
|
119 |
-
st.success("Documents processed successfully! You can now ask questions.")
|
120 |
else:
|
121 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
# Chat interface
|
124 |
-
if st.session_state.conversation_chain:
|
125 |
-
question = st.text_input("Ask a question about your documents:")
|
126 |
-
if question:
|
127 |
-
with st.spinner("Generating response..."):
|
128 |
-
answer = handle_question(st.session_state.conversation_chain, question)
|
129 |
-
st.markdown(f"**Answer:** {answer}")
|
130 |
|
131 |
if __name__ == '__main__':
|
132 |
main()
|
|
|
55 |
return conversation_chain
|
56 |
|
57 |
# Extract text from various document types including PDFs, TXT, DOCX, and CSV.
|
58 |
+
import tempfile
|
59 |
+
|
60 |
def get_document_text(uploaded_files):
    """Load every supported uploaded file into a flat list of documents.

    Each supported upload is copied to a temporary file on disk because the
    loaders used here are constructed from a filesystem path. The temporary
    file is removed as soon as the loader has finished, so nothing is left
    behind in the temp directory.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing a ``name``
            attribute (the original filename) and a ``read()`` method that
            returns the file content as bytes.

    Returns:
        A list with everything returned by each loader's ``load()`` call, in
        upload order. Files with an unsupported extension are skipped, so the
        list may be empty.
    """
    documents = []

    for uploaded_file in uploaded_files:
        # Compare against a lower-cased name so "REPORT.PDF" is treated the
        # same as "report.pdf".
        lowered_name = uploaded_file.name.lower()

        # Pick the loader first: unsupported files are skipped before any
        # temporary file is written for them.
        if lowered_name.endswith(".pdf"):
            loader_cls = PyPDFLoader
        elif lowered_name.endswith((".docx", ".doc")):
            # NOTE(review): legacy binary ".doc" files are routed to the same
            # loader as ".docx"; confirm the loader can actually parse them.
            loader_cls = Docx2txtLoader
        elif lowered_name.endswith(".txt"):
            loader_cls = TextLoader
        elif lowered_name.endswith(".csv"):
            loader_cls = CSVLoader
        else:
            continue

        # delete=False so the file can be reopened by path after this handle
        # is closed; it is removed explicitly in the ``finally`` below.
        suffix = os.path.splitext(uploaded_file.name)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(uploaded_file.read())
            temp_file_path = temp_file.name

        try:
            documents.extend(loader_cls(temp_file_path).load())
        finally:
            # Always clean up, even if the loader raised on a corrupt file.
            os.remove(temp_file_path)

    print("Number of documents:", len(documents))
    return documents
|
85 |
|
86 |
+
|
87 |
# Function to process and handle a user's query
|
88 |
def handle_question(conversation_chain, question):
    """Send a user's question through the conversation chain.

    Args:
        conversation_chain: Callable that accepts a dict with a
            ``'question'`` key and returns a mapping with an ``'answer'`` key.
        question: The question to pass to the chain.

    Returns:
        The value stored under ``'answer'`` in the chain's response.
    """
    payload = {'question': question}
    result = conversation_chain(payload)
    return result['answer']
|
91 |
|
|
|
92 |
def main():
    """Render the Streamlit page: upload documents, process them, ask questions.

    The conversation chain is kept in ``st.session_state.conversation`` so it
    is still available on the rerun triggered by the question input.
    """
    st.set_page_config(page_title="Chat with multiple documents", page_icon=":books:")
    st.header("Chat with your documents :books:")

    # Initialise the slot once so later reads of the attribute never fail.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    uploaded_files = st.file_uploader("Upload your files (PDF, DOCX, TXT, CSV):", accept_multiple_files=True)

    if st.button("Process"):
        if uploaded_files:
            with st.spinner("Processing documents..."):
                # Extract text from the uploaded documents
                raw_documents = get_document_text(uploaded_files)

                if not raw_documents:
                    # Nothing loadable (e.g. only unsupported file types):
                    # stop here instead of building a vectorstore from an
                    # empty corpus.
                    st.error("No text could be extracted from the documents. Please check the files.")
                else:
                    # Convert text into chunks
                    text_chunks = get_chunks(raw_documents)

                    # Create vectorstore
                    vectorstore = get_vectorstore(text_chunks)

                    # Create conversation chain
                    st.session_state.conversation = get_conversationchain(vectorstore)
                    st.success("Documents processed successfully!")
        else:
            st.warning("Please upload at least one document.")

    question = st.text_input("Ask a question about the uploaded documents:")
    if question and st.session_state.conversation:
        with st.spinner("Generating response..."):
            answer = handle_question(st.session_state.conversation, question)
        # handle_question only returns the answer; it must be rendered here
        # or the user never sees it.
        st.markdown(f"**Answer:** {answer}")
    elif question:
        st.warning("Please process your documents first.")
124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|