Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+
 # app.py
 import streamlit as st
 import os
@@ -56,10 +57,9 @@ with st.sidebar:
     input_data = st.file_uploader("Upload a PDF, TXT, XLS/XLSX, or DOC/DOCX file", type=["pdf", "txt", "xls", "xlsx", "doc", "docx"])
 
     if st.button("Process File") and input_data is not None:
-
-
-
-        st.success("File processed successfully. You can now ask questions.")
+        vector_store = process_input(input_data)
+        st.session_state.vectorstore = vector_store
+        st.success("File processed successfully. You can now ask questions.")
 
 # Display chat history
 st.subheader("Chat History")
@@ -136,9 +136,17 @@ def process_input(input_data):
     # Create uploads directory
     os.makedirs("uploads", exist_ok=True)
 
+    # Initialize progress bar and status
+    progress_bar = st.progress(0)
+    status = st.status("Processing file...", expanded=True)
+
     documents = ""
     file_name = input_data.name.lower()
 
+    # Step 1: Read file
+    status.update(label="Reading file...")
+    progress_bar.progress(0.25)
+
     if file_name.endswith(".pdf"):
         pdf_reader = PdfReader(input_data)
         for page in pdf_reader.pages:
@@ -147,24 +155,32 @@ def process_input(input_data):
         documents = input_data.read().decode("utf-8")
     elif file_name.endswith((".xls", ".xlsx")):
         df = pd.read_excel(input_data)
-        # Convert all cells to strings and join
         documents = " ".join(df.astype(str).values.flatten())
     elif file_name.endswith((".doc", ".docx")):
         doc = Document(input_data)
         for para in doc.paragraphs:
             documents += para.text + "\n"
 
-    # Split text
+    # Step 2: Split text
+    status.update(label="Splitting text into chunks...")
+    progress_bar.progress(0.50)
+
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     texts = text_splitter.split_text(documents)
 
-    # Create embeddings
+    # Step 3: Create embeddings
+    status.update(label="Creating embeddings...")
+    progress_bar.progress(0.75)
+
     hf_embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-mpnet-base-v2",
         model_kwargs={'device': 'cpu'}
     )
 
-    # Initialize FAISS
+    # Step 4: Initialize FAISS vector store
+    status.update(label="Building vector store...")
+    progress_bar.progress(0.90)
+
     dimension = len(hf_embeddings.embed_query("sample text"))
     index = faiss.IndexFlatL2(dimension)
     vector_store = FAISS(
@@ -181,6 +197,10 @@ def process_input(input_data):
     # Save vector store locally
     vector_store.save_local("vectorstore/faiss_index")
 
+    # Complete processing
+    status.update(label="Processing complete!", state="complete")
+    progress_bar.progress(1.0)
+
     return vector_store
 
 def answer_question(vectorstore, query):
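
A note on the new progress reporting: st.progress returns a bar that is updated in place, and st.status returns a container whose label and state can be updated as the work advances. The sketch below replays the same four steps outside of process_input as a minimal, self-contained Streamlit app; the step labels are copied from the diff, while the script name and the time.sleep calls are placeholders for the real work and are not part of app.py.

# progress_sketch.py - standalone illustration of the st.status / st.progress pattern used above
import time
import streamlit as st

progress_bar = st.progress(0)                             # bar starts at 0%
status = st.status("Processing file...", expanded=True)   # collapsible status box

steps = [
    ("Reading file...", 0.25),
    ("Splitting text into chunks...", 0.50),
    ("Creating embeddings...", 0.75),
    ("Building vector store...", 0.90),
]

for label, fraction in steps:
    status.update(label=label)          # change the visible step label
    progress_bar.progress(fraction)     # advance the bar (accepts 0.0-1.0 or 0-100)
    time.sleep(0.5)                     # placeholder for the real work

status.update(label="Processing complete!", state="complete")  # mark the status box as done
progress_bar.progress(1.0)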
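
Storing the result in st.session_state.vectorstore is what lets the index survive Streamlit's rerun-on-interaction model, so follow-up questions do not reprocess the uploaded file. The query path is not part of this diff, so the fragment below is only a plausible shape for it inside app.py; the st.chat_input prompt and the warning text are assumptions, not code from the commit.

# Hypothetical query path (not shown in this diff): reuse the cached vector store across reruns
query = st.chat_input("Ask a question about the uploaded file")
if query:
    if "vectorstore" not in st.session_state:
        # No file has been processed in this session yet
        st.warning("Please upload and process a file first.")
    else:
        answer = answer_question(st.session_state.vectorstore, query)
        st.write(answer)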
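
For context on the vector_store = FAISS( call that the fourth hunk cuts off: the arguments actually passed in app.py fall outside the hunk, so the sketch below only shows the standard LangChain pattern for wrapping a raw faiss.IndexFlatL2 index by hand. The import paths, the InMemoryDocstore and index_to_docstore_id arguments, and the add_texts call are assumptions here, not taken from the file.

# Assumed shape of the FAISS construction (illustrative only)
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings   # import path assumed
from langchain_community.vectorstores import FAISS

texts = ["example chunk one", "example chunk two"]   # in app.py these come from the text splitter

hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': 'cpu'},
)

# Probe the embedding size once, then build an L2 index of that dimension
dimension = len(hf_embeddings.embed_query("sample text"))
index = faiss.IndexFlatL2(dimension)

vector_store = FAISS(
    embedding_function=hf_embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
vector_store.add_texts(texts)                        # embed and insert the chunks
vector_store.save_local("vectorstore/faiss_index")   # same save path as in the diff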