Spaces:

DebabrataHalder
/

chatWithMultiplePDF1

Sleeping

App Files Community

DebabrataHalder committed on Jan 22

Commit

b86dd1f

verified ·

1 Parent(s): 9b36123

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -22

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
-import logging
 import time
 from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
@@ -40,23 +40,23 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
-# Function to create a FAISS vectorstore with rate limiting
 def get_vectorstore(text_chunks):
     cohere_api_key = os.getenv("COHERE_API_KEY")
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
-    # Rate limiting: Ensure no more than 40 requests per minute
-    max_requests_per_minute = 40
-    wait_time = 60 / max_requests_per_minute
-    vectorstore = None
-    try:
-        vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-        time.sleep(wait_time)  # Sleep to avoid hitting API rate limit
-    except Exception as e:
-        logging.error(f"Error creating vectorstore: {e}")
-        st.error("An error occurred while creating the vectorstore.")
     return vectorstore
 # Function to set up the conversational retrieval chain
@@ -64,13 +64,13 @@ def get_conversation_chain(vectorstore):
     try:
         llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
             memory=memory
         )
         logging.info("Conversation chain created successfully.")
         return conversation_chain
     except Exception as e:
@@ -103,7 +103,6 @@ def main():
     st.header("Chat with multiple PDFs :books:")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)
@@ -112,16 +111,12 @@ def main():
         pdf_docs = st.file_uploader(
             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
         )
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
                 vectorstore = get_vectorstore(text_chunks)
-                if vectorstore is not None:  # Ensure vectorstore was created successfully
-                    st.session_state.conversation = get_conversation_chain(vectorstore)
 if __name__ == '__main__':
     main()

 import os
 import time
+import logging
 from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
     chunks = text_splitter.split_text(text)
     return chunks
+# Function to create a FAISS vectorstore with throttling
 def get_vectorstore(text_chunks):
+    """Embed text chunks in throttled batches and index them in a FAISS store.
+
+    Args:
+        text_chunks: List of text strings to embed and index.
+
+    Returns:
+        The populated FAISS vectorstore, or None when no batch could be
+        embedded (empty input or every batch failed).
+    """
     cohere_api_key = os.getenv("COHERE_API_KEY")
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
+    # FAISS cannot be constructed without an index/docstore, so the store is
+    # created from the first successfully embedded batch and extended afterwards.
+    vectorstore = None
+    batch_size = 10  # Number of chunks to process per batch
+    for i in range(0, len(text_chunks), batch_size):
+        batch = text_chunks[i:i + batch_size]
+        batch_number = i // batch_size + 1
+        try:
+            vectors = embeddings.embed_documents(batch)
+            # Pair each text with its pre-computed vector so it is not embedded twice.
+            text_embeddings = list(zip(batch, vectors))
+            if vectorstore is None:
+                vectorstore = FAISS.from_embeddings(text_embeddings=text_embeddings, embedding=embeddings)
+            else:
+                vectorstore.add_embeddings(text_embeddings=text_embeddings)
+            logging.info(f"Processed batch {batch_number}")
+        except Exception as e:
+            # Best effort: a failed batch is logged and skipped so the rest are still indexed.
+            logging.error(f"Error processing batch {batch_number}: {e}")
+        time.sleep(1.5)  # Sleep to avoid exceeding rate limit
+    if vectorstore is None:
+        logging.error("No text chunks could be embedded; vectorstore was not created.")
     return vectorstore
 # Function to set up the conversational retrieval chain
     try:
         llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
             memory=memory
         )
         logging.info("Conversation chain created successfully.")
         return conversation_chain
     except Exception as e:
     st.header("Chat with multiple PDFs :books:")
     user_question = st.text_input("Ask a question about your documents:")
     if user_question:
         handle_userinput(user_question)
         pdf_docs = st.file_uploader(
             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
         )
         if st.button("Process"):
             with st.spinner("Processing..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
                 vectorstore = get_vectorstore(text_chunks)
+                st.session_state.conversation = get_conversation_chain(vectorstore)
 if __name__ == '__main__':
     main()