DebabrataHalder committed on
Commit b86dd1f · verified · 1 Parent(s): 9b36123

Update app.py

Files changed (1): app.py (+17 -22)
app.py CHANGED

@@ -1,6 +1,6 @@
 import os
-import logging
 import time
+import logging
 from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader

@@ -40,23 +40,23 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 
-# Function to create a FAISS vectorstore with rate limiting
+# Function to create a FAISS vectorstore with throttling
 def get_vectorstore(text_chunks):
     cohere_api_key = os.getenv("COHERE_API_KEY")
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
+    vectorstore = FAISS()
+
+    batch_size = 10  # Number of chunks to process per batch
+    for i in range(0, len(text_chunks), batch_size):
+        batch = text_chunks[i:i + batch_size]
+        try:
+            vectors = embeddings.embed_documents(batch)
+            vectorstore.add_texts(texts=batch, embeddings=vectors)
+            logging.info(f"Processed batch {i // batch_size + 1}")
+        except Exception as e:
+            logging.error(f"Error processing batch {i // batch_size + 1}: {e}")
+        time.sleep(1.5)  # Sleep to avoid exceeding rate limit
 
-    # Rate limiting: Ensure no more than 40 requests per minute
-    max_requests_per_minute = 40
-    wait_time = 60 / max_requests_per_minute
-
-    vectorstore = None
-    try:
-        vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-        time.sleep(wait_time)  # Sleep to avoid hitting API rate limit
-    except Exception as e:
-        logging.error(f"Error creating vectorstore: {e}")
-        st.error("An error occurred while creating the vectorstore.")
-
     return vectorstore
 
 # Function to set up the conversational retrieval chain
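Review note: the hardcoded 1.5 s sleep preserves the old 60 / 40 = 1.5 s spacing (at most 40 embed calls per minute), but the rewritten function has two runtime problems: LangChain's FAISS wrapper has no zero-argument constructor, so FAISS() raises a TypeError, and add_texts() embeds the texts itself rather than accepting an embeddings keyword, so the precomputed vectors are ignored. A minimal sketch of one way to keep the batch-and-throttle intent, assuming the langchain_community FAISS wrapper and its from_embeddings/add_embeddings methods; the helper name and defaults below are illustrative, not from this commit:

import logging
import time

from langchain_community.vectorstores import FAISS


def build_vectorstore_in_batches(text_chunks, embeddings, batch_size=10, delay=1.5):
    """Embed chunks in throttled batches and fold them into a single FAISS index."""
    vectorstore = None
    for i in range(0, len(text_chunks), batch_size):
        batch = text_chunks[i:i + batch_size]
        vectors = embeddings.embed_documents(batch)  # one embeddings API call per batch
        pairs = list(zip(batch, vectors))  # (text, embedding) tuples
        if vectorstore is None:
            # Seed the index from the first batch of precomputed embeddings
            vectorstore = FAISS.from_embeddings(text_embeddings=pairs, embedding=embeddings)
        else:
            # Append later batches without re-embedding the texts
            vectorstore.add_embeddings(text_embeddings=pairs)
        logging.info("Processed batch %d", i // batch_size + 1)
        time.sleep(delay)  # throttle to stay under the provider's rate limit
    return vectorstore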
@@ -64,13 +64,13 @@ def get_conversation_chain(vectorstore):
     try:
         llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
-
+
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
             memory=memory
         )
-
+
         logging.info("Conversation chain created successfully.")
         return conversation_chain
     except Exception as e:
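Review note: this hunk only normalizes blank lines; the chain setup itself is unchanged. For context, handle_userinput (not shown in this diff) would typically call the chain through ConversationalRetrievalChain's standard interface; a sketch with illustrative rendering logic:

# Hypothetical caller; the 'question' input key and 'chat_history' output key
# are the chain's defaults given memory_key='chat_history' and return_messages=True.
response = st.session_state.conversation({'question': user_question})
st.session_state.chat_history = response['chat_history']
for i, message in enumerate(st.session_state.chat_history):
    # Even indices are user turns, odd indices are assistant replies
    speaker = "User" if i % 2 == 0 else "Assistant"
    st.write(f"{speaker}: {message.content}")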
@@ -103,7 +103,6 @@ def main():
 
     st.header("Chat with multiple PDFs :books:")
     user_question = st.text_input("Ask a question about your documents:")
-
    if user_question:
        handle_userinput(user_question)
 
@@ -112,16 +111,12 @@
     pdf_docs = st.file_uploader(
         "Upload your PDFs here and click on 'Process'", accept_multiple_files=True
     )
-
     if st.button("Process"):
         with st.spinner("Processing..."):
             raw_text = get_pdf_text(pdf_docs)
             text_chunks = get_text_chunks(raw_text)
             vectorstore = get_vectorstore(text_chunks)
-            if vectorstore is not None:  # Ensure vectorstore was created successfully
-                st.session_state.conversation = get_conversation_chain(vectorstore)
+            st.session_state.conversation = get_conversation_chain(vectorstore)
 
 if __name__ == '__main__':
     main()
-
-
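Review note: the removed guard was doing real work; the old get_vectorstore returned None on failure, and the guard skipped chain creation in that case. The rewritten get_vectorstore never assigns None, so the guard became dead code, but any failure inside it now surfaces as an uncaught exception in the Streamlit run. A sketch that keeps the defensive check, with an illustrative error message:

vectorstore = get_vectorstore(text_chunks)
if vectorstore is not None:  # only build the chain if embedding succeeded
    st.session_state.conversation = get_conversation_chain(vectorstore)
else:
    st.error("Vectorstore creation failed; please try processing again.")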