DebabrataHalder committed on
Commit
ff5ca1d
·
verified ·
1 Parent(s): d49426a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -1,17 +1,15 @@
1
-
2
  import os
3
  import logging
 
4
  from dotenv import load_dotenv
5
  import streamlit as st
6
  from PyPDF2 import PdfReader
7
  from langchain.text_splitter import CharacterTextSplitter
8
- # from langchain.embeddings import HuggingFaceInstructEmbeddings
9
  from langchain_cohere import CohereEmbeddings
10
  from langchain.vectorstores import FAISS
11
  from langchain.memory import ConversationBufferMemory
12
  from langchain.chains import ConversationalRetrievalChain
13
- # from langchain.llms import Ollama
14
- from langchain_groq import ChatGroq
15
 
16
  # Load environment variables
17
  load_dotenv()
@@ -42,22 +40,30 @@ def get_text_chunks(text):
42
  chunks = text_splitter.split_text(text)
43
  return chunks
44
 
45
- # Function to create a FAISS vectorstore
46
- # def get_vectorstore(text_chunks):
47
- # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
48
- # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
49
- # return vectorstore
50
-
51
  def get_vectorstore(text_chunks):
52
  cohere_api_key = os.getenv("COHERE_API_KEY")
53
  embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
54
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
55
- return vectorstore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Function to set up the conversational retrieval chain
58
  def get_conversation_chain(vectorstore):
59
  try:
60
- # llm = Ollama(model="llama3.2:1b")
61
  llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
62
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
63
 
@@ -112,7 +118,9 @@ def main():
112
  raw_text = get_pdf_text(pdf_docs)
113
  text_chunks = get_text_chunks(raw_text)
114
  vectorstore = get_vectorstore(text_chunks)
115
- st.session_state.conversation = get_conversation_chain(vectorstore)
 
116
 
117
  if __name__ == '__main__':
118
  main()
 
 
 
1
  import os
2
  import logging
3
+ import time
4
  from dotenv import load_dotenv
5
  import streamlit as st
6
  from PyPDF2 import PdfReader
7
  from langchain.text_splitter import CharacterTextSplitter
 
8
  from langchain_cohere import CohereEmbeddings
9
  from langchain.vectorstores import FAISS
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
12
+ from cohere.errors import TooManyRequestsError
 
13
 
14
  # Load environment variables
15
  load_dotenv()
 
40
  chunks = text_splitter.split_text(text)
41
  return chunks
42
 
43
# Function to create a FAISS vectorstore with error handling for rate limits
def get_vectorstore(text_chunks):
    """Embed *text_chunks* with Cohere and build a FAISS vectorstore.

    Retries with true exponential backoff when Cohere rate-limits the
    request; any other failure is reported once and aborts immediately.

    Args:
        text_chunks: Iterable of text strings to embed.

    Returns:
        A FAISS vectorstore on success, or None when all retries are
        exhausted or an unexpected error occurs.
    """
    cohere_api_key = os.getenv("COHERE_API_KEY")
    embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)

    retries = 5  # Number of retries before giving up
    for attempt in range(retries):
        try:
            vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
            return vectorstore
        except TooManyRequestsError as e:
            # Exponential backoff: 1, 2, 4, 8, 16 seconds. The previous code
            # slept attempt+1 seconds (linear) despite claiming "exponential".
            delay = 2 ** attempt
            # Lazy %-args so the message is only formatted if the record is emitted.
            logging.warning("Rate limit exceeded: %s. Retrying in %d seconds...", e, delay)
            time.sleep(delay)
        except Exception as e:
            logging.error("Error creating vectorstore: %s", e)
            st.error("An error occurred while creating the vectorstore.")
            # Return immediately: the previous `break` fell through to the
            # rate-limit error below, showing two contradictory messages for
            # a single non-rate-limit failure.
            return None

    st.error("Failed to create vectorstore after multiple attempts due to rate limits.")
    return None
63
 
64
  # Function to set up the conversational retrieval chain
65
  def get_conversation_chain(vectorstore):
66
  try:
 
67
  llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
68
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
69
 
 
118
  raw_text = get_pdf_text(pdf_docs)
119
  text_chunks = get_text_chunks(raw_text)
120
  vectorstore = get_vectorstore(text_chunks)
121
+ if vectorstore is not None: # Only proceed if vectorstore creation was successful
122
+ st.session_state.conversation = get_conversation_chain(vectorstore)
123
 
124
  if __name__ == '__main__':
125
  main()
126
+