DebabrataHalder committed
Commit d49426a · verified · 1 Parent(s): cb14e97

Update app.py

Files changed (1)
  1. app.py +11 -21
app.py CHANGED
@@ -1,14 +1,16 @@
+
 import os
 import logging
-import time
 from dotenv import load_dotenv
 import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
+# from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain_cohere import CohereEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+# from langchain.llms import Ollama
 from langchain_groq import ChatGroq
 
 # Load environment variables
@@ -40,34 +42,22 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 
-# Function to create a FAISS vectorstore with batching
+# Function to create a FAISS vectorstore
+# def get_vectorstore(text_chunks):
+#     embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+#     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+#     return vectorstore
+
 def get_vectorstore(text_chunks):
     cohere_api_key = os.getenv("COHERE_API_KEY")
     embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
-
-    # Batch processing to respect Cohere's rate limit
-    batch_size = 40
-    all_embeddings = []
-
-    for i in range(0, len(text_chunks), batch_size):
-        batch = text_chunks[i:i + batch_size]
-        logging.info(f"Processing batch {i // batch_size + 1}: {len(batch)} texts")
-        try:
-            batch_embeddings = embeddings.embed_documents(batch)
-            all_embeddings.extend(batch_embeddings)
-        except Exception as e:
-            logging.error(f"Error embedding batch {i // batch_size + 1}: {e}")
-            st.error(f"An error occurred while embedding batch {i // batch_size + 1}.")
-        if i + batch_size < len(text_chunks):  # Enforce delay only if more batches remain
-            logging.info("Waiting for 60 seconds to respect API rate limits...")
-            time.sleep(60)  # Wait for 60 seconds
-
-    vectorstore = FAISS.from_texts_with_embeddings(texts=text_chunks, embeddings=all_embeddings)
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 
 # Function to set up the conversational retrieval chain
 def get_conversation_chain(vectorstore):
     try:
+        # llm = Ollama(model="llama3.2:1b")
         llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
 
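Note on the fix: the deleted batching path ended by calling FAISS.from_texts_with_embeddings, which is not a method on LangChain's FAISS wrapper, so index construction would have failed at that line; the replacement FAISS.from_texts(texts=..., embedding=...) lets LangChain drive the embedding calls itself. If rate-limited batching were still wanted (e.g. on a Cohere trial key), FAISS.from_embeddings is the supported entry point for precomputed vectors. A minimal sketch under that assumption follows; get_vectorstore_batched, batch_size, and delay_s are illustrative names, not part of this app:

import os
import time
import logging

from langchain_cohere import CohereEmbeddings
from langchain.vectorstores import FAISS

def get_vectorstore_batched(text_chunks, batch_size=40, delay_s=60):
    """Embed chunks in fixed-size batches, sleeping between batches to
    respect an API rate limit, then build the FAISS index from the
    precomputed vectors."""
    embeddings = CohereEmbeddings(
        model="embed-english-v3.0",
        cohere_api_key=os.getenv("COHERE_API_KEY"),
    )
    vectors = []
    for i in range(0, len(text_chunks), batch_size):
        batch = text_chunks[i:i + batch_size]
        logging.info("Embedding batch %d (%d texts)", i // batch_size + 1, len(batch))
        vectors.extend(embeddings.embed_documents(batch))
        if i + batch_size < len(text_chunks):  # no sleep after the last batch
            time.sleep(delay_s)
    # from_embeddings takes (text, vector) pairs plus the embeddings object,
    # which the store reuses to embed queries at search time.
    return FAISS.from_embeddings(
        text_embeddings=list(zip(text_chunks, vectors)),
        embedding=embeddings,
    )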