DebabrataHalder committed on
Commit
b3f85f1
·
verified ·
1 Parent(s): 4b2c780

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -22
app.py CHANGED
@@ -4,15 +4,13 @@ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
 
7
  from langchain_cohere import CohereEmbeddings
8
- from langchain_community.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
 
11
  from langchain_groq import ChatGroq
12
- from langchain_core.docstore import InMemoryDocstore
13
- import faiss
14
- from uuid import uuid4
15
- from langchain_core.documents import Document
16
 
17
  # Load environment variables
18
  load_dotenv()
@@ -44,30 +42,21 @@ def get_text_chunks(text):
44
  return chunks
45
 
46
  # Function to create a FAISS vectorstore
 
 
 
 
 
47
  def get_vectorstore(text_chunks):
48
  cohere_api_key = os.getenv("COHERE_API_KEY")
49
  embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
50
-
51
- # Initialize FAISS index
52
- embedding_size = len(embeddings.embed_query("sample text"))
53
- index = faiss.IndexFlatL2(embedding_size)
54
- vectorstore = FAISS(
55
- embedding_function=embeddings,
56
- index=index,
57
- docstore=InMemoryDocstore(),
58
- index_to_docstore_id={}
59
- )
60
-
61
- # Add documents to the vectorstore
62
- documents = [Document(page_content=chunk) for chunk in text_chunks]
63
- ids = [str(uuid4()) for _ in documents]
64
- vectorstore.add_documents(documents=documents, ids=ids)
65
-
66
  return vectorstore
67
 
68
  # Function to set up the conversational retrieval chain
69
  def get_conversation_chain(vectorstore):
70
  try:
 
71
  llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
72
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
73
 
@@ -125,4 +114,4 @@ def main():
125
  st.session_state.conversation = get_conversation_chain(vectorstore)
126
 
127
  if __name__ == '__main__':
128
- main()
 
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ # from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  from langchain_cohere import CohereEmbeddings
9
+ from langchain.vectorstores import FAISS
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
12
+ # from langchain.llms import Ollama
13
  from langchain_groq import ChatGroq
 
 
 
 
14
 
15
  # Load environment variables
16
  load_dotenv()
 
42
  return chunks
43
 
44
  # Function to create a FAISS vectorstore
45
+ # def get_vectorstore(text_chunks):
46
+ # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
47
+ # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
48
+ # return vectorstore
49
+
50
  def get_vectorstore(text_chunks):
51
  cohere_api_key = os.getenv("COHERE_API_KEY")
52
  embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)
53
+ vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  return vectorstore
55
 
56
  # Function to set up the conversational retrieval chain
57
  def get_conversation_chain(vectorstore):
58
  try:
59
+ # llm = Ollama(model="llama3.2:1b")
60
  llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.5)
61
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
62
 
 
114
  st.session_state.conversation = get_conversation_chain(vectorstore)
115
 
116
  if __name__ == '__main__':
117
+ main()