Mbonea committed on
Commit
8d8ca50
·
1 Parent(s): e460efd
Files changed (1) hide show
  1. App/Embedding/utils/Initialize.py +32 -0
App/Embedding/utils/Initialize.py CHANGED
@@ -50,6 +50,24 @@ def generateChunks(chunks, task_id, n=100):
50
 
51
 
52
  def search(query: str, task_id: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  filtering_conditions = {
54
  "task_id": {"$eq": "task_id"},
55
  }
@@ -62,5 +80,19 @@ def search(query: str, task_id: str):
62
 
63
 
64
  def encode(temp: list[Document]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  docsearch.add_documents(temp)
66
  # return embeddings.embed_documents(texts = [d.page_content for d in temp])
 
50
 
51
 
52
  def search(query: str, task_id: str):
53
+ # get api key from app.pinecone.io
54
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
55
+ # find your environment next to the api key in pinecone console
56
+ PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
57
+
58
+
59
+ index_name = "transcript-bits"
60
+ model_name = "thenlper/gte-base"
61
+ embeddings = HuggingFaceEmbeddings(model_name=model_name)
62
+
63
+
64
+ pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
65
+ vector_index = pinecone.Index(index_name=index_name)
66
+ docsearch = Pinecone.from_existing_index(index_name, embeddings)
67
+
68
+
69
+
70
+
71
  filtering_conditions = {
72
  "task_id": {"$eq": "task_id"},
73
  }
 
80
 
81
 
82
  def encode(temp: list[Document]):
83
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
84
+ # find your environment next to the api key in pinecone console
85
+ PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
86
+
87
+
88
+ index_name = "transcript-bits"
89
+ model_name = "thenlper/gte-base"
90
+ embeddings = HuggingFaceEmbeddings(model_name=model_name)
91
+
92
+
93
+ pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
94
+ vector_index = pinecone.Index(index_name=index_name)
95
+ docsearch = Pinecone.from_existing_index(index_name, embeddings)
96
+
97
  docsearch.add_documents(temp)
98
  # return embeddings.embed_documents(texts = [d.page_content for d in temp])