ok
Browse files
App/Embedding/utils/Initialize.py
CHANGED
@@ -50,6 +50,24 @@ def generateChunks(chunks, task_id, n=100):
|
|
50 |
|
51 |
|
52 |
def search(query: str, task_id: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
filtering_conditions = {
|
54 |
"task_id": {"$eq": "task_id"},
|
55 |
}
|
@@ -62,5 +80,19 @@ def search(query: str, task_id: str):
|
|
62 |
|
63 |
|
64 |
def encode(temp: list[Document]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
docsearch.add_documents(temp)
|
66 |
# return embeddings.embed_documents(texts = [d.page_content for d in temp])
|
|
|
50 |
|
51 |
|
52 |
def search(query: str, task_id: str):
|
53 |
+
# get api key from app.pinecone.io
|
54 |
+
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
55 |
+
# find your environment next to the api key in pinecone console
|
56 |
+
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
|
57 |
+
|
58 |
+
|
59 |
+
index_name = "transcript-bits"
|
60 |
+
model_name = "thenlper/gte-base"
|
61 |
+
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
62 |
+
|
63 |
+
|
64 |
+
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
|
65 |
+
vector_index = pinecone.Index(index_name=index_name)
|
66 |
+
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
filtering_conditions = {
|
72 |
"task_id": {"$eq": "task_id"},
|
73 |
}
|
|
|
80 |
|
81 |
|
82 |
def encode(temp: list[Document]):
    """Add ``temp`` to the existing ``transcript-bits`` Pinecone index.

    Each call reads the Pinecone credentials from the ``PINECONE_API_KEY``
    and ``PINECONE_ENVIRONMENT`` environment variables, builds a
    ``HuggingFaceEmbeddings`` instance for the ``thenlper/gte-base`` model,
    initialises the Pinecone client, and wraps the existing index with
    ``Pinecone.from_existing_index`` before calling ``add_documents``.

    Args:
        temp: Documents handed unchanged to ``add_documents``.

    Returns:
        None. Whatever ``add_documents`` returns is discarded.
    """
    # Both values are shown in the Pinecone console (app.pinecone.io); the
    # environment is listed next to the API key.
    pinecone_api_key = os.environ.get("PINECONE_API_KEY")
    pinecone_env = os.environ.get("PINECONE_ENVIRONMENT")

    index_name = "transcript-bits"
    embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-base")

    # Initialise the client, then attach to the already-created index.
    pinecone.init(api_key=pinecone_api_key, environment=pinecone_env)
    docsearch = Pinecone.from_existing_index(index_name, embeddings)
    docsearch.add_documents(temp)
|