Spaces:

CognizantAI
/

QueryTranslationBuildToolWithVectors

Sleeping

App Files Files Community

davidfearne committed on Dec 12, 2024

Commit

37f93a9

verified ·

1 Parent(s): 48902ca

Create retriever.py

Browse files

Files changed (1) hide show

retriever.py +45 -0

retriever.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# Retriever function
+import os
+import uuid
+
+import pandas as pd
+from langchain_openai import AzureOpenAIEmbeddings
+from pinecone import Pinecone
+# Initialize Pinecone client
+pc = Pinecone(api_key="567aca04-6fb0-40a0-ba92-a5ed30be190b")
+index = pc.Index("openai-serverless")
+# Azure OpenAI configuration
+os.environ["AZURE_OPENAI_API_KEY"] =
+os.environ["AZURE_OPENAI_ENDPOINT"] = "https://davidfearn-gpt4.openai.azure.com/"
+os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = "text-embedding-ada-002"
+os.environ["AZURE_OPENAI_API_VERSION"] = "2024-08-01-preview"
+# Model configuration
+embeddings_model = AzureOpenAIEmbeddings(
+    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+)
+def retriever(query, namespace="gskRegIntel", top_k=3):
+    """
+    Embeds a query string and searches the vector database for similar entries.
+    :param query: The string to embed and search for.
+    :param namespace: Pinecone namespace to search within.
+    :param top_k: Number of top results to retrieve.
+    :return: List of search results with metadata and scores.
+    """
+    try:
+        # Generate embedding for the query
+        query_embedding = embeddings_model.embed_query(query)
+        # Perform search in Pinecone
+        results = index.query(vector=query_embedding, top_k=top_k, namespace=namespace, include_metadata=True)
+        return results.matches
+    except Exception as e:
+        print(f"Error during search: {e}")
+        return []