File size: 2,701 Bytes
f5776d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
from typing import Optional, Union, Any
from dsp.utils import dotdict
try:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents._paging import SearchItemPaged
except ImportError as e:
raise ImportError(
"You need to install azure-search-documents library"
"Please use the command: pip install azure-search-documents"
)
class AzureCognitiveSearch:
"""Wrapper for the Azure Cognitive Search Retrieval."""
def __init__(
self,
search_service_name: str,
search_api_key: str,
search_index_name: str,
field_text: str, # required field to map with "content" field in dsp framework
field_score: str, # required field to map with "score" field in dsp framework
):
self.search_service_name = search_service_name
self.search_api_key = search_api_key
self.search_index_name = search_index_name
self.endpoint=f"https://{self.search_service_name}.search.windows.net"
self.field_text = field_text # field name of the text content
self.field_score = field_score # field name of the search score
# Create a client
self.credential = AzureKeyCredential(self.search_api_key)
self.client = SearchClient(endpoint=self.endpoint,
index_name=self.search_index_name,
credential=self.credential)
def __call__(self, query: str, k: int = 10) -> Union[list[str], list[dotdict]]:
topk: list[dict[str, Any]] = azure_search_request(self.field_text, self.field_score, self.client, query, k)
topk = [{**d, "long_text": d["text"]} for d in topk]
return [dotdict(psg) for psg in topk]
def azure_search_request(key_content: str, key_score: str, client: SearchClient, query: str, top: int =1):
'''
Search in Azure Cognitive Search Index
'''
results = client.search(search_text=query,top=top)
results = process_azure_result(results, key_content, key_content)
return results
def process_azure_result(results:SearchItemPaged, content_key:str, content_score: str):
'''
process received result from Azure Cognitive Search as dictionary array and map content and score to correct format
'''
res = []
for result in results:
tmp = {}
for key, value in result.items():
if(key == content_key):
tmp["text"] = value # assign content
elif(key == content_score):
tmp["score"] = value
else:
tmp[key] = value
res.append(tmp)
return res
|