0000sir
Fix keys of Xinference-deployed models, especially when a model has the same name as a publicly hosted model. (#2832)
13b2570
| # | |
| # Copyright 2024 The InfiniFlow Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| import re | |
| import threading | |
| from urllib.parse import urljoin | |
| import requests | |
| from huggingface_hub import snapshot_download | |
| import os | |
| from abc import ABC | |
| import numpy as np | |
| from api.settings import LIGHTEN | |
| from api.utils.file_utils import get_home_cache_dir | |
| from rag.utils import num_tokens_from_string, truncate | |
| import json | |
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    The computation goes through ``np.exp``, so NumPy arrays are handled
    element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
class Base(ABC):
    """Common parent of the rerank model wrappers defined in this file."""

    def __init__(self, key, model_name):
        """Accept a key and a model name; the base class stores neither."""
        pass

    def similarity(self, query: str, texts: list):
        """Score ``texts`` against ``query``; must be overridden.

        The subclasses in this file return a ``(scores, token_count)`` pair.

        Raises:
            NotImplementedError: always, in the base class.
        """
        # The message names the method that actually has to be overridden
        # (it previously referred to a non-existent "encode" method).
        raise NotImplementedError("Please implement similarity method!")
class DefaultRerank(Base):
    """Reranker backed by a ``FlagReranker`` instance shared by all instances."""

    # Class-level cache of the loaded model; filled by the first constructor call.
    _model = None
    # Guards the one-time load of the shared model.
    _model_lock = threading.Lock()

    def __init__(self, key, model_name, **kwargs):
        """
        Load the shared model if needed and bind it to this instance.

        The model is first opened from the home cache directory; if that
        raises, it is fetched with ``snapshot_download`` into the same
        directory and opened from there. Nothing is loaded when ``LIGHTEN``
        is truthy.

        If downloading HuggingFace models is troublesome, a mirror might help.
        For Linux:
            export HF_ENDPOINT=https://hf-mirror.com
        """
        needs_load = not (LIGHTEN or DefaultRerank._model)
        if needs_load:
            import torch
            from FlagEmbedding import FlagReranker

            def cache_path():
                # Model directory under the home cache, without the leading "<owner>/" prefix.
                return os.path.join(get_home_cache_dir(),
                                    re.sub(r"^[a-zA-Z]+/", "", model_name))

            with DefaultRerank._model_lock:
                # Checked again inside the lock before loading.
                if not DefaultRerank._model:
                    try:
                        DefaultRerank._model = FlagReranker(
                            cache_path(),
                            use_fp16=torch.cuda.is_available())
                    except Exception:
                        downloaded_dir = snapshot_download(
                            repo_id=model_name,
                            local_dir=cache_path(),
                            local_dir_use_symlinks=False)
                        DefaultRerank._model = FlagReranker(
                            downloaded_dir,
                            use_fp16=torch.cuda.is_available())
        self._model = DefaultRerank._model

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query``.

        Each text is passed through ``truncate(t, 2048)`` first. Returns a pair
        ``(scores, token_count)`` where ``scores`` is a NumPy array of
        sigmoid-transformed model scores and ``token_count`` is the sum of
        ``num_tokens_from_string`` over the truncated texts.
        """
        clipped = [truncate(t, 2048) for t in texts]
        pairs = [(query, t) for t in clipped]
        token_count = sum(num_tokens_from_string(t) for t in clipped)

        batch_size = 4096
        collected = []
        for start in range(0, len(pairs), batch_size):
            chunk = pairs[start:start + batch_size]
            raw = self._model.compute_score(chunk, max_length=2048)
            probs = sigmoid(np.array(raw)).tolist()
            # A 0-d result converts to a bare float rather than a list.
            if isinstance(probs, float):
                collected.append(probs)
            else:
                collected.extend(probs)
        return np.array(collected), token_count
class JinaRerank(Base):
    """Reranker that delegates scoring to an HTTP rerank endpoint (Jina by default)."""

    def __init__(self, key, model_name="jina-reranker-v1-base-en",
                 base_url="https://api.jina.ai/v1/rerank"):
        """
        Args:
            key: API key, sent as a ``Bearer`` token.
            model_name: value of the ``model`` field in each request.
            base_url: rerank endpoint; an empty or ``None`` value falls back
                to the public Jina endpoint.
        """
        # Previously the argument was ignored and the endpoint hard-coded.
        self.base_url = base_url or "https://api.jina.ai/v1/rerank"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}"
        }
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` via the remote endpoint.

        Each text is passed through ``truncate(t, 8196)`` before being sent.
        Returns ``(rank, total_tokens)``: ``rank[i]`` is the
        ``relevance_score`` reported for ``texts[i]`` and ``total_tokens`` is
        taken from the response's ``usage`` section. With no texts, returns an
        empty array and ``0`` without issuing a request.
        """
        # Nothing to rank: skip the network round trip.
        if len(texts) == 0:
            return np.array([]), 0
        texts = [truncate(t, 8196) for t in texts]
        data = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts)
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        rank = np.zeros(len(texts), dtype=float)
        # Results may arrive in any order; "index" maps each back to its input position.
        for d in res["results"]:
            rank[d["index"]] = d["relevance_score"]
        return rank, res["usage"]["total_tokens"]
class YoudaoRerank(DefaultRerank):
    """Reranker backed by a BCEmbedding ``RerankerModel`` shared by all instances."""

    # Separate class-level cache and lock from the parent class.
    _model = None
    _model_lock = threading.Lock()

    def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
        """
        Load the shared model if needed and bind it to this instance.

        The model is first opened from the home cache directory; if that
        raises, it is opened by name with ``maidalun1020`` replaced by
        ``InfiniFlow``. Nothing is loaded when ``LIGHTEN`` is truthy.
        """
        needs_load = not (LIGHTEN or YoudaoRerank._model)
        if needs_load:
            from BCEmbedding import RerankerModel

            with YoudaoRerank._model_lock:
                # Checked again inside the lock before loading.
                if not YoudaoRerank._model:
                    try:
                        print("LOADING BCE...")
                        local_path = os.path.join(
                            get_home_cache_dir(),
                            re.sub(r"^[a-zA-Z]+/", "", model_name))
                        YoudaoRerank._model = RerankerModel(model_name_or_path=local_path)
                    except Exception:
                        fallback_name = model_name.replace("maidalun1020", "InfiniFlow")
                        YoudaoRerank._model = RerankerModel(model_name_or_path=fallback_name)
        self._model = YoudaoRerank._model

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query``.

        Texts are truncated to the model's ``max_length``. Returns a pair
        ``(scores, token_count)`` where ``scores`` is a NumPy array of
        sigmoid-transformed model scores and ``token_count`` is the sum of
        ``num_tokens_from_string`` over the truncated texts.
        """
        pairs = [(query, truncate(t, self._model.max_length)) for t in texts]
        token_count = sum(num_tokens_from_string(clipped) for _, clipped in pairs)

        batch_size = 8
        collected = []
        for start in range(0, len(pairs), batch_size):
            chunk = pairs[start:start + batch_size]
            raw = self._model.compute_score(chunk, max_length=self._model.max_length)
            probs = sigmoid(np.array(raw)).tolist()
            # A 0-d result converts to a bare float rather than a list.
            if isinstance(probs, float):
                collected.append(probs)
            else:
                collected.extend(probs)
        return np.array(collected), token_count
class XInferenceRerank(Base):
    """Reranker that delegates scoring to an Xinference-style HTTP rerank endpoint."""

    def __init__(self, key="xxxxxxx", model_name="", base_url=""):
        """
        Args:
            key: API key, sent as a ``Bearer`` token.
            model_name: value of the ``model`` field in each request.
            base_url: server address. It is normalised so that it ends up
                pointing at a ``/v1/rerank`` path:
                * no ``/v1`` in it  -> ``/v1/rerank`` is joined onto the host;
                * ``/v1`` but no ``/rerank`` -> ``/rerank`` is appended.
        """
        if "/v1" not in base_url:
            base_url = urljoin(base_url, "/v1/rerank")
        elif "/rerank" not in base_url:
            # e.g. "http://host:9997/v1" used to be posted to verbatim,
            # which is not the rerank endpoint.
            base_url = base_url.rstrip("/") + "/rerank"
        self.model_name = model_name
        self.base_url = base_url
        self.headers = {
            "Content-Type": "application/json",
            "accept": "application/json",
            "Authorization": f"Bearer {key}"
        }

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` via the remote endpoint.

        Returns ``(rank, tokens)``: ``rank[i]`` is the ``relevance_score``
        reported for ``texts[i]`` and ``tokens`` is the sum of the response's
        ``input_tokens`` and ``output_tokens``. With no texts, returns an
        empty array and ``0`` without issuing a request.
        """
        if len(texts) == 0:
            return np.array([]), 0
        data = {
            "model": self.model_name,
            "query": query,
            "return_documents": "true",
            "return_len": "true",
            "documents": texts
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        rank = np.zeros(len(texts), dtype=float)
        # Results may arrive in any order; "index" maps each back to its input position.
        for d in res["results"]:
            rank[d["index"]] = d["relevance_score"]
        tokens = res["meta"]["tokens"]
        return rank, tokens["input_tokens"] + tokens["output_tokens"]
class LocalAIRerank(Base):
    """Placeholder reranker; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the constructor arguments without storing any of them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError("The LocalAIRerank has not been implement")
class NvidiaRerank(Base):
    """Reranker that delegates scoring to an NVIDIA retrieval HTTP endpoint."""

    def __init__(
        self, key, model_name, base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia/"
    ):
        """
        Args:
            key: API key, sent as a ``Bearer`` token.
            model_name: model identifier. Two names get special handling:
                ``nvidia/nv-rerankqa-mistral-4b-v3`` uses a model-specific
                endpoint, and ``nvidia/rerank-qa-mistral-4b`` is sent as
                ``nv-rerank-qa-mistral-4b:1``.
            base_url: endpoint prefix; an empty or ``None`` value falls back
                to the public default.
        """
        # URL segments must be joined with "/", which os.path.join does not
        # guarantee on Windows; posixpath.join does so on every platform.
        import posixpath

        if not base_url:
            base_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/"
        self.model_name = model_name
        # Generic endpoint; also ensures base_url is always defined, so an
        # unrecognised model name no longer fails with AttributeError later.
        self.base_url = posixpath.join(base_url, "reranking")
        if self.model_name == "nvidia/nv-rerankqa-mistral-4b-v3":
            self.base_url = posixpath.join(
                base_url, "nv-rerankqa-mistral-4b-v3", "reranking"
            )
        elif self.model_name == "nvidia/rerank-qa-mistral-4b":
            self.model_name = "nv-rerank-qa-mistral-4b:1"

        self.headers = {
            "accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` via the remote endpoint.

        Returns ``(rank, token_count)``: ``rank[i]`` is the ``logit`` reported
        for ``texts[i]``; ``token_count`` is computed locally with
        ``num_tokens_from_string`` over the query and all texts.
        """
        token_count = num_tokens_from_string(query) + sum(
            num_tokens_from_string(t) for t in texts
        )
        data = {
            "model": self.model_name,
            "query": {"text": query},
            "passages": [{"text": text} for text in texts],
            "truncate": "END",
            "top_n": len(texts),
        }
        res = requests.post(self.base_url, headers=self.headers, json=data).json()
        rank = np.zeros(len(texts), dtype=float)
        # Rankings may arrive in any order; "index" maps each back to its input position.
        for d in res["rankings"]:
            rank[d["index"]] = d["logit"]
        return rank, token_count
class LmStudioRerank(Base):
    """Placeholder reranker; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the constructor arguments without storing any of them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError("The LmStudioRerank has not been implement")
class OpenAI_APIRerank(Base):
    """Placeholder reranker; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the constructor arguments without storing any of them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError("The api has not been implement")
class CoHereRerank(Base):
    """Reranker that delegates scoring to the ``cohere`` client library."""

    def __init__(self, key, model_name, base_url=None):
        """Create a ``cohere.Client`` from ``key``; ``base_url`` is not used."""
        from cohere import Client

        self.model_name = model_name
        self.client = Client(api_key=key)

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` through the client's ``rerank`` call.

        Returns ``(rank, token_count)``: ``rank[i]`` is the ``relevance_score``
        reported for ``texts[i]``; ``token_count`` is computed locally with
        ``num_tokens_from_string`` over the query and all texts.
        """
        token_count = num_tokens_from_string(query)
        for text in texts:
            token_count += num_tokens_from_string(text)

        response = self.client.rerank(
            model=self.model_name,
            query=query,
            documents=texts,
            top_n=len(texts),
            return_documents=False,
        )

        rank = np.zeros(len(texts), dtype=float)
        for item in response.results:
            rank[item.index] = item.relevance_score
        return rank, token_count
class TogetherAIRerank(Base):
    """Placeholder reranker; scoring is not available."""

    def __init__(self, key, model_name, base_url):
        """Accept the constructor arguments without storing any of them."""

    def similarity(self, query: str, texts: list):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError("The api has not been implement")
class SILICONFLOWRerank(Base):
    """Reranker that delegates scoring to a SiliconFlow-style HTTP rerank endpoint."""

    def __init__(
        self, key, model_name, base_url="https://api.siliconflow.cn/v1/rerank"
    ):
        """
        Args:
            key: API key, sent as a ``Bearer`` token.
            model_name: value of the ``model`` field in each request.
            base_url: rerank endpoint; an empty or ``None`` value falls back
                to the default endpoint.
        """
        self.base_url = base_url if base_url else "https://api.siliconflow.cn/v1/rerank"
        self.model_name = model_name
        self.headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": f"Bearer {key}",
        }

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` via the remote endpoint.

        Returns ``(rank, tokens)``: ``rank[i]`` is the ``relevance_score``
        reported for ``texts[i]`` and ``tokens`` is the sum of the response's
        ``input_tokens`` and ``output_tokens``.
        """
        body = {
            "model": self.model_name,
            "query": query,
            "documents": texts,
            "top_n": len(texts),
            "return_documents": False,
            "max_chunks_per_doc": 1024,
            "overlap_tokens": 80,
        }
        reply = requests.post(self.base_url, json=body, headers=self.headers).json()

        rank = np.zeros(len(texts), dtype=float)
        for entry in reply["results"]:
            rank[entry["index"]] = entry["relevance_score"]

        usage = reply["meta"]["tokens"]
        return rank, usage["input_tokens"] + usage["output_tokens"]
class BaiduYiyanRerank(Base):
    """Reranker that delegates scoring to the ``qianfan`` ``Reranker`` resource."""

    def __init__(self, key, model_name, base_url=None):
        """
        Args:
            key: JSON string; its ``yiyan_ak`` and ``yiyan_sk`` entries are
                passed to the client (each defaults to ``""`` when missing).
            model_name: model passed to every rerank call.
            base_url: not used.
        """
        from qianfan.resources import Reranker

        credentials = json.loads(key)
        access_key = credentials.get("yiyan_ak", "")
        secret_key = credentials.get("yiyan_sk", "")
        self.client = Reranker(ak=access_key, sk=secret_key)
        self.model_name = model_name

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` through the client's ``do`` call.

        Returns ``(rank, total_tokens)``: ``rank[i]`` is the
        ``relevance_score`` reported for ``texts[i]`` and ``total_tokens`` is
        taken from the response body's ``usage`` section.
        """
        reply = self.client.do(
            model=self.model_name,
            query=query,
            documents=texts,
            top_n=len(texts),
        )
        body = reply.body

        rank = np.zeros(len(texts), dtype=float)
        for entry in body["results"]:
            rank[entry["index"]] = entry["relevance_score"]
        return rank, body["usage"]["total_tokens"]
class VoyageRerank(Base):
    """Reranker that delegates scoring to the ``voyageai`` client library."""

    def __init__(self, key, model_name, base_url=None):
        """Create a ``voyageai.Client`` from ``key``; ``base_url`` is not used."""
        import voyageai

        self.model_name = model_name
        self.client = voyageai.Client(api_key=key)

    def similarity(self, query: str, texts: list):
        """
        Score every text against ``query`` through the client's ``rerank`` call.

        Returns ``(rank, total_tokens)``: ``rank[i]`` is the
        ``relevance_score`` reported for ``texts[i]`` and ``total_tokens`` is
        read from the response object.
        """
        response = self.client.rerank(
            query=query, documents=texts, model=self.model_name, top_k=len(texts)
        )
        rank = np.zeros(len(texts), dtype=float)
        for item in response.results:
            rank[item.index] = item.relevance_score
        return rank, response.total_tokens