---
# Model card metadata (Hugging Face front matter).
language:
- zh
base_model: OpenSearch-AI/Ops-MoA-Conan-embedding-v1
model-index:
- name: Ops-MoA-Conan-embedding-v1
  results:
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/CmedqaRetrieval
      name: MTEB CmedqaRetrieval
      config: default
      split: dev
      revision: 'cd540c506dae1cf9e9a59c3e06f42030d54e7301'
    metrics:
    - type: ndcg_at_10
      value: 48.218
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/CovidRetrieval
      name: MTEB CovidRetrieval
      config: default
      split: dev
      revision: '1271c7809071a13532e05f25fb53511ffce77117'
    metrics:
    - type: ndcg_at_10
      value: 92.664
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/DuRetrieval
      name: MTEB DuRetrieval
      config: default
      split: dev
      revision: 'a1a333e290fe30b10f3f56498e3a0d911a693ced'
    metrics:
    - type: ndcg_at_10
      value: 89.233
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/EcomRetrieval
      name: MTEB EcomRetrieval
      config: default
      split: dev
      revision: '687de13dc7294d6fd9be10c6945f9e8fec8166b9'
    metrics:
    - type: ndcg_at_10
      value: 70.93
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/MMarcoRetrieval
      name: MTEB MMarcoRetrieval
      config: default
      split: dev
      revision: '539bbde593d947e2a124ba72651aafc09eb33fc2'
    metrics:
    - type: ndcg_at_10
      value: 82.351
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/MedicalRetrieval
      name: MTEB MedicalRetrieval
      config: default
      split: dev
      revision: '2039188fb5800a9803ba5048df7b76e6fb151fc6'
    metrics:
    - type: ndcg_at_10
      value: 68.276
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/T2Retrieval
      name: MTEB T2Retrieval
      config: default
      split: dev
      revision: '8731a845f1bf500a4f111cf1070785c793d10e64'
    metrics:
    - type: ndcg_at_10
      value: 83.509
  - task:
      type: Retrieval
    dataset:
      type: C-MTEB/VideoRetrieval
      name: MTEB VideoRetrieval
      config: default
      split: dev
      revision: '58c2597a5943a2ba48f4668c3b90d796283c5639'
    metrics:
    - type: ndcg_at_10
      value: 80.643
pipeline_tag: feature-extraction
tags:
- mteb
---
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer

from modeling_adaptor import MixtureOfAdaptors
class CustomSentenceTransformer(nn.Module):
    """Conan embedding model with a mixture-of-adaptors module attached.

    Loads the ``TencentBAC/Conan-embedding-v1`` SentenceTransformer, registers
    a ``MixtureOfAdaptors`` module (weights read from ``conan-adaptors.pth``)
    on it under the name ``adaptor``, and truncates every encoded embedding to
    its first ``output_dim`` components.

    Args:
        output_dim: Number of leading embedding components kept by
            :meth:`encode`. Defaults to 1536.
    """

    def __init__(self, output_dim=1536):
        super().__init__()
        self.model = SentenceTransformer('TencentBAC/Conan-embedding-v1', trust_remote_code=True)
        # NOTE(review): the meaning of 5 and 1792 is defined in
        # modeling_adaptor; presumably they must match the checkpoint below.
        adaptor = MixtureOfAdaptors(5, 1792)
        # Deserialize onto CPU so loading does not depend on the device the
        # checkpoint was saved from; load_state_dict copies the values into
        # the adaptor's own parameters.
        # NOTE(review): torch.load unpickles the file -- only load trusted
        # checkpoints.
        state_dict = torch.load("conan-adaptors.pth", map_location="cpu")
        adaptor.load_state_dict(state_dict)
        # NOTE(review): presumably this makes the adaptor the final stage of
        # the SentenceTransformer pipeline -- confirm.
        self.model.add_module('adaptor', adaptor)
        self.output_dim = output_dim

    def encode(self, sentences, **kwargs):
        """Encode ``sentences`` and keep the first ``output_dim`` components.

        Args:
            sentences: Input forwarded unchanged to the wrapped model's
                ``encode``.
            **kwargs: Extra keyword arguments forwarded to the wrapped
                model's ``encode``.

        Returns:
            The wrapped model's embeddings, sliced along the last axis to at
            most ``output_dim`` entries.
        """
        embeddings = self.model.encode(sentences, **kwargs)
        # Slice the last axis with an ellipsis so a 1-D result (returned for
        # a single input string) is handled as well as a 2-D batch.
        return embeddings[..., :self.output_dim]
# Build the wrapper, keeping the first 1536 embedding components.
model = CustomSentenceTransformer(output_dim=1536)
# Encode a one-sentence batch; the result is not stored.
model.encode(sentences=['text'])