---
# Hugging Face model card metadata for Ops-MoA-Conan-embedding-v1.
language:
  - zh
# NOTE(review): base_model names the same model as the model-index entry
# below, while the usage snippet loads TencentBAC/Conan-embedding-v1 —
# confirm which repository is the intended base.
base_model: OpenSearch-AI/Ops-MoA-Conan-embedding-v1
# C-MTEB retrieval results; every entry is the dev split, scored by nDCG@10.
model-index:
  - name: Ops-MoA-Conan-embedding-v1
    results:
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/CmedqaRetrieval
          name: MTEB CmedqaRetrieval
          config: default
          split: dev
          revision: 'cd540c506dae1cf9e9a59c3e06f42030d54e7301'
        metrics:
          - type: ndcg_at_10
            value: 48.21
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/CovidRetrieval
          name: MTEB CovidRetrieval
          config: default
          split: dev
          revision: '1271c7809071a13532e05f25fb53511ffce77117'
        metrics:
          - type: ndcg_at_10
            value: 92.66
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/DuRetrieval
          name: MTEB DuRetrieval
          config: default
          split: dev
          revision: 'a1a333e290fe30b10f3f56498e3a0d911a693ced'
        metrics:
          - type: ndcg_at_10
            value: 89.23
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/EcomRetrieval
          name: MTEB EcomRetrieval
          config: default
          split: dev
          revision: '687de13dc7294d6fd9be10c6945f9e8fec8166b9'
        metrics:
          - type: ndcg_at_10
            value: 70.93
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/MMarcoRetrieval
          name: MTEB MMarcoRetrieval
          config: default
          split: dev
          revision: '539bbde593d947e2a124ba72651aafc09eb33fc2'
        metrics:
          - type: ndcg_at_10
            value: 82.35
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/MedicalRetrieval
          name: MTEB MedicalRetrieval
          config: default
          split: dev
          revision: '2039188fb5800a9803ba5048df7b76e6fb151fc6'
        metrics:
          - type: ndcg_at_10
            value: 68.27
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/T2Retrieval
          name: MTEB T2Retrieval
          config: default
          split: dev
          revision: '8731a845f1bf500a4f111cf1070785c793d10e64'
        metrics:
          - type: ndcg_at_10
            value: 83.51
      - task:
          type: Retrieval
        dataset:
          type: C-MTEB/VideoRetrieval
          name: MTEB VideoRetrieval
          config: default
          split: dev
          revision: '58c2597a5943a2ba48f4668c3b90d796283c5639'
        metrics:
          - type: ndcg_at_10
            value: 80.64
pipeline_tag: feature-extraction
tags:
  - mteb
  - sentence-transformers
library_name: transformers
---

```python
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer

from modeling_adaptor import MixtureOfAdaptors


class CustomSentenceTransformer(nn.Module):
    """Conan-embedding-v1 encoder with a mixture-of-adaptors module attached.

    Loads ``TencentBAC/Conan-embedding-v1`` through ``SentenceTransformer``,
    registers a ``MixtureOfAdaptors`` module on it under the name
    ``'adaptor'``, and truncates every encoded embedding to its first
    ``output_dim`` columns.

    Args:
        output_dim: Number of leading embedding columns kept by ``encode``.
        adaptor_path: Path of the adaptor checkpoint (a state dict) to load.
            Defaults to ``conan-adaptors.pth`` in the working directory.
    """

    def __init__(self, output_dim=1536, adaptor_path="conan-adaptors.pth"):
        super().__init__()
        self.model = SentenceTransformer('TencentBAC/Conan-embedding-v1', trust_remote_code=True)
        # NOTE(review): 5 and 1792 are presumably the adaptor count and the
        # embedding width — confirm against modeling_adaptor.
        adaptor = MixtureOfAdaptors(5, 1792)
        # map_location="cpu" lets a checkpoint saved on GPU load on CPU-only
        # hosts; load_state_dict copies the values into the adaptor's own
        # parameters, so the staging device does not matter.
        # NOTE: torch.load unpickles the file — only load trusted checkpoints.
        state_dict = torch.load(adaptor_path, map_location="cpu")
        adaptor.load_state_dict(state_dict)
        self.model.add_module('adaptor', adaptor)
        self.output_dim = output_dim

    def encode(self, sentences, **kwargs):
        """Encode ``sentences`` and keep the first ``output_dim`` columns.

        Args:
            sentences: Input forwarded unchanged to the wrapped model's
                ``encode``.
            **kwargs: Extra keyword arguments forwarded to that ``encode``.

        Returns:
            The wrapped model's 2-D embedding result sliced to
            ``[:, :output_dim]``.
        """
        embeddings = self.model.encode(sentences, **kwargs)
        return embeddings[:, :self.output_dim]


# Example usage: build the wrapper with output_dim=1536 and encode one
# sample sentence.
sample_sentences = ['text']
model = CustomSentenceTransformer(output_dim=1536)
model.encode(sample_sentences)