Upload 2 files
Browse files- README.md +2 -2
- evaluate_models.ipynb +182 -0
README.md
CHANGED
|
@@ -31,7 +31,7 @@ language:
|
|
| 31 |
|
| 32 |
`mdbr-leaf-mt` is a compact high-performance text embedding model designed for classification, clustering, semantic sentence similarity and summarization tasks.
|
| 33 |
|
| 34 |
-
To enable even greater efficiency, `mdbr-leaf-mt` supports [flexible asymmetric architectures](#asymmetric-retrieval-setup) and is robust to [vector quantization](#vector-quantization) and [MRL truncation](#mrl).
|
| 35 |
|
| 36 |
If you are looking to perform semantic search / information retrieval (e.g. for RAGs), please check out our [`mdbr-leaf-ir`](https://huggingface.co/MongoDB/mdbr-leaf-ir) model, which is specifically trained for these tasks.
|
| 37 |
|
|
@@ -172,7 +172,7 @@ print(f"* Similarities:\n{similarities}")
|
|
| 172 |
|
| 173 |
# Evaluation
|
| 174 |
|
| 175 |
-
|
| 176 |
|
| 177 |
# Citation
|
| 178 |
|
|
|
|
| 31 |
|
| 32 |
`mdbr-leaf-mt` is a compact high-performance text embedding model designed for classification, clustering, semantic sentence similarity and summarization tasks.
|
| 33 |
|
| 34 |
+
To enable even greater efficiency, `mdbr-leaf-mt` supports [flexible asymmetric architectures](#asymmetric-retrieval-setup) and is robust to [vector quantization](#vector-quantization) and [MRL truncation](#mrl-truncation).
|
| 35 |
|
| 36 |
If you are looking to perform semantic search / information retrieval (e.g. for RAGs), please check out our [`mdbr-leaf-ir`](https://huggingface.co/MongoDB/mdbr-leaf-ir) model, which is specifically trained for these tasks.
|
| 37 |
|
|
|
|
| 172 |
|
| 173 |
# Evaluation
|
| 174 |
|
| 175 |
+
Please see the [evaluation notebook](https://huggingface.co/MongoDB/mdbr-leaf-mt/blob/main/evaluate_models.ipynb).
|
| 176 |
|
| 177 |
# Citation
|
| 178 |
|
evaluate_models.ipynb
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"id": "initial_id",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"collapsed": true
|
| 8 |
+
},
|
| 9 |
+
"source": [
|
| 10 |
+
"import os\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"IS_COLAB = 'GOOGLE_CLOUD_PROJECT' in os.environ  # presence of this env var is used as the Colab marker\n",
|
| 13 |
+
"if IS_COLAB:\n",
|
| 14 |
+
" # this needs to run before all other imports\n",
|
| 15 |
+
" os.environ['HF_HOME'] = '/content/cache/' # to avoid running out of disk space\n",
|
| 16 |
+
"\n",
|
| 17 |
+
"import mteb\n",
|
| 18 |
+
"from sentence_transformers import SentenceTransformer"
|
| 19 |
+
],
|
| 20 |
+
"outputs": [],
|
| 21 |
+
"execution_count": null
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"source": [
|
| 27 |
+
"MODELS = {\n",
|
| 28 |
+
" 'ir-prod': {\n",
|
| 29 |
+
" 'name': 'MongoDB/mdbr-leaf-ir',\n",
|
| 30 |
+
" 'revision': '2e46f5aac796e621d51f678c306a66ede4712ecb'\n",
|
| 31 |
+
" },\n",
|
| 32 |
+
" 'ir-paper': {\n",
|
| 33 |
+
" 'name': 'MongoDB/mdbr-leaf-ir',\n",
|
| 34 |
+
" 'revision': 'ea98995e96beac21b820aa8ad9afaa6fd29b243d'\n",
|
| 35 |
+
" },\n",
|
| 36 |
+
" 'mt-prod': {\n",
|
| 37 |
+
" 'name': 'MongoDB/mdbr-leaf-mt',\n",
|
| 38 |
+
" 'revision': '66c47ba6d753efc208d54412b5af6c744a39a4df'\n",
|
| 39 |
+
" },\n",
|
| 40 |
+
" 'mt-paper': {\n",
|
| 41 |
+
" 'name': 'MongoDB/mdbr-leaf-mt',\n",
|
| 42 |
+
" 'revision': 'c342f945a6855346bd5f48d5ee8b7e39120b0ce9',\n",
|
| 43 |
+
" }\n",
|
| 44 |
+
"}"
|
| 45 |
+
],
|
| 46 |
+
"id": "f0189ff1e7814a5a",
|
| 47 |
+
"outputs": [],
|
| 48 |
+
"execution_count": null
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"cell_type": "markdown",
|
| 53 |
+
"source": [
|
| 54 |
+
"**Notebook configuration**:\n",
|
| 55 |
+
"* set the output folder,\n",
|
| 56 |
+
"* select one of the models defined above, and\n",
|
| 57 |
+
"* choose the desired benchmark"
|
| 58 |
+
],
|
| 59 |
+
"id": "371c6122efdf476a"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"cell_type": "code",
|
| 64 |
+
"source": [
|
| 65 |
+
"output_folder = \"../../data/results/publish/\"  # passed as output_folder to evaluation.run below\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"model_selection = MODELS['ir-prod']\n",
|
| 68 |
+
"benchmark_name = \"BEIR\"\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"# model_selection = MODELS['mt-prod']\n",
|
| 71 |
+
"# benchmark_name = \"MTEB(eng, v2)\""
|
| 72 |
+
],
|
| 73 |
+
"id": "58d52a330febb9ac",
|
| 74 |
+
"outputs": [],
|
| 75 |
+
"execution_count": null
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"metadata": {},
|
| 79 |
+
"cell_type": "markdown",
|
| 80 |
+
"source": "Load the model and run the evals",
|
| 81 |
+
"id": "1b4367afc1278e"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"source": [
|
| 87 |
+
"model = SentenceTransformer(\n",
|
| 88 |
+
" model_selection['name'],\n",
|
| 89 |
+
" revision=model_selection['revision']\n",
|
| 90 |
+
")\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"# alternative:\n",
|
| 93 |
+
"# meta = mteb.get_model_meta(\n",
|
| 94 |
+
"# model_name=model_selection['name'],\n",
|
| 95 |
+
"# revision=model_selection['revision']\n",
|
| 96 |
+
"# )\n",
|
| 97 |
+
"# model = meta.load_model()"
|
| 98 |
+
],
|
| 99 |
+
"id": "d6f13945a94f7a85",
|
| 100 |
+
"outputs": [],
|
| 101 |
+
"execution_count": null
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"metadata": {},
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"source": [
|
| 107 |
+
"benchmark = mteb.get_benchmark(benchmark_name)\n",
|
| 108 |
+
"evaluation = mteb.MTEB(tasks=benchmark)"
|
| 109 |
+
],
|
| 110 |
+
"id": "c716c6344f9cd939",
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"execution_count": null
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"cell_type": "code",
|
| 117 |
+
"source": [
|
| 118 |
+
"%%time\n",
|
| 119 |
+
"results = evaluation.run(\n",
|
| 120 |
+
" model=model,\n",
|
| 121 |
+
" verbosity=1,\n",
|
| 122 |
+
" output_folder=output_folder,\n",
|
| 123 |
+
" overwrite_results=True,\n",
|
| 124 |
+
")"
|
| 125 |
+
],
|
| 126 |
+
"id": "9bd44e88fc360663",
|
| 127 |
+
"outputs": [],
|
| 128 |
+
"execution_count": null
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"cell_type": "markdown",
|
| 133 |
+
"source": "Evaluate Quora",
|
| 134 |
+
"id": "733e52ca41cf92a7"
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"source": [
|
| 140 |
+
"if model_selection['name'].endswith('ir'):\n",
|
| 141 |
+
"    # Quora is closer to a sentence similarity task than a retrieval one (its queries aren't proper\n",
|
| 142 |
+
"    # user queries), so embed without the typical query prompt; prompts are restored afterwards.\n",
|
| 143 |
+
"    saved_prompts, model.prompts = getattr(model, 'prompts', {}), {}\n",
|
| 144 |
+
"    try:\n",
|
| 145 |
+
"        tasks = mteb.get_tasks(tasks=[\"QuoraRetrieval\"])\n",
|
| 146 |
+
"        evaluation = mteb.MTEB(tasks=tasks)\n",
|
| 147 |
+
"        results = evaluation.run(\n",
|
| 148 |
+
"            model=model,\n",
|
| 149 |
+
"            verbosity=1,\n",
|
| 150 |
+
"            output_folder=output_folder,\n",
|
| 151 |
+
"            overwrite_results=True,\n",
|
| 152 |
+
"        )\n",
|
| 153 |
+
"    finally:\n",
|
| 154 |
+
"        model.prompts = saved_prompts"
|
| 155 |
+
],
|
| 156 |
+
"id": "61aea9a04468202f",
|
| 157 |
+
"outputs": [],
|
| 158 |
+
"execution_count": null
|
| 159 |
+
}
|
| 160 |
+
],
|
| 161 |
+
"metadata": {
|
| 162 |
+
"kernelspec": {
|
| 163 |
+
"display_name": "Python 3",
|
| 164 |
+
"language": "python",
|
| 165 |
+
"name": "python3"
|
| 166 |
+
},
|
| 167 |
+
"language_info": {
|
| 168 |
+
"codemirror_mode": {
|
| 169 |
+
"name": "ipython",
|
| 170 |
+
"version": 2
|
| 171 |
+
},
|
| 172 |
+
"file_extension": ".py",
|
| 173 |
+
"mimetype": "text/x-python",
|
| 174 |
+
"name": "python",
|
| 175 |
+
"nbconvert_exporter": "python",
|
| 176 |
+
"pygments_lexer": "ipython2",
|
| 177 |
+
"version": "2.7.6"
|
| 178 |
+
}
|
| 179 |
+
},
|
| 180 |
+
"nbformat": 4,
|
| 181 |
+
"nbformat_minor": 5
|
| 182 |
+
}
|