# Similarity: 0.6725 | Document 0: Machine learning is a subset of ...
# Similarity: 0.8287 | Document 1: Neural networks are trained ...
```

## Transformers.js

If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:

```bash
npm i @huggingface/transformers
```

You can then use the model to compute embeddings like this:

```js
import { AutoModel, AutoTokenizer, matmul } from "@huggingface/transformers";

// Download from the 🤗 Hub
const model_id = "onnx-community/mdbr-leaf-mt-ONNX";
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
const model = await AutoModel.from_pretrained(model_id, {
  dtype: "fp32", // Options: "fp32" | "q8" | "q4"
});

// Prepare queries and documents
const queries = [
  "What is machine learning?",
  "How does neural network training work?",
];
const documents = [
  "Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data.",
  "Neural networks are trained through backpropagation, adjusting weights to minimize prediction errors.",
];
const inputs = await tokenizer([
  ...queries.map((x) => "Represent this sentence for searching relevant passages: " + x),
  ...documents,
], { padding: true });

// Generate embeddings
const { sentence_embedding } = await model(inputs);
const normalized_sentence_embedding = sentence_embedding.normalize();

// Compute similarities
const scores = await matmul(
  normalized_sentence_embedding.slice([0, queries.length]),
  normalized_sentence_embedding.slice([queries.length, null]).transpose(1, 0),
);
const scores_list = scores.tolist();

for (let i = 0; i < queries.length; ++i) {
  console.log(`Query: ${queries[i]}`);
  for (let j = 0; j < documents.length; ++j) {
    console.log(`  Similarity: ${scores_list[i][j].toFixed(4)} | Document ${j}: ${documents[j]}`);
  }
  console.log();
}
```

<details>

<summary>See example output</summary>

```
Query: What is machine learning?
  Similarity: 0.9063 | Document 0: Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data.
  Similarity: 0.7287 | Document 1: Neural networks are trained through backpropagation, adjusting weights to minimize prediction errors.

Query: How does neural network training work?
  Similarity: 0.6725 | Document 0: Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data.
  Similarity: 0.8287 | Document 1: Neural networks are trained through backpropagation, adjusting weights to minimize prediction errors.
```

</details>

## Transformers Usage

See [this example notebook](https://huggingface.co/MongoDB/mdbr-leaf-mt/blob/main/transformers_example_mt.ipynb) for usage with the 🤗 Transformers library.
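For a rough idea of what the notebook covers, below is a minimal sketch with plain 🤗 Transformers. It is not the official recipe: it assumes mean pooling over the last hidden state and reuses the retrieval prompt from the Transformers.js example above, so defer to the notebook for the exact pooling and prompt setup.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

model_id = "MongoDB/mdbr-leaf-mt"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)
model.eval()

queries = ["What is machine learning?"]
documents = ["Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data."]

# Queries get the same retrieval prompt used in the Transformers.js example above
texts = ["Represent this sentence for searching relevant passages: " + q for q in queries] + documents
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Assumption: mean-pool the last hidden state over non-padding tokens, then L2-normalize
mask = inputs["attention_mask"].unsqueeze(-1).float()
embeddings = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
embeddings = F.normalize(embeddings, p=2, dim=1)

# Cosine similarity between each query and each document
scores = embeddings[: len(queries)] @ embeddings[len(queries):].T
print(scores)
```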