End of training
Browse files
- 1_Pooling/config.json +3 -3
- README.md +39 -31
- added_tokens.json +4 -0
- config.json +10 -4
- eval/Information-Retrieval_evaluation_dim_128_results.csv +8 -0
- eval/Information-Retrieval_evaluation_dim_256_results.csv +8 -0
- eval/Information-Retrieval_evaluation_dim_384_results.csv +8 -0
- eval/Information-Retrieval_evaluation_dim_512_results.csv +8 -0
- eval/Information-Retrieval_evaluation_dim_64_results.csv +8 -0
- eval/Information-Retrieval_evaluation_dim_768_results.csv +8 -0
- model.safetensors +2 -2
- runs/Jun11_16-40-59_snark.fritz.box/events.out.tfevents.1749652861.snark.fritz.box.78994.0 +3 -0
- runs/Jun15_14-10-20_snark.fritz.box/events.out.tfevents.1749989436.snark.fritz.box.43343.0 +3 -0
- runs/Jun15_14-15-02_snark.fritz.box/events.out.tfevents.1749989715.snark.fritz.box.44429.0 +3 -0
- runs/Jun15_14-23-07_snark.fritz.box/events.out.tfevents.1749990244.snark.fritz.box.45770.0 +3 -0
- runs/Jun15_14-26-47_snark.fritz.box/events.out.tfevents.1749990420.snark.fritz.box.46430.0 +3 -0
- runs/Jun17_12-04-38_snark.fritz.box/events.out.tfevents.1750154680.snark.fritz.box.67555.0 +3 -0
- runs/Jun17_12-09-44_snark.fritz.box/events.out.tfevents.1750155010.snark.fritz.box.68692.0 +3 -0
- sentence_bert_config.json +2 -2
- tokenizer.json +19 -1
- tokenizer_config.json +18 -9
- training_args.bin +1 -1
1_Pooling/config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"word_embedding_dimension":
|
3 |
-
"pooling_mode_cls_token":
|
4 |
-
"pooling_mode_mean_tokens":
|
5 |
"pooling_mode_max_tokens": false,
|
6 |
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
"pooling_mode_weightedmean_tokens": false,
|
|
|
1 |
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
"pooling_mode_max_tokens": false,
|
6 |
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
"pooling_mode_weightedmean_tokens": false,
|
README.md
CHANGED
@@ -7,51 +7,51 @@ tags:
|
|
7 |
- dataset_size:100
|
8 |
- loss:MatryoshkaLoss
|
9 |
- loss:MultipleNegativesRankingLoss
|
10 |
-
base_model:
|
11 |
widget:
|
12 |
-
- source_sentence: <start>
|
13 |
sentences:
|
14 |
-
-
|
15 |
-
-
|
16 |
- The
|
17 |
-
- source_sentence: <start>
|
18 |
sentences:
|
19 |
-
- B.
|
20 |
-
- The
|
21 |
- Spencers
|
22 |
-
-
|
|
|
|
|
23 |
on Richard B.
|
24 |
sentences:
|
25 |
- The
|
26 |
- Spencers
|
27 |
- letter
|
28 |
-
- source_sentence: The letter <start>
|
29 |
on Richard B.
|
30 |
sentences:
|
31 |
- The
|
|
|
32 |
- The
|
33 |
-
|
34 |
-
- source_sentence: The letter was <start> PLJUGRFVAAQAWQSFRFYTTRREEDDEGR <end> published
|
35 |
on Richard B.
|
36 |
sentences:
|
37 |
-
- later
|
38 |
- letter
|
39 |
- The
|
|
|
40 |
pipeline_tag: sentence-similarity
|
41 |
library_name: sentence-transformers
|
42 |
---
|
43 |
|
44 |
-
# SentenceTransformer based on
|
45 |
|
46 |
-
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [
|
47 |
|
48 |
## Model Details
|
49 |
|
50 |
### Model Description
|
51 |
- **Model Type:** Sentence Transformer
|
52 |
-
- **Base model:** [
|
53 |
-
- **Maximum Sequence Length:**
|
54 |
-
- **Output Dimensionality:**
|
55 |
- **Similarity Function:** Cosine Similarity
|
56 |
- **Training Dataset:**
|
57 |
- generator
|
@@ -68,8 +68,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
|
|
68 |
|
69 |
```
|
70 |
SentenceTransformer(
|
71 |
-
(0): Transformer({'max_seq_length':
|
72 |
-
(1): Pooling({'word_embedding_dimension':
|
73 |
(2): Normalize()
|
74 |
)
|
75 |
```
|
@@ -92,13 +92,13 @@ from sentence_transformers import SentenceTransformer
|
|
92 |
model = SentenceTransformer("checkpoints")
|
93 |
# Run inference
|
94 |
sentences = [
|
95 |
-
'The letter was <start>
|
96 |
'later',
|
97 |
'The',
|
98 |
]
|
99 |
embeddings = model.encode(sentences)
|
100 |
print(embeddings.shape)
|
101 |
-
# [3,
|
102 |
|
103 |
# Get the similarity scores for the embeddings
|
104 |
similarities = model.similarity(embeddings, embeddings)
|
@@ -167,10 +167,18 @@ You can finetune this model on your own dataset.
|
|
167 |
{
|
168 |
"loss": "MultipleNegativesRankingLoss",
|
169 |
"matryoshka_dims": [
|
|
|
|
|
170 |
384,
|
|
|
|
|
171 |
64
|
172 |
],
|
173 |
"matryoshka_weights": [
|
|
|
|
|
|
|
|
|
174 |
1,
|
175 |
1
|
176 |
],
|
@@ -315,16 +323,16 @@ You can finetune this model on your own dataset.
|
|
315 |
### Training Logs
|
316 |
| Epoch | Step | Training Loss |
|
317 |
|:-----:|:----:|:-------------:|
|
318 |
-
| 0.4 | 10 |
|
319 |
-
| 0.8 | 20 |
|
320 |
-
| 1.2 | 30 |
|
321 |
-
| 1.6 | 40 |
|
322 |
-
| 2.0 | 50 |
|
323 |
-
| 2.4 | 60 |
|
324 |
-
| 2.8 | 70 |
|
325 |
-
| 3.2 | 80 |
|
326 |
-
| 3.6 | 90 |
|
327 |
-
| 4.0 | 100 |
|
328 |
|
329 |
|
330 |
### Framework Versions
|
|
|
7 |
- dataset_size:100
|
8 |
- loss:MatryoshkaLoss
|
9 |
- loss:MultipleNegativesRankingLoss
|
10 |
+
base_model: BAAI/bge-base-en-v1.5
|
11 |
widget:
|
12 |
+
- source_sentence: <start> FTYGHYHHYHGGTTGRREEEEEEEEDEEEE <end>
|
13 |
sentences:
|
14 |
+
- 'on'
|
15 |
+
- later
|
16 |
- The
|
17 |
+
- source_sentence: <start> YTTHHNNBHJHHGRFRRRFFEEEERRRRED <end>
|
18 |
sentences:
|
|
|
|
|
19 |
- Spencers
|
20 |
+
- Richard
|
21 |
+
- The
|
22 |
+
- source_sentence: The <start> MLIUYFRRSWEREFRGTTRFTTTFEDWEER <end> was later published
|
23 |
on Richard B.
|
24 |
sentences:
|
25 |
- The
|
26 |
- Spencers
|
27 |
- letter
|
28 |
+
- source_sentence: The letter was <start> PLJUGRFVAAQAWQSFRFYTTRREEDDEGR <end> published
|
29 |
on Richard B.
|
30 |
sentences:
|
31 |
- The
|
32 |
+
- later
|
33 |
- The
|
34 |
+
- source_sentence: The letter was <start> OLKMUYFDXSSAAZWRGTTTGRRDEDDRRR <end> published
|
|
|
35 |
on Richard B.
|
36 |
sentences:
|
|
|
37 |
- letter
|
38 |
- The
|
39 |
+
- later
|
40 |
pipeline_tag: sentence-similarity
|
41 |
library_name: sentence-transformers
|
42 |
---
|
43 |
|
44 |
+
# SentenceTransformer based on BAAI/bge-base-en-v1.5
|
45 |
|
46 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) on the generator dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
47 |
|
48 |
## Model Details
|
49 |
|
50 |
### Model Description
|
51 |
- **Model Type:** Sentence Transformer
|
52 |
+
- **Base model:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) <!-- at revision a5beb1e3e68b9ab74eb54cfd186867f64f240e1a -->
|
53 |
+
- **Maximum Sequence Length:** 512 tokens
|
54 |
+
- **Output Dimensionality:** 768 dimensions
|
55 |
- **Similarity Function:** Cosine Similarity
|
56 |
- **Training Dataset:**
|
57 |
- generator
|
|
|
68 |
|
69 |
```
|
70 |
SentenceTransformer(
|
71 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
72 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
73 |
(2): Normalize()
|
74 |
)
|
75 |
```
|
|
|
92 |
model = SentenceTransformer("checkpoints")
|
93 |
# Run inference
|
94 |
sentences = [
|
95 |
+
'The letter was <start> OLKMUYFDXSSAAZWRGTTTGRRDEDDRRR <end> published on Richard B.',
|
96 |
'later',
|
97 |
'The',
|
98 |
]
|
99 |
embeddings = model.encode(sentences)
|
100 |
print(embeddings.shape)
|
101 |
+
# [3, 768]
|
102 |
|
103 |
# Get the similarity scores for the embeddings
|
104 |
similarities = model.similarity(embeddings, embeddings)
|
|
|
167 |
{
|
168 |
"loss": "MultipleNegativesRankingLoss",
|
169 |
"matryoshka_dims": [
|
170 |
+
768,
|
171 |
+
512,
|
172 |
384,
|
173 |
+
256,
|
174 |
+
128,
|
175 |
64
|
176 |
],
|
177 |
"matryoshka_weights": [
|
178 |
+
1,
|
179 |
+
1,
|
180 |
+
1,
|
181 |
+
1,
|
182 |
1,
|
183 |
1
|
184 |
],
|
|
|
323 |
### Training Logs
|
324 |
| Epoch | Step | Training Loss |
|
325 |
|:-----:|:----:|:-------------:|
|
326 |
+
| 0.4 | 10 | 13.6421 |
|
327 |
+
| 0.8 | 20 | 11.8949 |
|
328 |
+
| 1.2 | 30 | 7.241 |
|
329 |
+
| 1.6 | 40 | 6.3184 |
|
330 |
+
| 2.0 | 50 | 4.4524 |
|
331 |
+
| 2.4 | 60 | 3.6606 |
|
332 |
+
| 2.8 | 70 | 3.4123 |
|
333 |
+
| 3.2 | 80 | 2.6028 |
|
334 |
+
| 3.6 | 90 | 2.1896 |
|
335 |
+
| 4.0 | 100 | 2.1076 |
|
336 |
|
337 |
|
338 |
### Framework Versions
|
added_tokens.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[END]": 30523,
|
3 |
+
"[START]": 30522
|
4 |
+
}
|
config.json
CHANGED
@@ -7,19 +7,25 @@
|
|
7 |
"gradient_checkpointing": false,
|
8 |
"hidden_act": "gelu",
|
9 |
"hidden_dropout_prob": 0.1,
|
10 |
-
"hidden_size":
|
|
|
|
|
|
|
11 |
"initializer_range": 0.02,
|
12 |
-
"intermediate_size":
|
|
|
|
|
|
|
13 |
"layer_norm_eps": 1e-12,
|
14 |
"max_position_embeddings": 512,
|
15 |
"model_type": "bert",
|
16 |
"num_attention_heads": 12,
|
17 |
-
"num_hidden_layers":
|
18 |
"pad_token_id": 0,
|
19 |
"position_embedding_type": "absolute",
|
20 |
"torch_dtype": "float32",
|
21 |
"transformers_version": "4.52.4",
|
22 |
"type_vocab_size": 2,
|
23 |
"use_cache": true,
|
24 |
-
"vocab_size":
|
25 |
}
|
|
|
7 |
"gradient_checkpointing": false,
|
8 |
"hidden_act": "gelu",
|
9 |
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
"layer_norm_eps": 1e-12,
|
20 |
"max_position_embeddings": 512,
|
21 |
"model_type": "bert",
|
22 |
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
"pad_token_id": 0,
|
25 |
"position_embedding_type": "absolute",
|
26 |
"torch_dtype": "float32",
|
27 |
"transformers_version": "4.52.4",
|
28 |
"type_vocab_size": 2,
|
29 |
"use_cache": true,
|
30 |
+
"vocab_size": 30524
|
31 |
}
|
eval/Information-Retrieval_evaluation_dim_128_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.0,0.1,0.14,0.4,0.0,0.0,0.03333333333333333,0.1,0.027999999999999997,0.14,0.04,0.4,0.08903571428571422,0.15891748166085473,0.1280443398620643
|
3 |
+
2.0,50,0.1,0.11,0.13,0.5,0.1,0.1,0.03666666666666666,0.11,0.026000000000000002,0.13,0.05,0.5,0.15130952380952367,0.2269385614063873,0.18373924080735232
|
4 |
+
3.0,75,0.1,0.1,0.15,0.47,0.1,0.1,0.03333333333333333,0.1,0.03,0.15,0.04699999999999999,0.47,0.15097619047619035,0.22097689479015556,0.18936047152881524
|
5 |
+
None,0,0.0,0.0,0.0,0.44,0.0,0.0,0.0,0.0,0.0,0.0,0.044000000000000004,0.44,0.05490476190476188,0.13850552467454108,0.09346048396048395
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
eval/Information-Retrieval_evaluation_dim_256_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.06,0.07,0.28,0.37,0.06,0.06,0.02333333333333333,0.07,0.05600000000000001,0.28,0.037000000000000005,0.37,0.1209801587301586,0.17837172608153767,0.15862331467981622
|
3 |
+
2.0,50,0.1,0.12,0.19,0.41,0.1,0.1,0.039999999999999994,0.12,0.038000000000000006,0.19,0.040999999999999995,0.41,0.15186507936507923,0.20964629721675362,0.18852671077632382
|
4 |
+
3.0,75,0.1,0.11,0.27,0.48,0.1,0.1,0.03666666666666666,0.11,0.054000000000000006,0.27,0.048,0.48,0.16601587301587287,0.23721331849834656,0.19977757596720444
|
5 |
+
None,0,0.0,0.0,0.07,0.41,0.0,0.0,0.0,0.0,0.013999999999999999,0.07,0.040999999999999995,0.41,0.05853174603174605,0.13587224773156262,0.10210324805913042
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
eval/Information-Retrieval_evaluation_dim_384_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.02,0.05,0.22,0.31,0.02,0.02,0.016666666666666666,0.05,0.044000000000000004,0.22,0.031000000000000007,0.31,0.08586904761904764,0.13828730301060527,0.12756731095701682
|
3 |
+
2.0,50,0.1,0.12,0.16,0.31,0.1,0.1,0.039999999999999994,0.12,0.032,0.16,0.031,0.31,0.13404761904761897,0.17336238083258182,0.17839009988119894
|
4 |
+
3.0,75,0.1,0.12,0.2,0.41,0.1,0.1,0.039999999999999994,0.12,0.04000000000000001,0.2,0.040999999999999995,0.41,0.15067063492063482,0.20877015107584718,0.19038975871909308
|
5 |
+
None,0,0.0,0.0,0.07,0.34,0.0,0.0,0.0,0.0,0.014000000000000002,0.07,0.034,0.34,0.049218253968253974,0.11324168536312658,0.09819056841850958
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
eval/Information-Retrieval_evaluation_dim_512_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.0,0.03,0.2,0.34,0.0,0.0,0.01,0.03,0.04,0.2,0.034,0.34,0.06984920634920636,0.13224375948142458,0.10933917346843042
|
3 |
+
2.0,50,0.09,0.13,0.17,0.3,0.09,0.09,0.04333333333333333,0.13,0.034,0.17,0.03,0.3,0.1310238095238095,0.1690811231378314,0.17563488227458815
|
4 |
+
3.0,75,0.1,0.13,0.17,0.4,0.1,0.1,0.04333333333333333,0.13,0.034,0.17,0.04,0.4,0.14872619047619037,0.20487994694445105,0.18983214742773566
|
5 |
+
None,0,0.0,0.0,0.08,0.4,0.0,0.0,0.0,0.0,0.016,0.08,0.04,0.4,0.05428968253968255,0.129985584157983,0.10074305269893503
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
eval/Information-Retrieval_evaluation_dim_64_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.08,0.1,0.21,0.32,0.08,0.08,0.03333333333333333,0.1,0.042,0.21,0.032,0.32,0.131797619047619,0.17534726235750558,0.17307788345984942
|
3 |
+
2.0,50,0.1,0.18,0.21,0.36,0.1,0.1,0.06,0.18,0.042,0.21,0.036000000000000004,0.36,0.15984523809523804,0.2053888999109833,0.20180102624413773
|
4 |
+
3.0,75,0.11,0.19,0.22,0.34,0.11,0.11,0.06333333333333332,0.19,0.044000000000000004,0.22,0.034,0.34,0.16101190476190472,0.20199186992553161,0.20712119291956754
|
5 |
+
None,0,0.0,0.0,0.0,0.29,0.0,0.0,0.0,0.0,0.0,0.0,0.029000000000000005,0.29,0.03209523809523811,0.08710139890053481,0.08907092907092908
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
eval/Information-Retrieval_evaluation_dim_768_results.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
|
2 |
+
1.0,25,0.0,0.03,0.16,0.33,0.0,0.0,0.01,0.03,0.032,0.16,0.033,0.33,0.06391269841269842,0.12438013321411066,0.10620924478857914
|
3 |
+
2.0,50,0.08,0.12,0.14,0.26,0.08,0.08,0.039999999999999994,0.12,0.027999999999999997,0.14,0.026000000000000002,0.26,0.11834523809523803,0.15041653152432066,0.16498134545193366
|
4 |
+
3.0,75,0.1,0.1,0.14,0.35,0.1,0.1,0.03333333333333333,0.1,0.027999999999999997,0.14,0.035,0.35,0.13278968253968246,0.17998457242588123,0.17666418630389225
|
5 |
+
None,0,0.0,0.0,0.0,0.32,0.0,0.0,0.0,0.0,0.0,0.0,0.032,0.32,0.04357142857142856,0.10436767336990385,0.09584999559264264
|
6 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
7 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
8 |
+
None,0,1.0,1.0,1.0,1.0,1.0,1.0,0.3333333333333334,1.0,0.19999999999999996,1.0,0.09999999999999998,1.0,1.0,1.0,1.0
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ca6eeae92e6efc831bf6a82a7584f012f0a9004a86fd9fcaa7ec41dd8f5295e
|
3 |
+
size 437957472
|
runs/Jun11_16-40-59_snark.fritz.box/events.out.tfevents.1749652861.snark.fritz.box.78994.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d20cbdd31ecf28535f8e1fdba0dc5865b20964cee54914b3573f91dcbc6672e
|
3 |
+
size 24753
|
runs/Jun15_14-10-20_snark.fritz.box/events.out.tfevents.1749989436.snark.fritz.box.43343.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d31120162a4fa69f4372a0b6b16533cd21cf55b317dc3eb5e6f30bc84314c1d5
|
3 |
+
size 6276
|
runs/Jun15_14-15-02_snark.fritz.box/events.out.tfevents.1749989715.snark.fritz.box.44429.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e337f62989e0c5ad50317044f2fb27564fe352849cd5a9b94f2bdfa89dddf04
|
3 |
+
size 6276
|
runs/Jun15_14-23-07_snark.fritz.box/events.out.tfevents.1749990244.snark.fritz.box.45770.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d80b66cb641b86b526d9a3dba132db2f2da5d9752a215abc115caae86b0b0b10
|
3 |
+
size 6276
|
runs/Jun15_14-26-47_snark.fritz.box/events.out.tfevents.1749990420.snark.fritz.box.46430.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:812ace3a559ad2568a8992f9927b616887b70bf76c210c8763fd0795228d55a4
|
3 |
+
size 6276
|
runs/Jun17_12-04-38_snark.fritz.box/events.out.tfevents.1750154680.snark.fritz.box.67555.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a21cef39306279df407d5385a47792ca4384c3e0ba8868908c1e896a3527f10c
|
3 |
+
size 6772
|
runs/Jun17_12-09-44_snark.fritz.box/events.out.tfevents.1750155010.snark.fritz.box.68692.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d951afc89aca83d46a61ce5651768b4b66024e6bb5f019cef975671092af6924
|
3 |
+
size 6772
|
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
-
"max_seq_length":
|
3 |
-
"do_lower_case":
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
}
|
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
-
"max_length":
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
@@ -59,6 +59,24 @@
|
|
59 |
"rstrip": false,
|
60 |
"normalized": false,
|
61 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
}
|
63 |
],
|
64 |
"normalizer": {
|
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
+
"max_length": 512,
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
|
|
59 |
"rstrip": false,
|
60 |
"normalized": false,
|
61 |
"special": true
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"id": 30522,
|
65 |
+
"content": "[START]",
|
66 |
+
"single_word": false,
|
67 |
+
"lstrip": false,
|
68 |
+
"rstrip": false,
|
69 |
+
"normalized": true,
|
70 |
+
"special": false
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"id": 30523,
|
74 |
+
"content": "[END]",
|
75 |
+
"single_word": false,
|
76 |
+
"lstrip": false,
|
77 |
+
"rstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"special": false
|
80 |
}
|
81 |
],
|
82 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -39,27 +39,36 @@
|
|
39 |
"rstrip": false,
|
40 |
"single_word": false,
|
41 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
-
"clean_up_tokenization_spaces":
|
45 |
"cls_token": "[CLS]",
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"extra_special_tokens": {},
|
49 |
"mask_token": "[MASK]",
|
50 |
-
"
|
51 |
-
"model_max_length": 256,
|
52 |
"never_split": null,
|
53 |
-
"pad_to_multiple_of": null,
|
54 |
"pad_token": "[PAD]",
|
55 |
-
"pad_token_type_id": 0,
|
56 |
-
"padding_side": "right",
|
57 |
"sep_token": "[SEP]",
|
58 |
-
"stride": 0,
|
59 |
"strip_accents": null,
|
60 |
"tokenize_chinese_chars": true,
|
61 |
"tokenizer_class": "BertTokenizer",
|
62 |
-
"truncation_side": "right",
|
63 |
-
"truncation_strategy": "longest_first",
|
64 |
"unk_token": "[UNK]"
|
65 |
}
|
|
|
39 |
"rstrip": false,
|
40 |
"single_word": false,
|
41 |
"special": true
|
42 |
+
},
|
43 |
+
"30522": {
|
44 |
+
"content": "[START]",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"30523": {
|
52 |
+
"content": "[END]",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
}
|
59 |
},
|
60 |
+
"clean_up_tokenization_spaces": true,
|
61 |
"cls_token": "[CLS]",
|
62 |
"do_basic_tokenize": true,
|
63 |
"do_lower_case": true,
|
64 |
"extra_special_tokens": {},
|
65 |
"mask_token": "[MASK]",
|
66 |
+
"model_max_length": 512,
|
|
|
67 |
"never_split": null,
|
|
|
68 |
"pad_token": "[PAD]",
|
|
|
|
|
69 |
"sep_token": "[SEP]",
|
|
|
70 |
"strip_accents": null,
|
71 |
"tokenize_chinese_chars": true,
|
72 |
"tokenizer_class": "BertTokenizer",
|
|
|
|
|
73 |
"unk_token": "[UNK]"
|
74 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5969
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1762c950240851dbfbe4aa34848965ecfc1203552f84ba62d02df00ba0c5f815
|
3 |
size 5969
|