Mardiyyah committed on
Commit
a40ea21
·
verified ·
1 Parent(s): 7ec0f8d

Mlr-shared-task-ewc_stabilised-no-date-lambda0.4

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: afl-3.0
3
+ base_model: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - f1
8
+ - precision
9
+ - recall
10
+ - accuracy
11
+ model-index:
12
+ - name: ewc_stabilised_no_date_lambda0.4
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # ewc_stabilised_no_date_lambda0.4
20
+
21
+ This model is a fine-tuned version of [masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0](https://huggingface.co/masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0) on an unspecified dataset (no dataset name was recorded by the Trainer).
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.1841
24
+ - F1: 0.8384
25
+ - Precision: 0.8348
26
+ - Recall: 0.8421
27
+ - Accuracy: 0.9649
28
+
29
+ ## Model description
30
+
31
+ More information needed
32
+
33
+ ## Intended uses & limitations
34
+
35
+ More information needed
36
+
37
+ ## Training and evaluation data
38
+
39
+ More information needed
40
+
41
+ ## Training procedure
42
+
43
+ ### Training hyperparameters
44
+
45
+ The following hyperparameters were used during training:
46
+ - learning_rate: 2e-05
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 8
49
+ - seed: 3407
50
+ - gradient_accumulation_steps: 4
51
+ - total_train_batch_size: 64
52
+ - optimizer: AdamW (adamw_torch) with betas=(0.9,0.999), epsilon=1e-08 and weight_decay=0.01
53
+ - lr_scheduler_type: linear
54
+ - lr_scheduler_warmup_steps: 1000
55
+ - num_epochs: 10
56
+ - mixed_precision_training: Native AMP
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Accuracy |
61
+ |:-------------:|:------:|:----:|:---------------:|:------:|:---------:|:------:|:--------:|
62
+ | 0.3292 | 0.9993 | 701 | 0.1360 | 0.7966 | 0.7971 | 0.7961 | 0.9564 |
63
+ | 0.1207 | 2.0 | 1403 | 0.1172 | 0.8235 | 0.8146 | 0.8326 | 0.9623 |
64
+ | 0.0891 | 2.9993 | 2104 | 0.1133 | 0.8348 | 0.8307 | 0.8390 | 0.9640 |
65
+ | 0.0684 | 4.0 | 2806 | 0.1172 | 0.8386 | 0.8411 | 0.8362 | 0.9650 |
66
+ | 0.0527 | 4.9993 | 3507 | 0.1268 | 0.8371 | 0.8302 | 0.8441 | 0.9645 |
67
+ | 0.0414 | 6.0 | 4209 | 0.1425 | 0.8390 | 0.8329 | 0.8453 | 0.9649 |
68
+ | 0.0329 | 6.9993 | 4910 | 0.1532 | 0.8385 | 0.8374 | 0.8396 | 0.9647 |
69
+ | 0.0263 | 8.0 | 5612 | 0.1650 | 0.8359 | 0.8287 | 0.8433 | 0.9645 |
70
+ | 0.0222 | 8.9993 | 6313 | 0.1793 | 0.8396 | 0.8398 | 0.8395 | 0.9652 |
71
+ | 0.019 | 9.9929 | 7010 | 0.1841 | 0.8384 | 0.8348 | 0.8421 | 0.9649 |
72
+
73
+
74
+ ### Framework versions
75
+
76
+ - Transformers 4.43.4
77
+ - Pytorch 2.4.1+cu121
78
+ - Datasets 2.20.0
79
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0",
3
+ "adapters": {
4
+ "adapters": {},
5
+ "config_map": {},
6
+ "fusion_config_map": {},
7
+ "fusions": {}
8
+ },
9
+ "architectures": [
10
+ "XLMRobertaForTokenClassification"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "bos_token_id": 0,
14
+ "classifier_dropout": null,
15
+ "eos_token_id": 2,
16
+ "gradient_checkpointing": false,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "O",
22
+ "1": "B-PER",
23
+ "2": "I-PER",
24
+ "3": "B-ORG",
25
+ "4": "I-ORG",
26
+ "5": "B-LOC",
27
+ "6": "I-LOC"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4096,
31
+ "label2id": {
32
+ "B-LOC": 5,
33
+ "B-ORG": 3,
34
+ "B-PER": 1,
35
+ "I-LOC": 6,
36
+ "I-ORG": 4,
37
+ "I-PER": 2,
38
+ "O": 0
39
+ },
40
+ "layer_norm_eps": 1e-05,
41
+ "max_position_embeddings": 514,
42
+ "model_type": "xlm-roberta",
43
+ "num_attention_heads": 16,
44
+ "num_hidden_layers": 24,
45
+ "output_past": true,
46
+ "pad_token_id": 1,
47
+ "position_embedding_type": "absolute",
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.43.4",
50
+ "type_vocab_size": 1,
51
+ "use_cache": true,
52
+ "vocab_size": 250002
53
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a934a395ee5daa359a70235c4bc403f3729523185a4960b473af446b6fca9619
3
+ size 2235440556
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 512,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "XLMRobertaTokenizer",
53
+ "unk_token": "<unk>"
54
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76e91c3dd62aaab656548a75c622e1735d876af0522177fb571c39e2911aef09
3
+ size 5304
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
2
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Configure stats pid to 1289831
3
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
4
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
5
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Applying login settings: {}
8
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Applying login settings: {}
9
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/wandb/run-20240917_155335-i6pu2qqp/logs/debug.log
10
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/wandb/run-20240917_155335-i6pu2qqp/logs/debug-internal.log
11
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f917fe558d0>
12
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():608] calling init triggers
13
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():658] starting backend
16
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():662] setting up manager
17
+ 2024-09-17 15:53:35,210 INFO MainThread:1289831 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-09-17 15:53:35,213 INFO MainThread:1289831 [wandb_init.py:init():670] backend started and connected
19
+ 2024-09-17 15:53:35,221 INFO MainThread:1289831 [wandb_run.py:_label_probe_notebook():1344] probe notebook
20
+ 2024-09-17 15:53:35,243 INFO MainThread:1289831 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2024-09-17 15:53:35,243 INFO MainThread:1289831 [wandb_init.py:init():768] updated telemetry
22
+ 2024-09-17 15:53:35,257 INFO MainThread:1289831 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
23
+ 2024-09-17 15:53:35,636 INFO MainThread:1289831 [wandb_init.py:init():852] starting run threads in backend
24
+ 2024-09-17 15:53:36,384 INFO MainThread:1289831 [wandb_run.py:_console_start():2465] atexit reg
25
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2311] redirect: wrap_raw
26
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2376] Wrapping output streams.
27
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2401] Redirects installed.
28
+ 2024-09-17 15:53:36,390 INFO MainThread:1289831 [wandb_init.py:init():895] run started, returning control to user process
29
+ 2024-09-17 15:53:36,399 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
30
+ 2024-09-17 15:53:36,399 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
31
+ 2024-09-17 15:53:36,409 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
32
+ 2024-09-17 15:53:36,452 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
33
+ 2024-09-17 15:53:36,453 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
34
+ 2024-09-17 15:53:36,459 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
35
+ 2024-09-17 15:53:36,464 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
36
+ 2024-09-17 15:53:36,464 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
37
+ 2024-09-17 15:53:36,470 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
38
+ 2024-09-17 15:53:43,871 INFO MainThread:1289831 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG', 5: 'B-LOC', 6: 'I-LOC'}, 'label2id': {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'hidden_act': 'gelu', 
'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
39
+ 2024-09-17 15:53:43,873 INFO MainThread:1289831 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558848007 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f9142db3c90>>
40
+ 2024-09-17 15:53:43,873 INFO MainThread:1289831 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558848007 None
41
+ 2024-09-17 17:22:57,597 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
42
+ 2024-09-17 17:22:57,598 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
43
+ 2024-09-17 17:59:40,303 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
44
+ 2024-09-17 17:59:40,309 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
45
+ 2024-09-17 17:59:40,309 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
46
+ 2024-09-17 19:32:52,614 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
wandb/run-20240917_155335-i6pu2qqp/files/config.yaml ADDED
@@ -0,0 +1,742 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.4
7
+ cli_version: 0.17.9
8
+ framework: huggingface
9
+ huggingface_version: 4.43.4
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1726584815
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 41
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 98
25
+ - 100
26
+ - 105
27
+ 2:
28
+ - 1
29
+ - 5
30
+ - 11
31
+ - 41
32
+ - 49
33
+ - 51
34
+ - 53
35
+ - 55
36
+ - 71
37
+ - 98
38
+ - 100
39
+ - 105
40
+ 3:
41
+ - 7
42
+ - 19
43
+ - 23
44
+ - 62
45
+ - 66
46
+ 4: 3.11.4
47
+ 5: 0.17.9
48
+ 6: 4.43.4
49
+ 8:
50
+ - 1
51
+ - 5
52
+ 9:
53
+ 1: transformers_trainer
54
+ 13: linux-x86_64
55
+ m:
56
+ - 1: train/global_step
57
+ 6:
58
+ - 3
59
+ - 1: train/loss
60
+ 5: 1
61
+ 6:
62
+ - 1
63
+ - 1: train/grad_norm
64
+ 5: 1
65
+ 6:
66
+ - 1
67
+ - 1: train/learning_rate
68
+ 5: 1
69
+ 6:
70
+ - 1
71
+ - 1: train/epoch
72
+ 5: 1
73
+ 6:
74
+ - 1
75
+ - 1: eval/loss
76
+ 5: 1
77
+ 6:
78
+ - 1
79
+ - 1: eval/f1
80
+ 5: 1
81
+ 6:
82
+ - 1
83
+ - 1: eval/precision
84
+ 5: 1
85
+ 6:
86
+ - 1
87
+ - 1: eval/recall
88
+ 5: 1
89
+ 6:
90
+ - 1
91
+ - 1: eval/accuracy
92
+ 5: 1
93
+ 6:
94
+ - 1
95
+ - 1: eval/runtime
96
+ 5: 1
97
+ 6:
98
+ - 1
99
+ - 1: eval/samples_per_second
100
+ 5: 1
101
+ 6:
102
+ - 1
103
+ - 1: eval/steps_per_second
104
+ 5: 1
105
+ 6:
106
+ - 1
107
+ return_dict:
108
+ desc: null
109
+ value: true
110
+ output_hidden_states:
111
+ desc: null
112
+ value: false
113
+ output_attentions:
114
+ desc: null
115
+ value: false
116
+ torchscript:
117
+ desc: null
118
+ value: false
119
+ torch_dtype:
120
+ desc: null
121
+ value: float32
122
+ use_bfloat16:
123
+ desc: null
124
+ value: false
125
+ tf_legacy_loss:
126
+ desc: null
127
+ value: false
128
+ pruned_heads:
129
+ desc: null
130
+ value: {}
131
+ tie_word_embeddings:
132
+ desc: null
133
+ value: true
134
+ chunk_size_feed_forward:
135
+ desc: null
136
+ value: 0
137
+ is_encoder_decoder:
138
+ desc: null
139
+ value: false
140
+ is_decoder:
141
+ desc: null
142
+ value: false
143
+ cross_attention_hidden_size:
144
+ desc: null
145
+ value: null
146
+ add_cross_attention:
147
+ desc: null
148
+ value: false
149
+ tie_encoder_decoder:
150
+ desc: null
151
+ value: false
152
+ max_length:
153
+ desc: null
154
+ value: 20
155
+ min_length:
156
+ desc: null
157
+ value: 0
158
+ do_sample:
159
+ desc: null
160
+ value: false
161
+ early_stopping:
162
+ desc: null
163
+ value: false
164
+ num_beams:
165
+ desc: null
166
+ value: 1
167
+ num_beam_groups:
168
+ desc: null
169
+ value: 1
170
+ diversity_penalty:
171
+ desc: null
172
+ value: 0.0
173
+ temperature:
174
+ desc: null
175
+ value: 1.0
176
+ top_k:
177
+ desc: null
178
+ value: 50
179
+ top_p:
180
+ desc: null
181
+ value: 1.0
182
+ typical_p:
183
+ desc: null
184
+ value: 1.0
185
+ repetition_penalty:
186
+ desc: null
187
+ value: 1.0
188
+ length_penalty:
189
+ desc: null
190
+ value: 1.0
191
+ no_repeat_ngram_size:
192
+ desc: null
193
+ value: 0
194
+ encoder_no_repeat_ngram_size:
195
+ desc: null
196
+ value: 0
197
+ bad_words_ids:
198
+ desc: null
199
+ value: null
200
+ num_return_sequences:
201
+ desc: null
202
+ value: 1
203
+ output_scores:
204
+ desc: null
205
+ value: false
206
+ return_dict_in_generate:
207
+ desc: null
208
+ value: false
209
+ forced_bos_token_id:
210
+ desc: null
211
+ value: null
212
+ forced_eos_token_id:
213
+ desc: null
214
+ value: null
215
+ remove_invalid_values:
216
+ desc: null
217
+ value: false
218
+ exponential_decay_length_penalty:
219
+ desc: null
220
+ value: null
221
+ suppress_tokens:
222
+ desc: null
223
+ value: null
224
+ begin_suppress_tokens:
225
+ desc: null
226
+ value: null
227
+ architectures:
228
+ desc: null
229
+ value:
230
+ - XLMRobertaForTokenClassification
231
+ finetuning_task:
232
+ desc: null
233
+ value: null
234
+ id2label:
235
+ desc: null
236
+ value:
237
+ '0': O
238
+ '1': B-PER
239
+ '2': I-PER
240
+ '3': B-ORG
241
+ '4': I-ORG
242
+ '5': B-LOC
243
+ '6': I-LOC
244
+ label2id:
245
+ desc: null
246
+ value:
247
+ O: 0
248
+ B-PER: 1
249
+ I-PER: 2
250
+ B-ORG: 3
251
+ I-ORG: 4
252
+ B-LOC: 5
253
+ I-LOC: 6
254
+ tokenizer_class:
255
+ desc: null
256
+ value: null
257
+ prefix:
258
+ desc: null
259
+ value: null
260
+ bos_token_id:
261
+ desc: null
262
+ value: 0
263
+ pad_token_id:
264
+ desc: null
265
+ value: 1
266
+ eos_token_id:
267
+ desc: null
268
+ value: 2
269
+ sep_token_id:
270
+ desc: null
271
+ value: null
272
+ decoder_start_token_id:
273
+ desc: null
274
+ value: null
275
+ task_specific_params:
276
+ desc: null
277
+ value: null
278
+ problem_type:
279
+ desc: null
280
+ value: null
281
+ _name_or_path:
282
+ desc: null
283
+ value: masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0
284
+ transformers_version:
285
+ desc: null
286
+ value: 4.43.4
287
+ adapters:
288
+ desc: null
289
+ value:
290
+ adapters: {}
291
+ config_map: {}
292
+ fusion_config_map: {}
293
+ fusions: {}
294
+ gradient_checkpointing:
295
+ desc: null
296
+ value: false
297
+ model_type:
298
+ desc: null
299
+ value: xlm-roberta
300
+ output_past:
301
+ desc: null
302
+ value: true
303
+ vocab_size:
304
+ desc: null
305
+ value: 250002
306
+ hidden_size:
307
+ desc: null
308
+ value: 1024
309
+ num_hidden_layers:
310
+ desc: null
311
+ value: 24
312
+ num_attention_heads:
313
+ desc: null
314
+ value: 16
315
+ hidden_act:
316
+ desc: null
317
+ value: gelu
318
+ intermediate_size:
319
+ desc: null
320
+ value: 4096
321
+ hidden_dropout_prob:
322
+ desc: null
323
+ value: 0.1
324
+ attention_probs_dropout_prob:
325
+ desc: null
326
+ value: 0.1
327
+ max_position_embeddings:
328
+ desc: null
329
+ value: 514
330
+ type_vocab_size:
331
+ desc: null
332
+ value: 1
333
+ initializer_range:
334
+ desc: null
335
+ value: 0.02
336
+ layer_norm_eps:
337
+ desc: null
338
+ value: 1.0e-05
339
+ position_embedding_type:
340
+ desc: null
341
+ value: absolute
342
+ use_cache:
343
+ desc: null
344
+ value: true
345
+ classifier_dropout:
346
+ desc: null
347
+ value: null
348
+ output_dir:
349
+ desc: null
350
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4
351
+ overwrite_output_dir:
352
+ desc: null
353
+ value: false
354
+ do_train:
355
+ desc: null
356
+ value: false
357
+ do_eval:
358
+ desc: null
359
+ value: true
360
+ do_predict:
361
+ desc: null
362
+ value: false
363
+ eval_strategy:
364
+ desc: null
365
+ value: epoch
366
+ prediction_loss_only:
367
+ desc: null
368
+ value: false
369
+ per_device_train_batch_size:
370
+ desc: null
371
+ value: 16
372
+ per_device_eval_batch_size:
373
+ desc: null
374
+ value: 8
375
+ per_gpu_train_batch_size:
376
+ desc: null
377
+ value: null
378
+ per_gpu_eval_batch_size:
379
+ desc: null
380
+ value: null
381
+ gradient_accumulation_steps:
382
+ desc: null
383
+ value: 4
384
+ eval_accumulation_steps:
385
+ desc: null
386
+ value: null
387
+ eval_delay:
388
+ desc: null
389
+ value: 0
390
+ torch_empty_cache_steps:
391
+ desc: null
392
+ value: null
393
+ learning_rate:
394
+ desc: null
395
+ value: 2.0e-05
396
+ weight_decay:
397
+ desc: null
398
+ value: 0.01
399
+ adam_beta1:
400
+ desc: null
401
+ value: 0.9
402
+ adam_beta2:
403
+ desc: null
404
+ value: 0.999
405
+ adam_epsilon:
406
+ desc: null
407
+ value: 1.0e-08
408
+ max_grad_norm:
409
+ desc: null
410
+ value: 1.0
411
+ num_train_epochs:
412
+ desc: null
413
+ value: 10
414
+ max_steps:
415
+ desc: null
416
+ value: -1
417
+ lr_scheduler_type:
418
+ desc: null
419
+ value: linear
420
+ lr_scheduler_kwargs:
421
+ desc: null
422
+ value: {}
423
+ warmup_ratio:
424
+ desc: null
425
+ value: 0.0
426
+ warmup_steps:
427
+ desc: null
428
+ value: 1000
429
+ log_level:
430
+ desc: null
431
+ value: passive
432
+ log_level_replica:
433
+ desc: null
434
+ value: warning
435
+ log_on_each_node:
436
+ desc: null
437
+ value: true
438
+ logging_dir:
439
+ desc: null
440
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/logs
441
+ logging_strategy:
442
+ desc: null
443
+ value: epoch
444
+ logging_first_step:
445
+ desc: null
446
+ value: false
447
+ logging_steps:
448
+ desc: null
449
+ value: 500
450
+ logging_nan_inf_filter:
451
+ desc: null
452
+ value: true
453
+ save_strategy:
454
+ desc: null
455
+ value: epoch
456
+ save_steps:
457
+ desc: null
458
+ value: 500
459
+ save_total_limit:
460
+ desc: null
461
+ value: 2
462
+ save_safetensors:
463
+ desc: null
464
+ value: true
465
+ save_on_each_node:
466
+ desc: null
467
+ value: false
468
+ save_only_model:
469
+ desc: null
470
+ value: false
471
+ restore_callback_states_from_checkpoint:
472
+ desc: null
473
+ value: false
474
+ no_cuda:
475
+ desc: null
476
+ value: false
477
+ use_cpu:
478
+ desc: null
479
+ value: false
480
+ use_mps_device:
481
+ desc: null
482
+ value: false
483
+ seed:
484
+ desc: null
485
+ value: 3407
486
+ data_seed:
487
+ desc: null
488
+ value: null
489
+ jit_mode_eval:
490
+ desc: null
491
+ value: false
492
+ use_ipex:
493
+ desc: null
494
+ value: false
495
+ bf16:
496
+ desc: null
497
+ value: false
498
+ fp16:
499
+ desc: null
500
+ value: true
501
+ fp16_opt_level:
502
+ desc: null
503
+ value: O1
504
+ half_precision_backend:
505
+ desc: null
506
+ value: auto
507
+ bf16_full_eval:
508
+ desc: null
509
+ value: false
510
+ fp16_full_eval:
511
+ desc: null
512
+ value: false
513
+ tf32:
514
+ desc: null
515
+ value: null
516
+ local_rank:
517
+ desc: null
518
+ value: 0
519
+ ddp_backend:
520
+ desc: null
521
+ value: null
522
+ tpu_num_cores:
523
+ desc: null
524
+ value: null
525
+ tpu_metrics_debug:
526
+ desc: null
527
+ value: false
528
+ debug:
529
+ desc: null
530
+ value: []
531
+ dataloader_drop_last:
532
+ desc: null
533
+ value: false
534
+ eval_steps:
535
+ desc: null
536
+ value: null
537
+ dataloader_num_workers:
538
+ desc: null
539
+ value: 0
540
+ dataloader_prefetch_factor:
541
+ desc: null
542
+ value: null
543
+ past_index:
544
+ desc: null
545
+ value: -1
546
+ run_name:
547
+ desc: null
548
+ value: /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4
549
+ disable_tqdm:
550
+ desc: null
551
+ value: false
552
+ remove_unused_columns:
553
+ desc: null
554
+ value: true
555
+ label_names:
556
+ desc: null
557
+ value: null
558
+ load_best_model_at_end:
559
+ desc: null
560
+ value: true
561
+ metric_for_best_model:
562
+ desc: null
563
+ value: eval_f1
564
+ greater_is_better:
565
+ desc: null
566
+ value: true
567
+ ignore_data_skip:
568
+ desc: null
569
+ value: false
570
+ fsdp:
571
+ desc: null
572
+ value: []
573
+ fsdp_min_num_params:
574
+ desc: null
575
+ value: 0
576
+ fsdp_config:
577
+ desc: null
578
+ value:
579
+ min_num_params: 0
580
+ xla: false
581
+ xla_fsdp_v2: false
582
+ xla_fsdp_grad_ckpt: false
583
+ fsdp_transformer_layer_cls_to_wrap:
584
+ desc: null
585
+ value: null
586
+ accelerator_config:
587
+ desc: null
588
+ value:
589
+ split_batches: false
590
+ dispatch_batches: null
591
+ even_batches: true
592
+ use_seedable_sampler: true
593
+ non_blocking: false
594
+ gradient_accumulation_kwargs: null
595
+ deepspeed:
596
+ desc: null
597
+ value: null
598
+ label_smoothing_factor:
599
+ desc: null
600
+ value: 0.0
601
+ optim:
602
+ desc: null
603
+ value: adamw_torch
604
+ optim_args:
605
+ desc: null
606
+ value: null
607
+ adafactor:
608
+ desc: null
609
+ value: false
610
+ group_by_length:
611
+ desc: null
612
+ value: true
613
+ length_column_name:
614
+ desc: null
615
+ value: length
616
+ report_to:
617
+ desc: null
618
+ value:
619
+ - wandb
620
+ ddp_find_unused_parameters:
621
+ desc: null
622
+ value: null
623
+ ddp_bucket_cap_mb:
624
+ desc: null
625
+ value: null
626
+ ddp_broadcast_buffers:
627
+ desc: null
628
+ value: null
629
+ dataloader_pin_memory:
630
+ desc: null
631
+ value: true
632
+ dataloader_persistent_workers:
633
+ desc: null
634
+ value: false
635
+ skip_memory_metrics:
636
+ desc: null
637
+ value: true
638
+ use_legacy_prediction_loop:
639
+ desc: null
640
+ value: false
641
+ push_to_hub:
642
+ desc: null
643
+ value: false
644
+ resume_from_checkpoint:
645
+ desc: null
646
+ value: null
647
+ hub_model_id:
648
+ desc: null
649
+ value: null
650
+ hub_strategy:
651
+ desc: null
652
+ value: every_save
653
+ hub_token:
654
+ desc: null
655
+ value: <HUB_TOKEN>
656
+ hub_private_repo:
657
+ desc: null
658
+ value: false
659
+ hub_always_push:
660
+ desc: null
661
+ value: false
662
+ gradient_checkpointing_kwargs:
663
+ desc: null
664
+ value: null
665
+ include_inputs_for_metrics:
666
+ desc: null
667
+ value: false
668
+ eval_do_concat_batches:
669
+ desc: null
670
+ value: true
671
+ fp16_backend:
672
+ desc: null
673
+ value: auto
674
+ evaluation_strategy:
675
+ desc: null
676
+ value: null
677
+ push_to_hub_model_id:
678
+ desc: null
679
+ value: null
680
+ push_to_hub_organization:
681
+ desc: null
682
+ value: null
683
+ push_to_hub_token:
684
+ desc: null
685
+ value: <PUSH_TO_HUB_TOKEN>
686
+ mp_parameters:
687
+ desc: null
688
+ value: ''
689
+ auto_find_batch_size:
690
+ desc: null
691
+ value: false
692
+ full_determinism:
693
+ desc: null
694
+ value: false
695
+ torchdynamo:
696
+ desc: null
697
+ value: null
698
+ ray_scope:
699
+ desc: null
700
+ value: last
701
+ ddp_timeout:
702
+ desc: null
703
+ value: 1800
704
+ torch_compile:
705
+ desc: null
706
+ value: false
707
+ torch_compile_backend:
708
+ desc: null
709
+ value: null
710
+ torch_compile_mode:
711
+ desc: null
712
+ value: null
713
+ dispatch_batches:
714
+ desc: null
715
+ value: null
716
+ split_batches:
717
+ desc: null
718
+ value: null
719
+ include_tokens_per_second:
720
+ desc: null
721
+ value: false
722
+ include_num_input_tokens_seen:
723
+ desc: null
724
+ value: false
725
+ neftune_noise_alpha:
726
+ desc: null
727
+ value: null
728
+ optim_target_modules:
729
+ desc: null
730
+ value: null
731
+ batch_eval_metrics:
732
+ desc: null
733
+ value: false
734
+ eval_on_start:
735
+ desc: null
736
+ value: false
737
+ eval_use_gather_object:
738
+ desc: null
739
+ value: false
740
+ model/num_parameters:
741
+ desc: null
742
+ value: 558848007
wandb/run-20240917_155335-i6pu2qqp/files/output.log ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /nfs/production/literature/amina-mardiyyah/envs/llm-prompt/lib/python3.11/site-packages/accelerate/accelerator.py:488: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
2
+ self.scaler = torch.cuda.amp.GradScaler(**kwargs)
3
+ Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
4
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
5
+ Classification Report:
6
+ precision recall f1-score support
7
+ LOC 0.74 0.80 0.76 9327
8
+ ORG 0.74 0.68 0.71 7547
9
+ PER 0.89 0.88 0.88 11204
10
+ micro avg 0.80 0.80 0.80 28078
11
+ macro avg 0.79 0.78 0.79 28078
12
+ weighted avg 0.80 0.80 0.80 28078
13
+ Classification Report:
14
+ precision recall f1-score support
15
+ LOC 0.81 0.78 0.80 9327
16
+ ORG 0.71 0.80 0.75 7547
17
+ PER 0.90 0.89 0.90 11204
18
+ micro avg 0.81 0.83 0.82 28078
19
+ macro avg 0.81 0.83 0.82 28078
20
+ weighted avg 0.82 0.83 0.82 28078
21
+ Classification Report:
22
+ precision recall f1-score support
23
+ LOC 0.81 0.80 0.80 9327
24
+ ORG 0.75 0.79 0.77 7547
25
+ PER 0.91 0.90 0.90 11204
26
+ micro avg 0.83 0.84 0.83 28078
27
+ macro avg 0.82 0.83 0.83 28078
28
+ weighted avg 0.83 0.84 0.84 28078
29
+ Classification Report:
30
+ precision recall f1-score support
31
+ LOC 0.80 0.81 0.81 9327
32
+ ORG 0.80 0.77 0.78 7547
33
+ PER 0.90 0.90 0.90 11204
34
+ micro avg 0.84 0.84 0.84 28078
35
+ macro avg 0.83 0.83 0.83 28078
36
+ weighted avg 0.84 0.84 0.84 28078
37
+ Classification Report:
38
+ precision recall f1-score support
39
+ LOC 0.80 0.81 0.81 9327
40
+ ORG 0.77 0.79 0.78 7547
41
+ PER 0.90 0.90 0.90 11204
42
+ micro avg 0.83 0.84 0.84 28078
43
+ macro avg 0.82 0.84 0.83 28078
44
+ weighted avg 0.83 0.84 0.84 28078
45
+ Classification Report:
46
+ precision recall f1-score support
47
+ LOC 0.80 0.81 0.81 9327
48
+ ORG 0.77 0.79 0.78 7547
49
+ PER 0.90 0.91 0.90 11204
50
+ micro avg 0.83 0.85 0.84 28078
51
+ macro avg 0.83 0.84 0.83 28078
52
+ weighted avg 0.83 0.85 0.84 28078
53
+ Classification Report:
54
+ precision recall f1-score support
55
+ LOC 0.82 0.80 0.81 9327
56
+ ORG 0.76 0.80 0.78 7547
57
+ PER 0.91 0.90 0.90 11204
58
+ micro avg 0.84 0.84 0.84 28078
59
+ macro avg 0.83 0.83 0.83 28078
60
+ weighted avg 0.84 0.84 0.84 28078
61
+ Classification Report:
62
+ precision recall f1-score support
63
+ LOC 0.80 0.81 0.81 9327
64
+ ORG 0.75 0.80 0.78 7547
65
+ PER 0.90 0.90 0.90 11204
66
+ micro avg 0.83 0.84 0.84 28078
67
+ macro avg 0.82 0.84 0.83 28078
68
+ weighted avg 0.83 0.84 0.84 28078
69
+ Classification Report:
70
+ precision recall f1-score support
71
+ LOC 0.81 0.80 0.81 9327
72
+ ORG 0.78 0.79 0.79 7547
73
+ PER 0.91 0.90 0.90 11204
74
+ micro avg 0.84 0.84 0.84 28078
75
+ macro avg 0.83 0.83 0.83 28078
76
+ weighted avg 0.84 0.84 0.84 28078
77
+ Classification Report:
78
+ precision recall f1-score support
79
+ LOC 0.80 0.81 0.80 9327
80
+ ORG 0.77 0.80 0.78 7547
81
+ PER 0.91 0.90 0.90 11204
82
+ micro avg 0.83 0.84 0.84 28078
83
+ macro avg 0.83 0.84 0.83 28078
wandb/run-20240917_155335-i6pu2qqp/files/requirements.txt ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AutoROM.accept-rom-license==0.6.1
2
+ AutoROM==0.6.1
3
+ Farama-Notifications==0.0.4
4
+ GitPython==3.1.43
5
+ Jinja2==3.1.4
6
+ Mako==1.3.5
7
+ Markdown==3.7
8
+ MarkupSafe==2.1.5
9
+ PyYAML==6.0.2
10
+ Pygments==2.18.0
11
+ QtPy==2.4.1
12
+ SQLAlchemy==2.0.32
13
+ Send2Trash==1.8.3
14
+ Shimmy==1.3.0
15
+ Werkzeug==3.0.4
16
+ absl-py==2.1.0
17
+ accelerate==0.33.0
18
+ aiohappyeyeballs==2.3.5
19
+ aiohttp-retry==2.8.3
20
+ aiohttp==3.10.3
21
+ aiosignal==1.3.1
22
+ ale-py==0.8.1
23
+ alembic==1.13.2
24
+ amqp==5.2.0
25
+ annotated-types==0.7.0
26
+ antlr4-python3-runtime==4.9.3
27
+ anyio==4.4.0
28
+ appdirs==1.4.4
29
+ argon2-cffi-bindings==21.2.0
30
+ argon2-cffi==23.1.0
31
+ arrow==1.3.0
32
+ asttokens==2.4.1
33
+ async-generator==1.10
34
+ async-lru==2.0.4
35
+ asyncssh==2.17.0
36
+ atpublic==5.0
37
+ attrs==24.2.0
38
+ audioread==3.0.1
39
+ babel==2.16.0
40
+ beautifulsoup4==4.12.3
41
+ billiard==4.2.0
42
+ bitsandbytes==0.43.3
43
+ bleach==6.1.0
44
+ blis==0.7.11
45
+ catalogue==2.0.10
46
+ celery==5.4.0
47
+ certifi==2024.7.4
48
+ certipy==0.1.3
49
+ cffi==1.17.0
50
+ charset-normalizer==3.3.2
51
+ click-didyoumean==0.3.1
52
+ click-plugins==1.1.1
53
+ click-repl==0.3.0
54
+ click==8.1.7
55
+ cloudpathlib==0.18.1
56
+ cloudpickle==3.0.0
57
+ colorama==0.4.6
58
+ coloredlogs==15.0.1
59
+ comm==0.2.2
60
+ conda-store==2024.6.1
61
+ confection==0.1.5
62
+ configobj==5.0.8
63
+ contourpy==1.2.1
64
+ cryptography==43.0.0
65
+ cuda-python==11.8.3
66
+ cycler==0.12.1
67
+ cymem==2.0.8
68
+ datasets==2.20.0
69
+ debugpy==1.8.5
70
+ decorator==5.1.1
71
+ defusedxml==0.7.1
72
+ dictdiffer==0.9.0
73
+ dill==0.3.8
74
+ diskcache==5.6.3
75
+ distro==1.9.0
76
+ docker-pycreds==0.4.0
77
+ docstring_parser==0.16
78
+ dpath==2.2.0
79
+ dulwich==0.22.1
80
+ dvc-data==3.16.5
81
+ dvc-http==2.32.0
82
+ dvc-objects==5.1.0
83
+ dvc-render==1.0.2
84
+ dvc-studio-client==0.21.0
85
+ dvc-task==0.4.0
86
+ dvc==3.55.2
87
+ en-core-web-lg==3.7.1
88
+ en-core-web-sm==3.7.1
89
+ entrypoints==0.4
90
+ evaluate==0.4.2
91
+ executing==2.0.1
92
+ fastjsonschema==2.20.0
93
+ filelock==3.15.4
94
+ flatbuffers==24.3.25
95
+ flatten-dict==0.4.2
96
+ flufl.lock==8.1.0
97
+ fonttools==4.53.1
98
+ fqdn==1.5.1
99
+ frozenlist==1.4.1
100
+ fsspec==2024.5.0
101
+ funcy==2.0
102
+ gitdb==4.0.11
103
+ grandalf==0.8
104
+ greenlet==3.0.3
105
+ grpcio==1.66.0
106
+ gto==1.7.1
107
+ gymnasium==0.29.1
108
+ h11==0.14.0
109
+ httpcore==1.0.5
110
+ httpx==0.27.0
111
+ huggingface-hub==0.24.5
112
+ humanfriendly==10.0
113
+ hydra-core==1.3.2
114
+ idna==3.7
115
+ importlib_resources==6.4.5
116
+ ipykernel==6.29.5
117
+ ipython==8.26.0
118
+ ipywidgets==8.1.3
119
+ isoduration==20.11.0
120
+ iterative-telemetry==0.0.8
121
+ jedi==0.19.1
122
+ joblib==1.4.2
123
+ json5==0.9.25
124
+ jsonpointer==3.0.0
125
+ jsonschema-specifications==2023.12.1
126
+ jsonschema==4.23.0
127
+ jupyter-console==6.6.3
128
+ jupyter-events==0.10.0
129
+ jupyter-launcher-shortcuts==4.0.3
130
+ jupyter-lsp==2.2.5
131
+ jupyter-telemetry==0.1.0
132
+ jupyter==1.0.0
133
+ jupyter_client==8.6.2
134
+ jupyter_core==5.7.2
135
+ jupyter_server==2.14.2
136
+ jupyter_server_terminals==0.5.3
137
+ jupyterhub==1.5.1
138
+ jupyterlab-conda-store==2024.6.1
139
+ jupyterlab==4.2.4
140
+ jupyterlab_pygments==0.3.0
141
+ jupyterlab_server==2.27.3
142
+ jupyterlab_widgets==3.0.11
143
+ kiwisolver==1.4.5
144
+ kombu==5.4.0
145
+ lab==8.2
146
+ langcodes==3.4.0
147
+ language_data==1.2.0
148
+ lazy_loader==0.4
149
+ librosa==0.10.2.post1
150
+ llvmlite==0.43.0
151
+ lxml==5.3.0
152
+ marisa-trie==1.2.0
153
+ markdown-it-py==3.0.0
154
+ matplotlib-inline==0.1.7
155
+ matplotlib==3.9.1.post1
156
+ mdurl==0.1.2
157
+ mistune==3.0.2
158
+ mpmath==1.3.0
159
+ msgpack==1.1.0
160
+ multidict==6.0.5
161
+ multiprocess==0.70.16
162
+ murmurhash==1.0.10
163
+ nb_conda_store_kernels==0.1.5
164
+ nbclient==0.10.0
165
+ nbconvert==7.16.4
166
+ nbformat==5.10.4
167
+ nest-asyncio==1.6.0
168
+ networkx==3.3
169
+ nodejs==0.1.1
170
+ notebook==7.2.1
171
+ notebook_shim==0.2.4
172
+ numba==0.60.0
173
+ numpy==1.26.4
174
+ nvidia-cublas-cu12==12.1.3.1
175
+ nvidia-cuda-cupti-cu12==12.1.105
176
+ nvidia-cuda-nvrtc-cu12==12.1.105
177
+ nvidia-cuda-runtime-cu12==12.1.105
178
+ nvidia-cudnn-cu12==8.9.2.26
179
+ nvidia-cudnn-cu12==9.1.0.70
180
+ nvidia-cufft-cu12==11.0.2.54
181
+ nvidia-curand-cu12==10.3.2.106
182
+ nvidia-cusolver-cu12==11.4.5.107
183
+ nvidia-cusparse-cu12==12.1.0.106
184
+ nvidia-nccl-cu12==2.19.3
185
+ nvidia-nccl-cu12==2.20.5
186
+ nvidia-nvjitlink-cu12==12.6.20
187
+ nvidia-nvtx-cu12==12.1.105
188
+ oauthlib==3.2.2
189
+ omegaconf==2.3.0
190
+ onnx==1.16.2
191
+ onnxruntime==1.19.0
192
+ opencv-python==4.10.0.84
193
+ optimum==1.21.4
194
+ optional-django==0.1.0
195
+ orjson==3.10.7
196
+ overrides==7.7.0
197
+ packaging==24.1
198
+ pamela==1.2.0
199
+ pandas==2.2.2
200
+ pandocfilters==1.5.1
201
+ parso==0.8.4
202
+ pathspec==0.12.1
203
+ peft==0.12.0
204
+ pexpect==4.9.0
205
+ pillow==10.4.0
206
+ pip==23.1.2
207
+ platformdirs==3.11.0
208
+ pooch==1.8.2
209
+ preshed==3.0.9
210
+ prometheus_client==0.20.0
211
+ prompt_toolkit==3.0.47
212
+ protobuf==5.27.3
213
+ psutil==6.0.0
214
+ ptyprocess==0.7.0
215
+ pure_eval==0.2.3
216
+ pyOpenSSL==24.2.1
217
+ pyarrow-hotfix==0.6
218
+ pyarrow==17.0.0
219
+ pycparser==2.22
220
+ pydantic==2.8.2
221
+ pydantic_core==2.20.1
222
+ pydot==3.0.1
223
+ pygame==2.6.0
224
+ pygit2==1.15.1
225
+ pygtrie==2.5.0
226
+ pyparsing==3.1.2
227
+ python-dateutil==2.9.0.post0
228
+ python-dotenv==1.0.1
229
+ python-json-logger==2.0.7
230
+ pytz==2024.1
231
+ pyzmq==26.1.0
232
+ qtconsole==5.5.2
233
+ referencing==0.35.1
234
+ regex==2024.7.24
235
+ requests==2.32.3
236
+ rfc3339-validator==0.1.4
237
+ rfc3986-validator==0.1.1
238
+ rich==13.7.1
239
+ rpds-py==0.20.0
240
+ ruamel.yaml.clib==0.2.8
241
+ ruamel.yaml==0.18.6
242
+ safetensors==0.4.4
243
+ scikit-learn==1.5.1
244
+ scipy==1.14.0
245
+ scmrepo==3.3.7
246
+ seaborn==0.13.2
247
+ semver==3.0.2
248
+ sentencepiece==0.2.0
249
+ sentry-sdk==2.14.0
250
+ seqeval==1.2.2
251
+ setproctitle==1.3.3
252
+ setuptools==65.5.0
253
+ shellingham==1.5.4
254
+ shortuuid==1.0.13
255
+ shtab==1.7.1
256
+ simplejson==3.19.2
257
+ six==1.16.0
258
+ smart-open==7.0.4
259
+ smmap==5.0.1
260
+ sniffio==1.3.1
261
+ soundfile==0.12.1
262
+ soupsieve==2.5
263
+ sox==1.5.0
264
+ soxr==0.5.0.post1
265
+ spacy-legacy==3.0.12
266
+ spacy-loggers==1.0.5
267
+ spacy==3.7.5
268
+ sqltrie==0.11.1
269
+ srsly==2.4.8
270
+ stable_baselines3==2.3.2
271
+ stack-data==0.6.3
272
+ sympy==1.13.2
273
+ tabulate==0.9.0
274
+ tensorboard-data-server==0.7.2
275
+ tensorboard==2.17.1
276
+ terminado==0.18.1
277
+ thinc==8.2.5
278
+ threadpoolctl==3.5.0
279
+ tinycss2==1.3.0
280
+ tokenizers==0.19.1
281
+ tomlkit==0.13.2
282
+ torch==2.4.1
283
+ torchaudio==2.4.1
284
+ torchvision==0.19.1
285
+ tornado==6.4.1
286
+ tqdm==4.66.5
287
+ traitlets==5.14.3
288
+ transformers==4.43.4
289
+ triton==3.0.0
290
+ trl==0.9.6
291
+ txt2tags==3.9
292
+ typer==0.12.3
293
+ types-python-dateutil==2.9.0.20240316
294
+ typing_extensions==4.12.2
295
+ tyro==0.8.6
296
+ tzdata==2024.1
297
+ uri-template==1.3.0
298
+ urllib3==2.2.2
299
+ vine==5.1.0
300
+ voluptuous==0.15.2
301
+ wandb==0.17.9
302
+ wasabi==1.1.3
303
+ wcwidth==0.2.13
304
+ weasel==0.4.1
305
+ webcolors==24.8.0
306
+ webencodings==0.5.1
307
+ websocket-client==1.8.0
308
+ widgetsnbextension==4.0.11
309
+ wrapt==1.16.0
310
+ xlrd==2.0.1
311
+ xxhash==3.4.1
312
+ yarl==1.9.4
313
+ zc.lockfile==3.0.post1
wandb/run-20240917_155335-i6pu2qqp/files/wandb-metadata.json ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
3
+ "python": "3.11.4",
4
+ "heartbeatAt": "2024-09-17T14:53:35.798041",
5
+ "startedAt": "2024-09-17T14:53:35.186152",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "codePathLocal": null,
12
+ "host": "codon-gpu-014.ebi.ac.uk",
13
+ "username": "amrufai",
14
+ "executable": "/nfs/production/literature/amina-mardiyyah/envs/llm-prompt/bin/python",
15
+ "cpu_count": 48,
16
+ "cpu_count_logical": 48,
17
+ "cpu_freq": {
18
+ "current": 2758.514625,
19
+ "min": 0.0,
20
+ "max": 0.0
21
+ },
22
+ "cpu_freq_per_core": [
23
+ {
24
+ "current": 2800.0,
25
+ "min": 0.0,
26
+ "max": 0.0
27
+ },
28
+ {
29
+ "current": 2800.0,
30
+ "min": 0.0,
31
+ "max": 0.0
32
+ },
33
+ {
34
+ "current": 2800.0,
35
+ "min": 0.0,
36
+ "max": 0.0
37
+ },
38
+ {
39
+ "current": 2800.0,
40
+ "min": 0.0,
41
+ "max": 0.0
42
+ },
43
+ {
44
+ "current": 2800.0,
45
+ "min": 0.0,
46
+ "max": 0.0
47
+ },
48
+ {
49
+ "current": 2800.0,
50
+ "min": 0.0,
51
+ "max": 0.0
52
+ },
53
+ {
54
+ "current": 808.702,
55
+ "min": 0.0,
56
+ "max": 0.0
57
+ },
58
+ {
59
+ "current": 2800.0,
60
+ "min": 0.0,
61
+ "max": 0.0
62
+ },
63
+ {
64
+ "current": 2800.0,
65
+ "min": 0.0,
66
+ "max": 0.0
67
+ },
68
+ {
69
+ "current": 2800.0,
70
+ "min": 0.0,
71
+ "max": 0.0
72
+ },
73
+ {
74
+ "current": 2800.0,
75
+ "min": 0.0,
76
+ "max": 0.0
77
+ },
78
+ {
79
+ "current": 2800.0,
80
+ "min": 0.0,
81
+ "max": 0.0
82
+ },
83
+ {
84
+ "current": 2800.0,
85
+ "min": 0.0,
86
+ "max": 0.0
87
+ },
88
+ {
89
+ "current": 2800.0,
90
+ "min": 0.0,
91
+ "max": 0.0
92
+ },
93
+ {
94
+ "current": 2800.0,
95
+ "min": 0.0,
96
+ "max": 0.0
97
+ },
98
+ {
99
+ "current": 2800.0,
100
+ "min": 0.0,
101
+ "max": 0.0
102
+ },
103
+ {
104
+ "current": 2800.0,
105
+ "min": 0.0,
106
+ "max": 0.0
107
+ },
108
+ {
109
+ "current": 2800.0,
110
+ "min": 0.0,
111
+ "max": 0.0
112
+ },
113
+ {
114
+ "current": 2800.0,
115
+ "min": 0.0,
116
+ "max": 0.0
117
+ },
118
+ {
119
+ "current": 2800.0,
120
+ "min": 0.0,
121
+ "max": 0.0
122
+ },
123
+ {
124
+ "current": 2800.0,
125
+ "min": 0.0,
126
+ "max": 0.0
127
+ },
128
+ {
129
+ "current": 2800.0,
130
+ "min": 0.0,
131
+ "max": 0.0
132
+ },
133
+ {
134
+ "current": 2800.0,
135
+ "min": 0.0,
136
+ "max": 0.0
137
+ },
138
+ {
139
+ "current": 2800.0,
140
+ "min": 0.0,
141
+ "max": 0.0
142
+ },
143
+ {
144
+ "current": 2800.0,
145
+ "min": 0.0,
146
+ "max": 0.0
147
+ },
148
+ {
149
+ "current": 2800.0,
150
+ "min": 0.0,
151
+ "max": 0.0
152
+ },
153
+ {
154
+ "current": 2800.0,
155
+ "min": 0.0,
156
+ "max": 0.0
157
+ },
158
+ {
159
+ "current": 2800.0,
160
+ "min": 0.0,
161
+ "max": 0.0
162
+ },
163
+ {
164
+ "current": 2800.0,
165
+ "min": 0.0,
166
+ "max": 0.0
167
+ },
168
+ {
169
+ "current": 2800.0,
170
+ "min": 0.0,
171
+ "max": 0.0
172
+ },
173
+ {
174
+ "current": 2800.0,
175
+ "min": 0.0,
176
+ "max": 0.0
177
+ },
178
+ {
179
+ "current": 2800.0,
180
+ "min": 0.0,
181
+ "max": 0.0
182
+ },
183
+ {
184
+ "current": 2800.0,
185
+ "min": 0.0,
186
+ "max": 0.0
187
+ },
188
+ {
189
+ "current": 2800.0,
190
+ "min": 0.0,
191
+ "max": 0.0
192
+ },
193
+ {
194
+ "current": 2800.0,
195
+ "min": 0.0,
196
+ "max": 0.0
197
+ },
198
+ {
199
+ "current": 2800.0,
200
+ "min": 0.0,
201
+ "max": 0.0
202
+ },
203
+ {
204
+ "current": 2800.0,
205
+ "min": 0.0,
206
+ "max": 0.0
207
+ },
208
+ {
209
+ "current": 2800.0,
210
+ "min": 0.0,
211
+ "max": 0.0
212
+ },
213
+ {
214
+ "current": 2800.0,
215
+ "min": 0.0,
216
+ "max": 0.0
217
+ },
218
+ {
219
+ "current": 2800.0,
220
+ "min": 0.0,
221
+ "max": 0.0
222
+ },
223
+ {
224
+ "current": 2800.0,
225
+ "min": 0.0,
226
+ "max": 0.0
227
+ },
228
+ {
229
+ "current": 2800.0,
230
+ "min": 0.0,
231
+ "max": 0.0
232
+ },
233
+ {
234
+ "current": 2800.0,
235
+ "min": 0.0,
236
+ "max": 0.0
237
+ },
238
+ {
239
+ "current": 2800.0,
240
+ "min": 0.0,
241
+ "max": 0.0
242
+ },
243
+ {
244
+ "current": 2800.0,
245
+ "min": 0.0,
246
+ "max": 0.0
247
+ },
248
+ {
249
+ "current": 2800.0,
250
+ "min": 0.0,
251
+ "max": 0.0
252
+ },
253
+ {
254
+ "current": 2800.0,
255
+ "min": 0.0,
256
+ "max": 0.0
257
+ },
258
+ {
259
+ "current": 2800.0,
260
+ "min": 0.0,
261
+ "max": 0.0
262
+ }
263
+ ],
264
+ "disk": {
265
+ "/": {
266
+ "total": 47.760292053222656,
267
+ "used": 15.848091125488281
268
+ }
269
+ },
270
+ "gpu": "NVIDIA A100 80GB PCIe",
271
+ "gpu_count": 1,
272
+ "gpu_devices": [
273
+ {
274
+ "name": "NVIDIA A100 80GB PCIe",
275
+ "memory_total": 85899345920
276
+ }
277
+ ],
278
+ "memory": {
279
+ "total": 502.83758544921875
280
+ }
281
+ }
wandb/run-20240917_155335-i6pu2qqp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train/loss": 0.019, "train/grad_norm": 0.881105363368988, "train/learning_rate": 9.983361064891848e-09, "train/epoch": 9.992872416250892, "train/global_step": 7010, "_timestamp": 1726590177.5914943, "_runtime": 5362.377482414246, "_step": 20, "eval/loss": 0.18408216536045074, "eval/f1": 0.8384397163120567, "eval/precision": 0.8348280488666054, "eval/recall": 0.842082769428022, "eval/accuracy": 0.9649106718849788, "eval/runtime": 74.7606, "eval/samples_per_second": 150.106, "eval/steps_per_second": 18.767, "train_runtime": 5353.7264, "train_samples_per_second": 83.841, "train_steps_per_second": 1.309, "total_flos": 8.637987044603056e+16, "train_loss": 0.08022265604321184}
wandb/run-20240917_155335-i6pu2qqp/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240917_155335-i6pu2qqp/logs/debug.log ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Current SDK version is 0.17.9
2
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Configure stats pid to 1289831
3
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from /homes/amrufai/.config/wandb/settings
4
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from /nfs/production/literature/amina-mardiyyah/wandb/settings
5
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Loading settings from environment variables: {}
6
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Applying login settings: {}
8
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_setup.py:_flush():77] Applying login settings: {}
9
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_log_setup():524] Logging user logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/wandb/run-20240917_155335-i6pu2qqp/logs/debug.log
10
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_log_setup():525] Logging internal logs to /nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/wandb/run-20240917_155335-i6pu2qqp/logs/debug-internal.log
11
+ 2024-09-17 15:53:35,207 INFO MainThread:1289831 [wandb_init.py:_jupyter_setup():470] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f917fe558d0>
12
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():608] calling init triggers
13
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():615] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():658] starting backend
16
+ 2024-09-17 15:53:35,208 INFO MainThread:1289831 [wandb_init.py:init():662] setting up manager
17
+ 2024-09-17 15:53:35,210 INFO MainThread:1289831 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-09-17 15:53:35,213 INFO MainThread:1289831 [wandb_init.py:init():670] backend started and connected
19
+ 2024-09-17 15:53:35,221 INFO MainThread:1289831 [wandb_run.py:_label_probe_notebook():1344] probe notebook
20
+ 2024-09-17 15:53:35,243 INFO MainThread:1289831 [wandb_run.py:_label_probe_notebook():1354] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2024-09-17 15:53:35,243 INFO MainThread:1289831 [wandb_init.py:init():768] updated telemetry
22
+ 2024-09-17 15:53:35,257 INFO MainThread:1289831 [wandb_init.py:init():801] communicating run to backend with 90.0 second timeout
23
+ 2024-09-17 15:53:35,636 INFO MainThread:1289831 [wandb_init.py:init():852] starting run threads in backend
24
+ 2024-09-17 15:53:36,384 INFO MainThread:1289831 [wandb_run.py:_console_start():2465] atexit reg
25
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2311] redirect: wrap_raw
26
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2376] Wrapping output streams.
27
+ 2024-09-17 15:53:36,385 INFO MainThread:1289831 [wandb_run.py:_redirect():2401] Redirects installed.
28
+ 2024-09-17 15:53:36,390 INFO MainThread:1289831 [wandb_init.py:init():895] run started, returning control to user process
29
+ 2024-09-17 15:53:36,399 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
30
+ 2024-09-17 15:53:36,399 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
31
+ 2024-09-17 15:53:36,409 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
32
+ 2024-09-17 15:53:36,452 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
33
+ 2024-09-17 15:53:36,453 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
34
+ 2024-09-17 15:53:36,459 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
35
+ 2024-09-17 15:53:36,464 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
36
+ 2024-09-17 15:53:36,464 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
37
+ 2024-09-17 15:53:36,470 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
38
+ 2024-09-17 15:53:43,871 INFO MainThread:1289831 [wandb_run.py:_config_callback():1392] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['XLMRobertaForTokenClassification'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG', 5: 'B-LOC', 6: 'I-LOC'}, 'label2id': {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'masakhane/afroxlmr-large-ner-masakhaner-1.0_2.0', 'transformers_version': '4.43.4', 'adapters': {'adapters': {}, 'config_map': {}, 'fusion_config_map': {}, 'fusions': {}}, 'gradient_checkpointing': False, 'model_type': 'xlm-roberta', 'output_past': True, 'vocab_size': 250002, 'hidden_size': 1024, 'num_hidden_layers': 24, 'num_attention_heads': 16, 'hidden_act': 'gelu', 
'intermediate_size': 4096, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 514, 'type_vocab_size': 1, 'initializer_range': 0.02, 'layer_norm_eps': 1e-05, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 10, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4/logs', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 2, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 
'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/nfs/production/literature/amina-mardiyyah/NER/ewc_stabilised_no_date_lambda0.4', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'eval_use_gather_object': False}
39
+ 2024-09-17 15:53:43,873 INFO MainThread:1289831 [wandb_config.py:__setitem__():154] config set model/num_parameters = 558848007 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f9142db3c90>>
40
+ 2024-09-17 15:53:43,873 INFO MainThread:1289831 [wandb_run.py:_config_callback():1392] config_cb model/num_parameters 558848007 None
41
+ 2024-09-17 17:22:57,597 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
42
+ 2024-09-17 17:22:57,598 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
43
+ 2024-09-17 17:59:40,303 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
44
+ 2024-09-17 17:59:40,309 INFO MainThread:1289831 [jupyter.py:save_ipynb():372] not saving jupyter notebook
45
+ 2024-09-17 17:59:40,309 INFO MainThread:1289831 [wandb_init.py:_pause_backend():435] pausing backend
46
+ 2024-09-17 19:32:52,614 INFO MainThread:1289831 [wandb_init.py:_resume_backend():440] resuming backend
wandb/run-20240917_155335-i6pu2qqp/run-i6pu2qqp.wandb ADDED
Binary file (395 kB). View file