jvelja committed
Commit c64f5a9 · verified · 1 Parent(s): 4fe28c4

Push model using huggingface_hub.

Files changed (4)
  1. README.md +3 -3
  2. adapter_model.safetensors +1 -1
  3. config.json +10 -10
  4. pytorch_model.bin +1 -1
README.md CHANGED
@@ -26,7 +26,7 @@ You can then generate text as follows:
 ```python
 from transformers import pipeline
 
-generator = pipeline("text-generation", model="jvelja//tmp/tmp1th6hrkz/jvelja/gemma2b-sanity-multivllm_0")
+generator = pipeline("text-generation", model="jvelja//tmp/tmptn97qlzw/jvelja/gemma2b-sanity-multivllm_0")
 outputs = generator("Hello, my llama is cute")
 ```
 
@@ -36,8 +36,8 @@ If you want to use the model for training or to obtain the outputs from the valu
 from transformers import AutoTokenizer
 from trl import AutoModelForCausalLMWithValueHead
 
-tokenizer = AutoTokenizer.from_pretrained("jvelja//tmp/tmp1th6hrkz/jvelja/gemma2b-sanity-multivllm_0")
-model = AutoModelForCausalLMWithValueHead.from_pretrained("jvelja//tmp/tmp1th6hrkz/jvelja/gemma2b-sanity-multivllm_0")
+tokenizer = AutoTokenizer.from_pretrained("jvelja//tmp/tmptn97qlzw/jvelja/gemma2b-sanity-multivllm_0")
+model = AutoModelForCausalLMWithValueHead.from_pretrained("jvelja//tmp/tmptn97qlzw/jvelja/gemma2b-sanity-multivllm_0")
 
 inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
 outputs = model(**inputs, labels=inputs["input_ids"])
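A note on the value-head snippet above: in TRL, `AutoModelForCausalLMWithValueHead.forward` returns a plain `(lm_logits, loss, value)` tuple rather than a `ModelOutput`, so the final call in the README can be unpacked as in this minimal sketch (assuming the `tokenizer`, `model`, and `inputs` objects created above):

```python
# Minimal sketch: unpack the tuple returned by TRL's value-head model.
# Assumes `model`, `tokenizer`, and `inputs` from the README snippet above.
lm_logits, loss, value = model(**inputs, labels=inputs["input_ids"])

print(lm_logits.shape)  # (batch, seq_len, vocab_size) - language-model logits
print(loss)             # causal-LM loss computed from the provided labels
print(value.shape)      # (batch, seq_len) - per-token estimates from the value head
```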
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afaca8044f042ff38b35e1ddf38af6001eea7bd54e3154c207b7f2bf2ca76204
+oid sha256:22a1c2bc4f56acaed4c390887952f5317c32c53d412f47a88b73cdac053c7320
 size 12793376
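Since `config.json` records `"is_peft_model": true` with `unsloth/gemma-2-2b-it` as `model_name`, the `adapter_model.safetensors` above holds a LoRA adapter rather than full model weights. If you want to attach the adapter to the base model directly with `peft`, a minimal sketch could look like the following; the repo id `jvelja/gemma2b-sanity-multivllm_0` is an assumption here (the paths baked into the README point at a temporary directory):

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "unsloth/gemma-2-2b-it"                  # base model recorded in config.json
adapter_id = "jvelja/gemma2b-sanity-multivllm_0"   # assumed Hub repo id for this adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# Attach the LoRA weights stored in adapter_model.safetensors.
model = PeftModel.from_pretrained(base_model, adapter_id)

inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```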
config.json CHANGED
@@ -1,8 +1,8 @@
 {
 "accelerator_kwargs": {},
 "adap_kl_ctrl": true,
-"backward_batch_size": 64,
-"batch_size": 64,
+"backward_batch_size": 128,
+"batch_size": 128,
 "cliprange": 0.2,
 "cliprange_value": 0.2,
 "compare_steps": 1,
@@ -11,12 +11,12 @@
 "exp_name": "stego_trainer",
 "forward_batch_size": null,
 "gamma": 1,
-"global_backward_batch_size": 64,
-"global_batch_size": 64,
+"global_backward_batch_size": 128,
+"global_batch_size": 128,
 "gradient_accumulation_steps": 4,
 "gradient_checkpointing": false,
 "horizon": 10000,
-"init_kl_coef": 0.2,
+"init_kl_coef": 0.05,
 "is_encoder_decoder": false,
 "is_peft_model": true,
 "kl_penalty": "kl",
@@ -24,7 +24,7 @@
 "learning_rate": 2e-05,
 "log_with": "wandb",
 "max_grad_norm": null,
-"mini_batch_size": 16,
+"mini_batch_size": 32,
 "model_name": "unsloth/gemma-2-2b-it",
 "optimize_cuda_cache": true,
 "optimize_device_cache": false,
@@ -38,13 +38,13 @@
 "score_clip": null,
 "seed": 0,
 "steps": 20000,
-"target": 6,
+"target": 12.0,
 "target_kl": 1,
 "task_name": null,
 "tracker_kwargs": {
 "wandb": {
-"name": "cv_gemma-2-2b-it_to_distilbert-base-uncased_EBS64_Joan",
-"notes": "Dataset: cv\n Same Prompt: \n Payload Prefixes: ['Movie Review: This movie was really amazing!', 'Movie Review: This movie was really terrible!']\n Payload Template: Movie Review: This movie was really {payload}!\n Separate Enc/Dec Data: True\n\n Encoder: gemma-2-2b-it (LR: 2e-05)\n Decoder: distilbert-base-uncased (LR: 2e-05)\n Train Loop: v2_dylan\n\n Effective Batch Sizes:\n - Encoder: 64\n - Decoder: 256\n\n Training Iterations:\n - Encoder updates: 80\n - Decoder updates: 240\n - Update Encoder First: False\n\n Temperatures:\n - Decoder Training: 1.0\n - Encoder Training: 1.0\n - Evaluation: 1.0\n\n Encoder Parameters:\n - KL Coefficient: 0.1\n - LoRA: True\n - Quantization: False\n - Output Length: {'min': 50, 'max': 56}\n\n Decoder Parameters:\n - New Classification Head: True\n - Use Probs Reward: False\n - Weight Decay: 0.01\n - Update Parameters: {'head': True, 'body': True}\n\n Training Configuration:\n - Update Encoder: True\n - Update Decoder: True\n - Paraphrase: False\n - Leak Password: False\n - WandB Logging: True\n - Eval Every N: 50\n - Number of Epochs: 100000\n\n Debug:\n - Override Dec Batch: False",
+"name": "cv_gemma-2-2b-it_to_distilbert-base-uncased_EBS128_Joan",
+"notes": "Dataset: cv\n Same Prompt: \n Payload Prefixes: ['Movie Review: This movie was really amazing!', 'Movie Review: This movie was really terrible!']\n Payload Template: Movie Review: This movie was really {payload}!\n Separate Enc/Dec Data: True\n\n Encoder: gemma-2-2b-it (LR: 2e-05)\n Decoder: distilbert-base-uncased (LR: 2e-05)\n Train Loop: v2_dylan\n\n Effective Batch Sizes:\n - Encoder: 128\n - Decoder: 256\n\n Training Iterations:\n - Encoder updates: 80\n - Decoder updates: 240\n - Update Encoder First: False\n\n Temperatures:\n - Decoder Training: 1.0\n - Encoder Training: 1.0\n - Evaluation: 1.0\n\n Encoder Parameters:\n - KL Coefficient: 0.05\n - LoRA: True\n - Quantization: False\n - Output Length: {'min': 50, 'max': 56}\n\n Decoder Parameters:\n - New Classification Head: True\n - Use Probs Reward: False\n - Weight Decay: 0.01\n - Update Parameters: {'head': True, 'body': True}\n\n Training Configuration:\n - Update Encoder: True\n - Update Decoder: True\n - Paraphrase: False\n - Leak Password: False\n - WandB Logging: True\n - Eval Every N: 50\n - Number of Epochs: 100000\n\n Debug:\n - Override Dec Batch: False",
 "tags": [
 "cv",
 "gemma-2-2b-it",
@@ -52,7 +52,7 @@
 "v2_dylan",
 "enc_lr_2e-05",
 "dec_lr_2e-05",
-"enc_eff_bs_64",
+"enc_eff_bs_128",
 "dec_eff_bs_256",
 "enc_updates_80",
 "dec_updates_240",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78e2211ed615bb005af73ea05a94170310ad35e2f7a7f98ef6f832fbf3d4677a
+oid sha256:af3a50393088d7b124587b883915ea2a75a4e8e0d8f735541c6c333cfa80b498
 size 10748