Upload folder using huggingface_hub
Browse files- config.json +178 -0
- model.safetensors +3 -0
- train_config.json +112 -0
config.json
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "smolvla",
|
3 |
+
"n_obs_steps": 1,
|
4 |
+
"normalization_mapping": {
|
5 |
+
"VISUAL": "IDENTITY",
|
6 |
+
"STATE": "MEAN_STD",
|
7 |
+
"ACTION": "MEAN_STD"
|
8 |
+
},
|
9 |
+
"input_features": {
|
10 |
+
"observation.state": {
|
11 |
+
"type": "STATE",
|
12 |
+
"shape": [
|
13 |
+
6
|
14 |
+
]
|
15 |
+
},
|
16 |
+
"observation.images.wrist": {
|
17 |
+
"type": "VISUAL",
|
18 |
+
"shape": [
|
19 |
+
3,
|
20 |
+
480,
|
21 |
+
640
|
22 |
+
]
|
23 |
+
},
|
24 |
+
"observation.images.top": {
|
25 |
+
"type": "VISUAL",
|
26 |
+
"shape": [
|
27 |
+
3,
|
28 |
+
480,
|
29 |
+
640
|
30 |
+
]
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"output_features": {
|
34 |
+
"action": {
|
35 |
+
"type": "ACTION",
|
36 |
+
"shape": [
|
37 |
+
6
|
38 |
+
]
|
39 |
+
}
|
40 |
+
},
|
41 |
+
"chunk_size": 50,
|
42 |
+
"n_action_steps": 50,
|
43 |
+
"max_state_dim": 6,
|
44 |
+
"max_action_dim": 6,
|
45 |
+
"resize_imgs_with_padding": [
|
46 |
+
640,
|
47 |
+
480
|
48 |
+
],
|
49 |
+
"empty_cameras": 0,
|
50 |
+
"adapt_to_pi_aloha": false,
|
51 |
+
"use_delta_joint_actions_aloha": false,
|
52 |
+
"tokenizer_max_length": 48,
|
53 |
+
"num_steps": 10,
|
54 |
+
"use_cache": true,
|
55 |
+
"freeze_vision_encoder": true,
|
56 |
+
"train_expert_only": true,
|
57 |
+
"train_state_proj": true,
|
58 |
+
"optimizer_lr": 0.0001,
|
59 |
+
"optimizer_betas": [
|
60 |
+
0.9,
|
61 |
+
0.95
|
62 |
+
],
|
63 |
+
"optimizer_eps": 1e-08,
|
64 |
+
"optimizer_weight_decay": 1e-10,
|
65 |
+
"optimizer_grad_clip_norm": 10,
|
66 |
+
"scheduler_warmup_steps": 1000,
|
67 |
+
"scheduler_decay_steps": 30000,
|
68 |
+
"scheduler_decay_lr": 2.5e-06,
|
69 |
+
"vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
|
70 |
+
"load_vlm_weights": true,
|
71 |
+
"attention_mode": "cross_attn",
|
72 |
+
"prefix_length": 0,
|
73 |
+
"pad_language_to": "max_length",
|
74 |
+
"num_expert_layers": 0,
|
75 |
+
"num_vlm_layers": 16,
|
76 |
+
"self_attn_every_n_layers": 2,
|
77 |
+
"expert_width_multiplier": 0.75,
|
78 |
+
"robot_config": {
|
79 |
+
"robot_type": "so101_follower",
|
80 |
+
"codebase_version": "v2.1",
|
81 |
+
"total_episodes": 86,
|
82 |
+
"total_frames": 23660,
|
83 |
+
"total_tasks": 1,
|
84 |
+
"total_videos": 172,
|
85 |
+
"total_chunks": 1,
|
86 |
+
"chunks_size": 1000,
|
87 |
+
"fps": 30,
|
88 |
+
"splits": {
|
89 |
+
"train": "0:86"
|
90 |
+
},
|
91 |
+
"data_path": "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
|
92 |
+
"video_path": "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4",
|
93 |
+
"action_names": [
|
94 |
+
"shoulder_pan.pos",
|
95 |
+
"shoulder_lift.pos",
|
96 |
+
"elbow_flex.pos",
|
97 |
+
"wrist_flex.pos",
|
98 |
+
"wrist_roll.pos",
|
99 |
+
"gripper.pos"
|
100 |
+
],
|
101 |
+
"camera_views": ["wrist", "top"],
|
102 |
+
"features": {
|
103 |
+
"action": {
|
104 |
+
"dtype": "float32",
|
105 |
+
"shape": [6],
|
106 |
+
"names": [
|
107 |
+
"shoulder_pan.pos",
|
108 |
+
"shoulder_lift.pos",
|
109 |
+
"elbow_flex.pos",
|
110 |
+
"wrist_flex.pos",
|
111 |
+
"wrist_roll.pos",
|
112 |
+
"gripper.pos"
|
113 |
+
]
|
114 |
+
},
|
115 |
+
"observation.state": {
|
116 |
+
"dtype": "float32",
|
117 |
+
"shape": [6],
|
118 |
+
"names": [
|
119 |
+
"shoulder_pan.pos",
|
120 |
+
"shoulder_lift.pos",
|
121 |
+
"elbow_flex.pos",
|
122 |
+
"wrist_flex.pos",
|
123 |
+
"wrist_roll.pos",
|
124 |
+
"gripper.pos"
|
125 |
+
]
|
126 |
+
},
|
127 |
+
"timestamp": {
|
128 |
+
"dtype": "float32",
|
129 |
+
"shape": [1],
|
130 |
+
"names": null
|
131 |
+
},
|
132 |
+
"episode_index": {
|
133 |
+
"dtype": "int64",
|
134 |
+
"shape": [1],
|
135 |
+
"names": null
|
136 |
+
},
|
137 |
+
"frame_index": {
|
138 |
+
"dtype": "int64",
|
139 |
+
"shape": [1],
|
140 |
+
"names": null
|
141 |
+
},
|
142 |
+
"task_index": {
|
143 |
+
"dtype": "int64",
|
144 |
+
"shape": [1],
|
145 |
+
"names": null
|
146 |
+
},
|
147 |
+
"index": {
|
148 |
+
"dtype": "int64",
|
149 |
+
"shape": [1],
|
150 |
+
"names": null
|
151 |
+
},
|
152 |
+
"observation.images.wrist": {
|
153 |
+
"dtype": "video",
|
154 |
+
"shape": [480, 640, 3],
|
155 |
+
"names": ["height", "width", "channels"],
|
156 |
+
"info": {
|
157 |
+
"video.fps": 30,
|
158 |
+
"video.codec": "av1",
|
159 |
+
"video.pix_fmt": "yuv420p",
|
160 |
+
"video.is_depth_map": false,
|
161 |
+
"has_audio": false
|
162 |
+
}
|
163 |
+
},
|
164 |
+
"observation.images.top": {
|
165 |
+
"dtype": "video",
|
166 |
+
"shape": [480, 640, 3],
|
167 |
+
"names": ["height", "width", "channels"],
|
168 |
+
"info": {
|
169 |
+
"video.fps": 30,
|
170 |
+
"video.codec": "av1",
|
171 |
+
"video.pix_fmt": "yuv420p",
|
172 |
+
"video.is_depth_map": false,
|
173 |
+
"has_audio": false
|
174 |
+
}
|
175 |
+
}
|
176 |
+
}
|
177 |
+
}
|
178 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9f15de7415427b7754ad0cc1336e57089a819af17c9755ee408e66f93628d4d
|
3 |
+
size 906713296
|
train_config.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"batch_size": 64,
|
2 |
+
"dataset": {"episodes": null,
|
3 |
+
"image_transforms": {"enable": false,
|
4 |
+
"max_num_transforms": 3,
|
5 |
+
"random_order": false,
|
6 |
+
"tfs": {"brightness": {"kwargs": {"brightness": [0.8,
|
7 |
+
1.2]},
|
8 |
+
"type": "ColorJitter",
|
9 |
+
"weight": 1.0},
|
10 |
+
"contrast": {"kwargs": {"contrast": [0.8,
|
11 |
+
1.2]},
|
12 |
+
"type": "ColorJitter",
|
13 |
+
"weight": 1.0},
|
14 |
+
"hue": {"kwargs": {"hue": [-0.05,
|
15 |
+
0.05]},
|
16 |
+
"type": "ColorJitter",
|
17 |
+
"weight": 1.0},
|
18 |
+
"saturation": {"kwargs": {"saturation": [0.5,
|
19 |
+
1.5]},
|
20 |
+
"type": "ColorJitter",
|
21 |
+
"weight": 1.0},
|
22 |
+
"sharpness": {"kwargs": {"sharpness": [0.5,
|
23 |
+
1.5]},
|
24 |
+
"type": "SharpnessJitter",
|
25 |
+
"weight": 1.0}}},
|
26 |
+
"repo_id": "florian-moyen/strawberries_merged",
|
27 |
+
"revision": null,
|
28 |
+
"root": null,
|
29 |
+
"use_imagenet_stats": true,
|
30 |
+
"video_backend": "torchcodec"},
|
31 |
+
"env": null,
|
32 |
+
"eval": {"batch_size": 50, "n_episodes": 50, "use_async_envs": false},
|
33 |
+
"eval_freq": 20000,
|
34 |
+
"job_name": "strawberries_job",
|
35 |
+
"log_freq": 200,
|
36 |
+
"num_workers": 4,
|
37 |
+
"optimizer": {"betas": [0.9, 0.95],
|
38 |
+
"eps": 1e-08,
|
39 |
+
"grad_clip_norm": 10.0,
|
40 |
+
"lr": 0.0001,
|
41 |
+
"type": "adamw",
|
42 |
+
"weight_decay": 1e-10},
|
43 |
+
"output_dir": "outputs/train/strawberries_model",
|
44 |
+
"policy": {"adapt_to_pi_aloha": false,
|
45 |
+
"add_image_special_tokens": false,
|
46 |
+
"attention_mode": "cross_attn",
|
47 |
+
"chunk_size": 50,
|
48 |
+
"device": "cuda",
|
49 |
+
"empty_cameras": 0,
|
50 |
+
"expert_width_multiplier": 0.75,
|
51 |
+
"freeze_vision_encoder": true,
|
52 |
+
"input_features": {"observation.image": {"shape": [3, 256, 256],
|
53 |
+
"type": "VISUAL"},
|
54 |
+
"observation.image2": {"shape": [3, 256, 256],
|
55 |
+
"type": "VISUAL"},
|
56 |
+
"observation.image3": {"shape": [3, 256, 256],
|
57 |
+
"type": "VISUAL"},
|
58 |
+
"observation.state": {"shape": [6],
|
59 |
+
"type": "STATE"}},
|
60 |
+
"load_vlm_weights": true,
|
61 |
+
"max_action_dim": 32,
|
62 |
+
"max_period": 4.0,
|
63 |
+
"max_state_dim": 32,
|
64 |
+
"min_period": 0.004,
|
65 |
+
"n_action_steps": 50,
|
66 |
+
"n_obs_steps": 1,
|
67 |
+
"normalization_mapping": {"ACTION": "MEAN_STD",
|
68 |
+
"STATE": "MEAN_STD",
|
69 |
+
"VISUAL": "IDENTITY"},
|
70 |
+
"num_expert_layers": 0,
|
71 |
+
"num_steps": 10,
|
72 |
+
"num_vlm_layers": 16,
|
73 |
+
"optimizer_betas": [0.9, 0.95],
|
74 |
+
"optimizer_eps": 1e-08,
|
75 |
+
"optimizer_grad_clip_norm": 10.0,
|
76 |
+
"optimizer_lr": 0.0001,
|
77 |
+
"optimizer_weight_decay": 1e-10,
|
78 |
+
"output_features": {"action": {"shape": [6],
|
79 |
+
"type": "ACTION"}},
|
80 |
+
"pad_language_to": "max_length",
|
81 |
+
"prefix_length": 0,
|
82 |
+
"resize_imgs_with_padding": [512, 512],
|
83 |
+
"scheduler_decay_lr": 2.5e-06,
|
84 |
+
"scheduler_decay_steps": 30000,
|
85 |
+
"scheduler_warmup_steps": 1000,
|
86 |
+
"self_attn_every_n_layers": 2,
|
87 |
+
"tokenizer_max_length": 48,
|
88 |
+
"train_expert_only": true,
|
89 |
+
"train_state_proj": true,
|
90 |
+
"type": "smolvla",
|
91 |
+
"use_amp": false,
|
92 |
+
"use_cache": true,
|
93 |
+
"use_delta_joint_actions_aloha": false,
|
94 |
+
"vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"},
|
95 |
+
"resume": false,
|
96 |
+
"save_checkpoint": true,
|
97 |
+
"save_freq": 20000,
|
98 |
+
"scheduler": {"decay_lr": 2.5e-06,
|
99 |
+
"num_decay_steps": 30000,
|
100 |
+
"num_warmup_steps": 1000,
|
101 |
+
"peak_lr": 0.0001,
|
102 |
+
"type": "cosine_decay_with_warmup"},
|
103 |
+
"seed": 1000,
|
104 |
+
"steps": 20000,
|
105 |
+
"use_policy_training_preset": true,
|
106 |
+
"wandb": {"disable_artifact": false,
|
107 |
+
"enable": true,
|
108 |
+
"entity": null,
|
109 |
+
"mode": null,
|
110 |
+
"notes": null,
|
111 |
+
"project": "lerobot",
|
112 |
+
"run_id": null}}
|