base_model: EleutherAI/pythia-12b-deduped
base_model_config: EleutherAI/pythia-12b-deduped
base_model_ignore_patterns: pytorch*  # skip the pytorch_model*.bin shards; use safetensors
model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: false
gptq: false
device_map: auto
datasets:
  - path: vicgalle/alpaca-gpt4
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05  # hold out 5% of the data for evaluation
adapter:  # blank for a full fine-tune; set to lora/qlora to train an adapter instead
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 64
lora_alpha: 32
lora_dropout: 0.0
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out: true
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./pythia-12b
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 5
learning_rate: 0.00003
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
train_on_inputs: false
group_by_length: false
bf16: false  # requires an Ampere or newer GPU
fp16: false
float16: true
tf32: true  # requires an Ampere or newer GPU
flash_optimum: true  # requires PyTorch >= 2.0
early_stopping_patience:
resume_from_checkpoint:
local_rank:
gradient_checkpointing: true
fsdp:
fsdp_config:
collator_pad_to_longest: true
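As a quick sanity check before launching a run, the config can be parsed with PyYAML. This is a minimal sketch, assuming the file above is saved as config.yml (the filename is an assumption, not part of the original config):

import yaml

# Parse the training config and print a few key settings.
with open("config.yml") as f:
    cfg = yaml.safe_load(f)

print("base model:  ", cfg["base_model"])
print("dataset:     ", cfg["datasets"][0]["path"])
print("sequence len:", cfg["sequence_len"])
print("epochs:      ", cfg["num_epochs"])

# Keys left blank in the YAML (adapter, wandb_project, fsdp, ...) parse as None,
# which leaves the trainer's defaults in place.
assert cfg["adapter"] is None  # blank adapter -> full fine-tune

Blank values are intentional here: they signal "use the default / feature disabled" rather than an omission, so they are kept in the reconstructed config exactly as in the original.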