George-API committed on
Commit a08fcdc · verified · 1 parent: 8ef55a9

Upload run_cloud_training.py with huggingface_hub

Files changed (1)
  1. run_cloud_training.py +118 -80
run_cloud_training.py CHANGED
@@ -24,6 +24,21 @@ from unsloth import FastLanguageModel
  # Disable flash attention globally
  os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"

+ # Try to install flash-attention (for systems that support it)
+ try:
+ import subprocess
+ import sys
+
+ logger = logging.getLogger(__name__)
+ logger.info("Attempting to install flash-attention...")
+
+ # Install flash-attention
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"])
+ logger.info("Successfully installed flash-attention")
+ except Exception as e:
+ logger.warning(f"Failed to install flash-attention: {e}")
+ logger.info("Continuing without flash-attention")
+
  # Check if tensorboard is available
  try:
  import tensorboard
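
For reference, a minimal sketch of the same guarded install that first probes whether flash_attn is already importable, so machines that already ship the wheel skip the slow build. This is illustrative only; it assumes the script's logging setup and the standard flash-attn package/import names.

import importlib.util
import logging
import subprocess
import sys

logger = logging.getLogger(__name__)

# Only attempt the (potentially slow) pip install when flash_attn is not importable yet.
if importlib.util.find_spec("flash_attn") is None:
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"]
        )
        logger.info("flash-attn installed")
    except subprocess.CalledProcessError as e:
        logger.warning(f"flash-attn install failed ({e}); continuing without it")
else:
    logger.info("flash-attn already available")
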
@@ -76,20 +91,25 @@ def load_and_prepare_dataset(dataset_name, config):
  # Get the dataset config
  dataset_config = config.get("dataset_config", {})
  sort_field = dataset_config.get("sort_by_field", "prompt_number")
- sort_direction = dataset_config.get("sort_direction", "ascending")

- # Sort the dataset by prompt_number
- logger.info(f"Sorting dataset by {sort_field} in {sort_direction} order")
- if sort_direction == "ascending":
- dataset = dataset.sort(sort_field)
- else:
- dataset = dataset.sort(sort_field, reverse=True)
+ # Always sort in ascending order by prompt_number
+ logger.info(f"Sorting dataset by {sort_field} in ascending order")
+ dataset = dataset.sort(sort_field)

- # Add shuffle with fixed seed if specified
- if "shuffle_seed" in dataset_config:
- shuffle_seed = dataset_config.get("shuffle_seed")
- logger.info(f"Shuffling dataset with seed {shuffle_seed}")
- dataset = dataset.shuffle(seed=shuffle_seed)
+ # Verify sorting
+ if len(dataset) > 1:
+ first_prompt = dataset[0].get(sort_field, None)
+ last_prompt = dataset[-1].get(sort_field, None)
+ logger.info(f"Dataset sorted: first {sort_field}={first_prompt}, last {sort_field}={last_prompt}")
+
+ # Additional verification of a few samples
+ sample_indices = [0, len(dataset)//2, len(dataset)-1]
+ sample_prompts = [dataset[i].get(sort_field, None) for i in sample_indices]
+ logger.info(f"Sample prompt numbers: {sample_prompts}")
+
+ # Verify order is ascending
+ if not all(sample_prompts[i] <= sample_prompts[i+1] for i in range(len(sample_prompts)-1)):
+ logger.warning("Dataset may not be properly sorted! Please check the ordering.")

  # Print dataset structure for debugging
  logger.info(f"Dataset loaded with {len(dataset)} entries")
@@ -263,62 +283,77 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
  """
  try:
  logger.info(f"Attempting to load model with unsloth optimizations: {model_name}")
- # First try the standard unsloth loading
+
+ # Create BitsAndBytesConfig for 4-bit quantization
+ from transformers import BitsAndBytesConfig
+ bnb_config = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_compute_dtype=torch.float16,
+ bnb_4bit_quant_type="nf4",
+ bnb_4bit_use_double_quant=True
+ )
+
+ # First try loading with unsloth but without flash attention
  try:
- # Try loading with unsloth but without the problematic parameter
- logger.info("Loading model with flash attention DISABLED")
+ logger.info("Loading model with unsloth optimizations")
+ # Don't pass any flash attention parameters to unsloth
  model, tokenizer = FastLanguageModel.from_pretrained(
  model_name=model_name,
  max_seq_length=max_seq_length,
  dtype=dtype,
- load_in_4bit=True, # This should work for already quantized models
- use_flash_attention=False, # Explicitly disable flash attention
- attn_implementation="eager" # Use eager implementation instead
+ quantization_config=bnb_config
  )
- logger.info("Model loaded successfully with unsloth with 4-bit quantization and flash attention disabled")
+ logger.info("Model loaded successfully with unsloth")
  return model, tokenizer

- except TypeError as e:
- # If we get a TypeError about unexpected keyword arguments
- if "unexpected keyword argument" in str(e):
- logger.warning(f"Unsloth loading error with 4-bit: {e}")
- logger.info("Trying alternative loading method for Qwen model...")
+ except Exception as e:
+ logger.warning(f"Unsloth loading failed: {e}")
+ logger.info("Falling back to standard Hugging Face loading...")
+
+ # We'll try two approaches with HF loading
+
+ # Approach 1: Using attn_implementation parameter (newer method)
+ try:
+ logger.info("Trying HF loading with attn_implementation parameter")
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

- # Try loading with different parameters for Qwen model
- model, tokenizer = FastLanguageModel.from_pretrained(
- model_name=model_name,
- max_seq_length=max_seq_length,
- dtype=dtype,
- use_flash_attention=False, # Explicitly disable flash attention
+ # The proper way to disable flash attention in newer transformers
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ config=config,
+ device_map="auto",
+ torch_dtype=dtype or torch.float16,
+ quantization_config=bnb_config,
+ trust_remote_code=True,
+ attn_implementation="eager" # Use eager instead of flash_attention_2
  )
- logger.info("Model loaded successfully with unsloth using alternative method")
+ logger.info("Model loaded successfully with HF using attn_implementation='eager'")
+ return model, tokenizer
+
+ except Exception as e:
+ logger.warning(f"HF loading with attn_implementation failed: {e}")
+ logger.info("Trying fallback method...")
+
+ # Approach 2: Complete fallback with minimal parameters
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+ # Most basic loading without any attention parameters
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ config=config,
+ device_map="auto",
+ torch_dtype=dtype or torch.float16,
+ quantization_config=bnb_config,
+ trust_remote_code=True
+ )
+ logger.info("Model loaded successfully with basic HF loading")
  return model, tokenizer
- else:
- # Re-raise if it's a different type error
- raise

  except Exception as e:
- # Fallback to standard loading if unsloth methods fail
- logger.warning(f"Unsloth loading failed: {e}")
- logger.info("Falling back to standard Hugging Face loading...")
-
- # Disable flash attention in transformers config
- config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
- if hasattr(config, "use_flash_attention"):
- config.use_flash_attention = False
- logger.info("Disabled flash attention in model config")
-
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(
- model_name,
- config=config,
- device_map="auto",
- torch_dtype=dtype or torch.float16,
- load_in_4bit=True,
- attn_implementation="eager" # Use eager implementation instead of flash attention
- )
- logger.info("Model loaded successfully with standard HF loading and flash attention disabled")
- return model, tokenizer
+ logger.error(f"All model loading attempts failed: {e}")
+ raise

  def train(config_path, dataset_name, output_dir):
  """Main training function - RESEARCH TRAINING PHASE ONLY"""
@@ -423,31 +458,34 @@ def train(config_path, dataset_name, output_dir):
  reports = ["none"]
  logger.warning("No reporting backends available - training metrics won't be logged")

- # Set up training arguments with flash attention disabled
- training_args = TrainingArguments(
- output_dir=output_dir,
- num_train_epochs=training_config.get("num_train_epochs", 3),
- per_device_train_batch_size=training_config.get("per_device_train_batch_size", 2),
- gradient_accumulation_steps=training_config.get("gradient_accumulation_steps", 4),
- learning_rate=training_config.get("learning_rate", 2e-5),
- lr_scheduler_type=training_config.get("lr_scheduler_type", "cosine"),
- warmup_ratio=training_config.get("warmup_ratio", 0.03),
- weight_decay=training_config.get("weight_decay", 0.01),
- optim=training_config.get("optim", "adamw_torch"),
- logging_steps=training_config.get("logging_steps", 10),
- save_steps=training_config.get("save_steps", 200),
- save_total_limit=training_config.get("save_total_limit", 3),
- fp16=hardware_config.get("fp16", True),
- bf16=hardware_config.get("bf16", False),
- max_grad_norm=training_config.get("max_grad_norm", 0.3),
- report_to=reports,
- logging_first_step=training_config.get("logging_first_step", True),
- disable_tqdm=training_config.get("disable_tqdm", False),
- # Important: Don't remove columns that don't match model's forward method
- remove_unused_columns=False,
- # Disable flash attention
- attn_implementation="eager"
- )
+ # Set up training arguments with correct parameters
+ # Extract only the valid parameters from hardware_config
+ training_args_dict = {
+ "output_dir": output_dir,
+ "num_train_epochs": training_config.get("num_train_epochs", 3),
+ "per_device_train_batch_size": training_config.get("per_device_train_batch_size", 2),
+ "gradient_accumulation_steps": training_config.get("gradient_accumulation_steps", 4),
+ "learning_rate": training_config.get("learning_rate", 2e-5),
+ "lr_scheduler_type": training_config.get("lr_scheduler_type", "cosine"),
+ "warmup_ratio": training_config.get("warmup_ratio", 0.03),
+ "weight_decay": training_config.get("weight_decay", 0.01),
+ "optim": training_config.get("optim", "adamw_torch"),
+ "logging_steps": training_config.get("logging_steps", 10),
+ "save_steps": training_config.get("save_steps", 200),
+ "save_total_limit": training_config.get("save_total_limit", 3),
+ "fp16": hardware_config.get("fp16", True),
+ "bf16": hardware_config.get("bf16", False),
+ "max_grad_norm": training_config.get("max_grad_norm", 0.3),
+ "report_to": reports,
+ "logging_first_step": training_config.get("logging_first_step", True),
+ "disable_tqdm": training_config.get("disable_tqdm", False),
+ "remove_unused_columns": False,
+ "shuffle_buffer_size": 1,
+ "seed": 42
+ }
+
+ # Create TrainingArguments with validated parameters
+ training_args = TrainingArguments(**training_args_dict)

  # Create trainer with pre-tokenized collator
  trainer = Trainer(
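
The last hunk builds TrainingArguments from a plain dict. A sketch of one way to keep only keys that TrainingArguments actually accepts before constructing it; the helper name is illustrative, and it relies on TrainingArguments being a dataclass.

import dataclasses
from transformers import TrainingArguments

def build_training_args(raw: dict) -> TrainingArguments:
    # Keep only keys that are real TrainingArguments fields, so stray config
    # entries cannot raise "unexpected keyword argument" at construction time.
    allowed = {f.name for f in dataclasses.fields(TrainingArguments)}
    filtered = {k: v for k, v in raw.items() if k in allowed}
    return TrainingArguments(**filtered)

# Example usage:
args = build_training_args({"output_dir": "outputs", "seed": 42, "logging_steps": 10})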