Upload run_cloud_training.py with huggingface_hub

run_cloud_training.py  CHANGED  (+42 -46)
@@ -403,7 +403,8 @@ def remove_training_marker():
 
 def load_model_safely(model_name, max_seq_length, dtype=None, use_flash_attention=False):
     """
-    Load the model with
+    Load the model directly with HuggingFace, bypassing Unsloth optimizations
+    to avoid memory-efficient attention issues
     """
     logger.info(f"Loading model: {model_name}")
 
@@ -421,51 +422,39 @@ def load_model_safely(model_name, max_seq_length, dtype=None, use_flash_attention=False):
     )
 
     # Force eager implementation to avoid BMGHK format issues
-    attn_implementation = "eager"
+    attn_implementation = "eager"
     logger.info(f"Forcing eager attention implementation to avoid BMGHK format issues")
 
-    #
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        config=config,
-        device_map="auto",
-        torch_dtype=dtype or torch.float16,
-        quantization_config=bnb_config,
-        trust_remote_code=True,
-        attn_implementation=attn_implementation
-    )
-    logger.info("Model loaded successfully with standard HF loading")
-    return model, tokenizer
+    # Skip Unsloth and use standard HuggingFace loading
+    logger.info("Bypassing Unsloth optimizations to avoid memory-efficient attention issues")
+
+    # Load with standard HuggingFace
+    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+
+    # Set attention implementation in config
+    config.attn_implementation = attn_implementation
+
+    # Disable any custom attention mechanisms
+    if hasattr(config, "use_flash_attention"):
+        config.use_flash_attention = False
+    if hasattr(config, "use_memory_efficient_attention"):
+        config.use_memory_efficient_attention = False
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        config=config,
+        device_map="auto",
+        torch_dtype=dtype or torch.float16,
+        quantization_config=bnb_config,
+        trust_remote_code=True,
+        attn_implementation=attn_implementation,
+        use_flash_attention=False,
+        use_memory_efficient_attention=False
+    )
+    logger.info("Model loaded successfully with standard HF loading")
+    return model, tokenizer
 
 def train(config_path, dataset_name, output_dir):
     """Main training function - RESEARCH TRAINING PHASE ONLY"""

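For reference, the rewritten loader is the standard transformers loading path with eager attention plus the script's existing 4-bit quantization. The sketch below is a minimal, self-contained version of that pattern, not the script itself: bnb_config is defined elsewhere in run_cloud_training.py and is not part of this diff, so the quantization values and the model name shown here are illustrative assumptions.

    # Minimal sketch of the loading pattern above; names marked "illustrative" are assumptions.
    import torch
    from transformers import (
        AutoConfig,
        AutoModelForCausalLM,
        AutoTokenizer,
        BitsAndBytesConfig,
    )

    model_name = "org/model-name"  # illustrative; the real script receives this as a parameter

    # Illustrative 4-bit setup; the script's actual bnb_config is defined outside this hunk.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Pin the eager (non-fused) attention path both on the config and at load time.
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    config.attn_implementation = "eager"
    if hasattr(config, "use_flash_attention"):
        config.use_flash_attention = False
    if hasattr(config, "use_memory_efficient_attention"):
        config.use_memory_efficient_attention = False

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        config=config,
        device_map="auto",
        torch_dtype=torch.float16,
        quantization_config=bnb_config,
        trust_remote_code=True,
        attn_implementation="eager",  # honored by transformers >= 4.36
    )

Note that use_flash_attention and use_memory_efficient_attention are not part of the generic from_pretrained signature; they appear only as config flags on certain remote-code models, which is why the sketch toggles them behind hasattr checks rather than passing them as keyword arguments.
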
@@ -536,7 +525,10 @@ def train(config_path, dataset_name, output_dir):
 
     # Initialize model
     logger.info("Initializing model (preserving 4-bit quantization)")
-
+
+    # Reduce max sequence length to avoid memory issues
+    max_seq_length = min(training_config.get("max_seq_length", 2048), 1024)
+    logger.info(f"Using reduced max sequence length: {max_seq_length} to avoid memory issues")
 
     # Create LoRA config directly
     logger.info("Creating LoRA configuration")

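The cap is a plain min() against whatever the training config asks for. The reason halving the length helps is that the eager attention path materializes the full attention-score matrix, so that term grows quadratically with sequence length. A small illustration with assumed config values (not the script's actual config):

    # Illustrative values only; training_config here is a stand-in for the script's config dict.
    training_config = {"max_seq_length": 2048}
    max_seq_length = min(training_config.get("max_seq_length", 2048), 1024)
    print(max_seq_length)  # 1024

    # Eager attention keeps a (seq_len x seq_len) score matrix per head, so going
    # from 2048 to 1024 tokens shrinks that matrix by a factor of four.
    print((2048 ** 2) / (1024 ** 2))  # 4.0
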
@@ -593,10 +585,14 @@ def train(config_path, dataset_name, output_dir):
     logger.warning("No reporting backends available - training metrics won't be logged")
 
     # Set up training arguments with correct parameters
+    # REDUCE BATCH SIZE to avoid memory issues with attention
+    per_device_train_batch_size = 1  # Reduced from default of 2
+    logger.info(f"Using reduced batch size: {per_device_train_batch_size} to avoid memory issues")
+
     training_args_dict = {
         "output_dir": output_dir,
         "num_train_epochs": training_config.get("num_train_epochs", 3),
-        "per_device_train_batch_size":
+        "per_device_train_batch_size": per_device_train_batch_size,
         "gradient_accumulation_steps": training_config.get("gradient_accumulation_steps", 4),
         "learning_rate": training_config.get("learning_rate", 2e-5),
         "lr_scheduler_type": training_config.get("lr_scheduler_type", "cosine"),

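With the per-device batch size pinned at 1 and gradient accumulation left at its default of 4, each optimizer step still sees an effective batch of 1 × 4 = 4 sequences per device. How training_args_dict is consumed is not part of this diff; the sketch below assumes the usual pattern of unpacking it into TrainingArguments, with placeholder values for everything not visible in the hunk above.

    # Sketch under the assumption that the script unpacks training_args_dict into
    # TrainingArguments further down; that code is not shown in this diff.
    from transformers import TrainingArguments

    per_device_train_batch_size = 1  # value set in the hunk above
    training_args_dict = {
        "output_dir": "./output",  # placeholder path
        "num_train_epochs": 3,
        "per_device_train_batch_size": per_device_train_batch_size,
        "gradient_accumulation_steps": 4,
        "learning_rate": 2e-5,
        "lr_scheduler_type": "cosine",
    }
    training_args = TrainingArguments(**training_args_dict)

    # Effective per-device batch per optimizer step: 1 * 4 = 4 sequences.
    effective_batch = (
        training_args.per_device_train_batch_size
        * training_args.gradient_accumulation_steps
    )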