George-API committed on
Commit 4d8bc74 · verified · 1 Parent(s): 45b968a

Upload run_cloud_training.py with huggingface_hub

Files changed (1):
  1. run_cloud_training.py +3 -30
run_cloud_training.py CHANGED

@@ -469,9 +469,7 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
             max_seq_length=max_seq_length,
             dtype=dtype,
             quantization_config=bnb_config,
-            attn_implementation="eager",  # Force eager attention
-            use_flash_attention=False,  # Explicitly disable flash attention
-            use_xformers_attention=False  # Explicitly disable xformers
+            attn_implementation="eager"  # Force eager attention
         )
         logger.info("Model loaded successfully with unsloth")
 
@@ -479,12 +477,6 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
         if hasattr(model, 'config'):
             if hasattr(model.config, 'attn_implementation'):
                 model.config.attn_implementation = "eager"
-            if hasattr(model.config, 'use_flash_attention'):
-                model.config.use_flash_attention = False
-            if hasattr(model.config, 'use_flash_attention_2'):
-                model.config.use_flash_attention_2 = False
-            if hasattr(model.config, 'use_xformers_attention'):
-                model.config.use_xformers_attention = False
 
         return model, tokenizer
 
@@ -494,9 +486,7 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
 
         # We'll try with HF loading
         attn_params = {
-            "attn_implementation": "eager",  # Always use eager
-            "use_flash_attention": False,  # Explicitly disable flash attention
-            "use_xformers_attention": False  # Explicitly disable xformers
+            "attn_implementation": "eager"  # Always use eager
         }
 
         # Approach 1: Using attn_implementation parameter (newer method)
@@ -507,12 +497,6 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
         # Disable flash attention in config
         if hasattr(config, 'attn_implementation'):
             config.attn_implementation = "eager"
-        if hasattr(config, 'use_flash_attention'):
-            config.use_flash_attention = False
-        if hasattr(config, 'use_flash_attention_2'):
-            config.use_flash_attention_2 = False
-        if hasattr(config, 'use_xformers_attention'):
-            config.use_xformers_attention = False
 
         tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -539,12 +523,6 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
         # Disable flash attention in config
         if hasattr(config, 'attn_implementation'):
             config.attn_implementation = "eager"
-        if hasattr(config, 'use_flash_attention'):
-            config.use_flash_attention = False
-        if hasattr(config, 'use_flash_attention_2'):
-            config.use_flash_attention_2 = False
-        if hasattr(config, 'use_xformers_attention'):
-            config.use_xformers_attention = False
 
         tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -556,9 +534,7 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
             torch_dtype=dtype or torch.float16,
             quantization_config=bnb_config,
             trust_remote_code=True,
-            attn_implementation="eager",
-            use_flash_attention=False,
-            use_xformers_attention=False
+            attn_implementation="eager"
         )
         logger.info("Model loaded successfully with basic HF loading")
         return model, tokenizer
@@ -614,8 +590,6 @@ def train(config_path, dataset_name, output_dir):
 
     # Update hardware config to ensure eager attention
     hardware_config["attn_implementation"] = "eager"
-    hardware_config["use_flash_attention"] = False
-    hardware_config["use_xformers_attention"] = False
 
     # Verify this is training phase only
     training_phase_only = dataset_config.get("training_phase_only", True)
@@ -678,7 +652,6 @@ def train(config_path, dataset_name, output_dir):
 
     # Update hardware config to ensure eager attention
     hardware_config["attn_implementation"] = "eager"
-    hardware_config["use_flash_attention"] = False
 
     model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
 
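For context, after this commit the only attention switch left in the loading paths is attn_implementation="eager"; the removed use_flash_attention / use_xformers_attention keyword arguments are presumably dropped because they are not supported parameters of the loaders used here. Below is a minimal sketch of the retained pattern, assuming a recent transformers release; the model id is a placeholder and not taken from this repository.

# Minimal sketch: force eager attention via attn_implementation only,
# the pattern this commit keeps. Assumes a recent transformers release;
# the model id is a placeholder, not from this repository.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "org/model"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    attn_implementation="eager",  # the single switch retained by the diff
)

# Mirrors the guard the diff keeps: older configs may not expose the attribute
if hasattr(model.config, "attn_implementation"):
    model.config.attn_implementation = "eager"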