George-API committed on
Commit 9ef545f · verified · 1 Parent(s): 60950b2

Upload run_cloud_training.py with huggingface_hub
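The commit message refers to an upload via huggingface_hub. For reference, a minimal sketch of how such a single-file upload is typically done with the huggingface_hub client; the repo_id below is a placeholder, not taken from this commit:

# Sketch only, assuming the standard huggingface_hub client API; repo_id is a placeholder.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or the HF_TOKEN environment variable
api.upload_file(
    path_or_fileobj="run_cloud_training.py",
    path_in_repo="run_cloud_training.py",
    repo_id="George-API/<repo-name>",  # placeholder repository id
    commit_message="Upload run_cloud_training.py with huggingface_hub",
)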

Files changed (1)
  1. run_cloud_training.py +26 -5
run_cloud_training.py CHANGED
@@ -16,11 +16,14 @@ from dotenv import load_dotenv
 import torch
 from datasets import load_dataset
 import transformers
-from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
+from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM, AutoConfig
 from transformers.data.data_collator import DataCollatorMixin
 from peft import LoraConfig
 from unsloth import FastLanguageModel
 
+# Disable flash attention globally
+os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
+
 # Check if tensorboard is available
 try:
     import tensorboard
@@ -263,13 +266,16 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
     # First try the standard unsloth loading
     try:
         # Try loading with unsloth but without the problematic parameter
+        logger.info("Loading model with flash attention DISABLED")
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name=model_name,
             max_seq_length=max_seq_length,
             dtype=dtype,
            load_in_4bit=True,  # This should work for already quantized models
+            use_flash_attention=False,  # Explicitly disable flash attention
+            attn_implementation="eager"  # Use eager implementation instead
         )
-        logger.info("Model loaded successfully with unsloth with 4-bit quantization")
+        logger.info("Model loaded successfully with unsloth with 4-bit quantization and flash attention disabled")
         return model, tokenizer
 
     except TypeError as e:
@@ -283,6 +289,7 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
             model_name=model_name,
             max_seq_length=max_seq_length,
             dtype=dtype,
+            use_flash_attention=False,  # Explicitly disable flash attention
         )
         logger.info("Model loaded successfully with unsloth using alternative method")
         return model, tokenizer
@@ -295,14 +302,22 @@ def load_model_safely(model_name, max_seq_length, dtype=None):
         logger.warning(f"Unsloth loading failed: {e}")
         logger.info("Falling back to standard Hugging Face loading...")
 
+        # Disable flash attention in transformers config
+        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+        if hasattr(config, "use_flash_attention"):
+            config.use_flash_attention = False
+            logger.info("Disabled flash attention in model config")
+
         tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
+            config=config,
             device_map="auto",
             torch_dtype=dtype or torch.float16,
             load_in_4bit=True,
+            attn_implementation="eager"  # Use eager implementation instead of flash attention
         )
-        logger.info("Model loaded successfully with standard HF loading")
+        logger.info("Model loaded successfully with standard HF loading and flash attention disabled")
         return model, tokenizer
 
 def train(config_path, dataset_name, output_dir):
@@ -318,6 +333,10 @@ def train(config_path, dataset_name, output_dir):
     lora_config = config.get("lora_config", {})
     dataset_config = config.get("dataset_config", {})
 
+    # Override flash attention setting to disable it
+    hardware_config["use_flash_attention"] = False
+    logger.info("Flash attention has been DISABLED due to GPU compatibility issues")
+
     # Verify this is training phase only
     training_phase_only = dataset_config.get("training_phase_only", True)
     if not training_phase_only:
@@ -404,7 +423,7 @@ def train(config_path, dataset_name, output_dir):
         reports = ["none"]
         logger.warning("No reporting backends available - training metrics won't be logged")
 
-    # Set up training arguments
+    # Set up training arguments with flash attention disabled
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=training_config.get("num_train_epochs", 3),
@@ -425,7 +444,9 @@ def train(config_path, dataset_name, output_dir):
         logging_first_step=training_config.get("logging_first_step", True),
         disable_tqdm=training_config.get("disable_tqdm", False),
         # Important: Don't remove columns that don't match model's forward method
-        remove_unused_columns=False
+        remove_unused_columns=False,
+        # Disable flash attention
+        attn_implementation="eager"
     )
 
     # Create trainer with pre-tokenized collator
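
For context, the change above steers model loading away from flash attention by requesting the eager attention implementation. A minimal standalone sketch of that Hugging Face fallback path, assuming a transformers release that accepts attn_implementation and using a placeholder model id (illustrative only, not the committed script):

# Illustrative sketch, not the committed script: load a causal LM with the
# eager attention implementation so flash attention is never used.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "org/model-id"  # placeholder model id

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # 4-bit load, analogous to load_in_4bit=True above
    attn_implementation="eager",  # use the eager attention path instead of flash attention
)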