Twelve2five committed on
Commit
16c5c11
·
verified ·
1 Parent(s): fdebc65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -425,8 +425,27 @@ def train_model(
425
  )
426
  log.append(f"Model files downloaded to {local_model_path}")
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  # Create a bnb configuration for loading the model in 4-bit
429
- # Not strictly necessary for A100 but keeps memory usage lower
430
  progress(0.25, desc="Loading model...")
431
  bnb_config = BitsAndBytesConfig(
432
  load_in_4bit=True,
@@ -435,21 +454,26 @@ def train_model(
435
  bnb_4bit_use_double_quant=False
436
  )
437
 
438
- # Load model and tokenizer
439
  model = AutoModelForCausalLM.from_pretrained(
440
  local_model_path,
441
  quantization_config=bnb_config,
442
  device_map="auto",
443
- torch_dtype=torch.bfloat16,
 
444
  )
445
- tokenizer = AutoTokenizer.from_pretrained(local_model_path)
446
 
447
- # Handle tokenizer settings
448
- if tokenizer.pad_token is None:
449
- tokenizer.pad_token = tokenizer.eos_token
 
 
 
 
450
 
451
- log.append(f"Loaded model vocab size: {tokenizer.vocab_size}")
452
- log.append(f"Input embedding shape: {model.get_input_embeddings().weight.shape}")
 
453
 
454
  # PEFT Configuration (Smaller LoRA for faster iteration)
455
  model = prepare_model_for_kbit_training(model)
 
425
  )
426
  log.append(f"Model files downloaded to {local_model_path}")
427
 
428
+ # Check and fix the model config if needed
429
+ config_path = os.path.join(local_model_path, "config.json")
430
+ if os.path.exists(config_path):
431
+ with open(config_path, 'r') as f:
432
+ config_data = json.load(f)
433
+
434
+ # Fix the rope_scaling configuration
435
+ if 'rope_scaling' in config_data:
436
+ if not isinstance(config_data['rope_scaling'], dict):
437
+ config_data['rope_scaling'] = {"type": "linear", "factor": 2.0}
438
+ elif 'rope_type' in config_data['rope_scaling']:
439
+ # Convert complex rope_scaling to the simple format expected
440
+ rope_factor = config_data['rope_scaling'].get('factor', 2.0)
441
+ config_data['rope_scaling'] = {"type": "linear", "factor": rope_factor}
442
+
443
+ # Write the updated config back
444
+ with open(config_path, 'w') as f:
445
+ json.dump(config_data, f, indent=2)
446
+ log.append("Updated model configuration for rope_scaling")
447
+
448
  # Create a bnb configuration for loading the model in 4-bit
 
449
  progress(0.25, desc="Loading model...")
450
  bnb_config = BitsAndBytesConfig(
451
  load_in_4bit=True,
 
454
  bnb_4bit_use_double_quant=False
455
  )
456
 
457
+ # Load the model with fixed configuration
458
  model = AutoModelForCausalLM.from_pretrained(
459
  local_model_path,
460
  quantization_config=bnb_config,
461
  device_map="auto",
462
+ use_cache=False, # Needed for gradient checkpointing
463
+ torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
464
  )
 
465
 
466
+ # Load the tokenizer
467
+ tokenizer = AutoTokenizer.from_pretrained(
468
+ local_model_path,
469
+ padding_side="right",
470
+ use_fast=True,
471
+ )
472
+ tokenizer.pad_token = tokenizer.eos_token
473
 
474
+ # Find model's architecture type
475
+ model_type = model.config.model_type
476
+ log.append(f"Model architecture type: {model_type}")
477
 
478
  # PEFT Configuration (Smaller LoRA for faster iteration)
479
  model = prepare_model_for_kbit_training(model)