Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -425,8 +425,27 @@ def train_model(
|
|
425 |
)
|
426 |
log.append(f"Model files downloaded to {local_model_path}")
|
427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
# Create a bnb configuration for loading the model in 4-bit
|
429 |
-
# Not strictly necessary for A100 but keeps memory usage lower
|
430 |
progress(0.25, desc="Loading model...")
|
431 |
bnb_config = BitsAndBytesConfig(
|
432 |
load_in_4bit=True,
|
@@ -435,21 +454,26 @@ def train_model(
|
|
435 |
bnb_4bit_use_double_quant=False
|
436 |
)
|
437 |
|
438 |
-
# Load model
|
439 |
model = AutoModelForCausalLM.from_pretrained(
|
440 |
local_model_path,
|
441 |
quantization_config=bnb_config,
|
442 |
device_map="auto",
|
443 |
-
|
|
|
444 |
)
|
445 |
-
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
446 |
|
447 |
-
#
|
448 |
-
|
449 |
-
|
|
|
|
|
|
|
|
|
450 |
|
451 |
-
|
452 |
-
|
|
|
453 |
|
454 |
# PEFT Configuration (Smaller LoRA for faster iteration)
|
455 |
model = prepare_model_for_kbit_training(model)
|
|
|
425 |
)
|
426 |
log.append(f"Model files downloaded to {local_model_path}")
|
427 |
|
428 |
+
# Check and fix the model config if needed
|
429 |
+
config_path = os.path.join(local_model_path, "config.json")
|
430 |
+
if os.path.exists(config_path):
|
431 |
+
with open(config_path, 'r') as f:
|
432 |
+
config_data = json.load(f)
|
433 |
+
|
434 |
+
# Normalize rope_scaling: non-dict values and dicts carrying 'rope_type' are replaced with {"type": "linear", "factor": ...}
|
435 |
+
if 'rope_scaling' in config_data:
|
436 |
+
if not isinstance(config_data['rope_scaling'], dict):
|
437 |
+
config_data['rope_scaling'] = {"type": "linear", "factor": 2.0}
|
438 |
+
elif 'rope_type' in config_data['rope_scaling']:
|
439 |
+
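# Collapse the extended rope_scaling dict to {"type": "linear", "factor": ...}; only 'factor' is kept (default 2.0), the original rope_type and other fields are dropped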
|
440 |
+
rope_factor = config_data['rope_scaling'].get('factor', 2.0)
|
441 |
+
config_data['rope_scaling'] = {"type": "linear", "factor": rope_factor}
|
442 |
+
|
443 |
+
# Write the updated config back
|
444 |
+
with open(config_path, 'w') as f:
|
445 |
+
json.dump(config_data, f, indent=2)
|
446 |
+
log.append("Updated model configuration for rope_scaling")
|
447 |
+
|
448 |
# Create a bnb configuration for loading the model in 4-bit
|
|
|
449 |
progress(0.25, desc="Loading model...")
|
450 |
bnb_config = BitsAndBytesConfig(
|
451 |
load_in_4bit=True,
|
|
|
454 |
bnb_4bit_use_double_quant=False
|
455 |
)
|
456 |
|
457 |
+
# Load the model with fixed configuration
|
458 |
model = AutoModelForCausalLM.from_pretrained(
|
459 |
local_model_path,
|
460 |
quantization_config=bnb_config,
|
461 |
device_map="auto",
|
462 |
+
use_cache=False, # Needed for gradient checkpointing
|
463 |
+
torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
|
464 |
)
|
|
|
465 |
|
466 |
+
# Load the tokenizer
|
467 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
468 |
+
local_model_path,
|
469 |
+
padding_side="right",
|
470 |
+
use_fast=True,
|
471 |
+
)
|
472 |
+
tokenizer.pad_token = tokenizer.eos_token
|
473 |
|
474 |
+
# Record the model's architecture type (read from its config) in the log
|
475 |
+
model_type = model.config.model_type
|
476 |
+
log.append(f"Model architecture type: {model_type}")
|
477 |
|
478 |
# PEFT Configuration (Smaller LoRA for faster iteration)
|
479 |
model = prepare_model_for_kbit_training(model)
|