Upload run_cloud_training.py with huggingface_hub

run_cloud_training.py  CHANGED  (+63 -12)
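In brief: the previous direct FastLanguageModel.from_pretrained call, which passed a use_existing_bnb_quantization argument that unsloth rejects, is replaced by a new load_model_safely helper. The helper drops that keyword, tries unsloth 4-bit loading first, retries without load_in_4bit if that still raises an unexpected-keyword TypeError, and finally falls back to standard Hugging Face loading. train() now reads model_name from the config once and reuses it for both the tokenizer and the model. A usage sketch follows the diff.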
@@ -16,7 +16,7 @@ from dotenv import load_dotenv
 import torch
 from datasets import load_dataset
 import transformers
-from transformers import AutoTokenizer, TrainingArguments, Trainer
+from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
 from transformers.data.data_collator import DataCollatorMixin
 from peft import LoraConfig
 from unsloth import FastLanguageModel
@@ -153,6 +153,58 @@ def remove_training_marker():
     os.remove("TRAINING_ACTIVE")
     logger.info("Removed training active marker")
 
+def load_model_safely(model_name, max_seq_length, dtype=None):
+    """
+    Load the model in a safe way that works with Qwen models
+    by trying different loading strategies.
+    """
+    try:
+        logger.info(f"Attempting to load model with unsloth optimizations: {model_name}")
+        # First try the standard unsloth loading
+        try:
+            # Try loading with unsloth but without the problematic parameter
+            model, tokenizer = FastLanguageModel.from_pretrained(
+                model_name=model_name,
+                max_seq_length=max_seq_length,
+                dtype=dtype,
+                load_in_4bit=True,  # This should work for already quantized models
+            )
+            logger.info("Model loaded successfully with unsloth with 4-bit quantization")
+            return model, tokenizer
+
+        except TypeError as e:
+            # If we get a TypeError about unexpected keyword arguments
+            if "unexpected keyword argument" in str(e):
+                logger.warning(f"Unsloth loading error with 4-bit: {e}")
+                logger.info("Trying alternative loading method for Qwen model...")
+
+                # Try loading with different parameters for Qwen model
+                model, tokenizer = FastLanguageModel.from_pretrained(
+                    model_name=model_name,
+                    max_seq_length=max_seq_length,
+                    dtype=dtype,
+                )
+                logger.info("Model loaded successfully with unsloth using alternative method")
+                return model, tokenizer
+            else:
+                # Re-raise if it's a different type error
+                raise
+
+    except Exception as e:
+        # Fallback to standard loading if unsloth methods fail
+        logger.warning(f"Unsloth loading failed: {e}")
+        logger.info("Falling back to standard Hugging Face loading...")
+
+        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=dtype or torch.float16,
+            load_in_4bit=True,
+        )
+        logger.info("Model loaded successfully with standard HF loading")
+        return model, tokenizer
+
 def train(config_path, dataset_name, output_dir):
     """Main training function - RESEARCH TRAINING PHASE ONLY"""
     # Load environment variables
@@ -186,7 +238,8 @@ def train(config_path, dataset_name, output_dir):
     # Print configuration summary
     logger.info("RESEARCH TRAINING PHASE ACTIVE - No output generation")
     logger.info("Configuration Summary:")
-
+    model_name = model_config.get("model_name_or_path")
+    logger.info(f"Model: {model_name}")
     logger.info(f"Dataset: {dataset_name if dataset_name != 'phi4-cognitive-dataset' else DEFAULT_DATASET}")
     logger.info(f"Output directory: {output_dir}")
     logger.info("IMPORTANT: Using already 4-bit quantized model - not re-quantizing")
@@ -197,7 +250,7 @@ def train(config_path, dataset_name, output_dir):
     # Initialize tokenizer (just for model initialization, not for tokenizing data)
     logger.info("Loading tokenizer (for model initialization only, not for tokenizing data)")
    tokenizer = AutoTokenizer.from_pretrained(
-
+        model_name,
         trust_remote_code=True
     )
     tokenizer.pad_token = tokenizer.eos_token
@@ -215,15 +268,13 @@ def train(config_path, dataset_name, output_dir):
         target_modules=lora_config.get("target_modules", ["q_proj", "k_proj", "v_proj", "o_proj"])
     )
 
-    # Initialize model with
-    logger.info("Loading pre-quantized model
-
-
-
-
-
-        use_existing_bnb_quantization=True  # Use the existing quantization
-    )
+    # Initialize model with our safe loading function
+    logger.info("Loading pre-quantized model safely")
+    dtype = torch.float16 if hardware_config.get("fp16", True) else None
+    model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
+
+    # Apply LoRA
+    logger.info("Applying LoRA to model")
     model = FastLanguageModel.get_peft_model(
         model,
         peft_config=peft_config,
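For context, a minimal sketch of how the new helper is driven, assuming run_cloud_training.py is importable as a module; the model id and sequence length below are illustrative placeholders, not values taken from this repository:

import torch
from run_cloud_training import load_model_safely  # assumption: script used as a module

# Hypothetical inputs; in train() these come from the JSON config.
model_name = "org/qwen-model-bnb-4bit"  # placeholder for model_config["model_name_or_path"]
max_seq_length = 2048                   # placeholder for the configured sequence length
dtype = torch.float16                   # mirrors hardware_config fp16=True

# Tries unsloth with load_in_4bit=True, then unsloth without it,
# then plain AutoModelForCausalLM loading, per the fallback chain above.
model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)

Note the design of the fallback chain: the inner TypeError branch keys on the "unexpected keyword argument" substring so that unrelated TypeErrors are re-raised, while the outer except catches everything else and drops through to the plain Hugging Face loader.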