# Snapshot metadata: file size 4,375 bytes, revision 126bdfd.
import sys, os, zipfile, shutil, time, traceback, threading, uvicorn
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from datetime import datetime
from datasets import load_dataset
from huggingface_hub import HfApi
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
import torch
# === Constants ===
# Inclusive index range applied to the sorted list of chunk parquet files.
START_NUMBER = 0
END_NUMBER = 9

# Hugging Face identifiers: base model, tokenized dataset, zip upload repo.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
TOKENIZED_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
ZIP_UPLOAD_REPO = "UcsTurkey/trained-zips"

# Access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Training hyperparameters.
BATCH_SIZE = 1
EPOCHS = 2
MAX_LENGTH = 2048  # NOTE(review): not referenced elsewhere in the visible script

# Output locations: trainer output directory and zip staging folder.
OUTPUT_DIR = "/data/output"
ZIP_FOLDER = "/data/zip_temp"

# Archive name encodes the chunk range, zero-padded to three digits.
zip_name = "trained_model_{:03d}_{:03d}.zip".format(START_NUMBER, END_NUMBER)
ZIP_PATH = os.path.join(ZIP_FOLDER, zip_name)
# === Health check
# Small FastAPI app whose root route answers with a fixed "ok" payload.
app = FastAPI()


@app.get("/")
def health():
    """Return a JSON response with the body {"status": "ok"}."""
    payload = {"status": "ok"}
    return JSONResponse(content=payload)


def run_health_server():
    """Serve `app` through uvicorn on 0.0.0.0:7860."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


# Start the server on a daemon thread so the rest of the script keeps running
# and the thread does not keep the interpreter alive at exit.
_health_thread = threading.Thread(target=run_health_server, daemon=True)
_health_thread.start()
# === Log
def log(message):
    """Print *message* to stdout prefixed with the current local time.

    The emitted line has the form ``[HH:MM:SS] message`` and is flushed
    immediately so it is visible even when stdout is buffered.

    Args:
        message: Text (or any object that can be formatted into an f-string).
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    # flush=True replaces the separate sys.stdout.flush() call.
    print(f"[{timestamp}] {message}", flush=True)
# === Training starts
log("🛠️ Ortam hazırlanıyor...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

log("🧠 Model indiriliyor...")
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
)
# Keep the model config's pad id in sync with the tokenizer's.
base_model.config.pad_token_id = tokenizer.pad_token_id

log("🎯 LoRA adapter uygulanıyor...")
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    fan_in_fan_out=False,
)
# Wrap the base model with the LoRA adapter and report trainable parameters.
model = get_peft_model(base_model, peft_config)
model.print_trainable_parameters()
log("📦 Parquet dosyaları listeleniyor...")
api = HfApi()
files = api.list_repo_files(repo_id=TOKENIZED_DATASET_ID, repo_type="dataset", token=HF_TOKEN)
# Keep only "chunk_*.parquet" entries, then take the inclusive
# [START_NUMBER, END_NUMBER] slice of the sorted list.
# NOTE(review): the sort is lexicographic, so this picks the intended chunks
# only if the numeric part of the file names is zero-padded — confirm.
chunk_files = sorted(f for f in files if f.startswith("chunk_") and f.endswith(".parquet"))
selected_files = chunk_files[START_NUMBER:END_NUMBER + 1]
if not selected_files:
    log("⚠️ Parquet bulunamadı. Eğitim iptal.")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is missing under `python -S` or in frozen interpreters.
    sys.exit(0)
# Trainer configuration: checkpoint once per epoch (keeping at most two),
# log every 10 steps, train in bf16, and disable external reporting.
_training_kwargs = {
    "output_dir": OUTPUT_DIR,
    "per_device_train_batch_size": BATCH_SIZE,
    "num_train_epochs": EPOCHS,
    "save_strategy": "epoch",
    "save_total_limit": 2,
    "learning_rate": 2e-4,
    "disable_tqdm": True,
    "logging_strategy": "steps",
    "logging_steps": 10,
    "report_to": [],
    "bf16": True,
    "fp16": False,
}
training_args = TrainingArguments(**_training_kwargs)
# Train on each selected parquet chunk in turn. A failure on one chunk is
# logged with its traceback and the loop moves on to the next chunk.
for parquet_file in selected_files:
    try:
        log(f"\n📄 Yükleniyor: {parquet_file}")
        dataset = load_dataset(
            path=TOKENIZED_DATASET_ID,
            data_files={"train": parquet_file},
            split="train",
            token=HF_TOKEN,
        )
        sample_count = len(dataset)
        log(f"🔍 {sample_count} örnek")
        # Nothing to train on for an empty chunk.
        if sample_count == 0:
            continue
        # A fresh Trainer is built per chunk; the same `model` object is
        # passed each time, so training continues on that one model.
        trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
        log("🚀 Eğitim başlıyor...")
        trainer.train()
        log("✅ Eğitim tamam.")
    except Exception as err:
        log(f"❌ Hata: {parquet_file} → {err}")
        traceback.print_exc()
# === Zip
# Save the adapter and tokenizer into a staging directory, then pack every
# file from it into ZIP_PATH under a top-level "output/" folder.
log("📦 Model zipleniyor...")
try:
    tmp_dir = os.path.join(ZIP_FOLDER, "temp_save")
    # Start from an empty staging directory so leftovers from an earlier run
    # are not packed into the new archive.
    shutil.rmtree(tmp_dir, ignore_errors=True)
    os.makedirs(tmp_dir, exist_ok=True)
    model.save_pretrained(tmp_dir)
    tokenizer.save_pretrained(tmp_dir)
    with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as zipf:
        # Distinct loop names avoid clobbering the module-level `files` list.
        for root, _, filenames in os.walk(tmp_dir):
            for filename in filenames:
                filepath = os.path.join(root, filename)
                arcname = os.path.relpath(filepath, tmp_dir)
                zipf.write(filepath, arcname=os.path.join("output", arcname))
    log(f"✅ Zip oluşturuldu: {ZIP_PATH}")
except Exception as e:
    log(f"❌ Zipleme hatası: {e}")
    traceback.print_exc()
    # Do not leave a truncated or stale archive behind: the upload step later
    # in this script would otherwise ship it as if it were a valid result.
    try:
        if os.path.exists(ZIP_PATH):
            os.remove(ZIP_PATH)
    except OSError:
        pass
# === Upload
# Push the archive to the zip repository; any failure is logged with its
# traceback and does not stop the script.
try:
    log("☁️ Hugging Face'e yükleniyor...")
    _upload_kwargs = {
        "path_or_fileobj": ZIP_PATH,
        "path_in_repo": zip_name,
        "repo_id": ZIP_UPLOAD_REPO,
        "repo_type": "model",
        "token": HF_TOKEN,
    }
    api.upload_file(**_upload_kwargs)
    log("✅ Upload tamam.")
except Exception as err:
    log(f"❌ Upload hatası: {err}")
    traceback.print_exc()
log("⏸️ Eğitim tamamlandı. Servis bekleme modunda...")
# Keep the main thread alive indefinitely: the health server runs on a daemon
# thread, which would stop as soon as the main thread exits.
while True:
    time.sleep(60)
# End of script.