Update fine_tune_inference_test_mistral.py
fine_tune_inference_test_mistral.py
CHANGED
@@ -5,9 +5,15 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 from huggingface_hub import hf_hub_download
+from datetime import datetime
 
-# ===
+# === Environment
 HF_TOKEN = os.getenv("HF_TOKEN")
+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
+os.environ["TORCH_HOME"] = "/app/.torch_cache"
+os.makedirs("/app/.torch_cache", exist_ok=True)
+
+# === Settings
 MODEL_BASE = "mistralai/Mistral-7B-Instruct-v0.2"
 USE_FINE_TUNE = False
 FINE_TUNE_REPO = "UcsTurkey/trained-zips"
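The new environment block matters on hosted containers such as a Hugging Face Space, where the default cache directory is often not writable; pointing TORCH_HOME at a path under /app keeps torch downloads inside the app sandbox. A minimal sketch of the same pattern; the HF_HOME line is an extra assumption, not something this commit sets:

import os

# Redirect framework caches to writable paths (paths are illustrative).
os.environ["TORCH_HOME"] = "/app/.torch_cache"  # torch.hub downloads
os.environ["HF_HOME"] = "/app/.hf_cache"        # assumption: transformers/hub cache
for path in ("/app/.torch_cache", "/app/.hf_cache"):
    os.makedirs(path, exist_ok=True)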
@@ -23,8 +29,7 @@ FALLBACK_ANSWERS = [
 # === Log
 def log(message):
     timestamp = time.strftime("%H:%M:%S")
-    print(f"[{timestamp}] {message}")
-    os.sys.stdout.flush()
+    print(f"[{timestamp}] {message}", flush=True)
 
 # === FastAPI
 app = FastAPI()
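The old pair relied on os.sys, an incidental alias for the sys module; print(..., flush=True) performs the same flush in one idiomatic call, so log lines appear in real time even under buffered container stdout. The explicit equivalent, for comparison only:

import sys
import time

def log(message):
    timestamp = time.strftime("%H:%M:%S")
    print(f"[{timestamp}] {message}")
    sys.stdout.flush()  # identical effect to print(..., flush=True)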
@@ -58,7 +63,7 @@ def root():
         body: JSON.stringify({ user_input: input })
       });
       const data = await res.json();
-      document.getElementById('output').value = data.answer || data.
+      document.getElementById('output').value = data.answer || data.error || 'An error occurred.';
     }
   </script>
 </body>
@@ -77,13 +82,24 @@ def chat(msg: Message):
         return {"error": "Empty input"}
 
     messages = [{"role": "user", "content": user_input}]
-    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
+    if isinstance(input_ids, torch.Tensor):
+        input_ids = input_ids.to(model.device)
+        attention_mask = (input_ids != tokenizer.pad_token_id).long()
+        inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
+    else:
+        inputs = {k: v.to(model.device) for k, v in input_ids.items()}
+        if "attention_mask" not in inputs:
+            inputs["attention_mask"] = (inputs["input_ids"] != tokenizer.pad_token_id).long()
 
     generate_args = {
         "max_new_tokens": 128,
         "return_dict_in_generate": True,
         "output_scores": True,
-        "do_sample": USE_SAMPLING
+        "do_sample": USE_SAMPLING,
+        "pad_token_id": tokenizer.pad_token_id,
+        "eos_token_id": tokenizer.eos_token_id,
+        "renormalize_logits": True
     }
 
     if USE_SAMPLING:
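The new branch handles both shapes apply_chat_template may return, a bare id tensor or a dict-like encoding, and normalizes either into a dict with an explicit attention_mask so generate can tell padding from content; passing pad_token_id and eos_token_id also silences the usual open-ended-generation warnings. A condensed sketch of the same normalization, assuming tokenizer and model are already loaded:

import torch

def build_inputs(tokenizer, model, messages):
    out = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    )
    if isinstance(out, torch.Tensor):  # bare tensor of token ids
        ids = out.to(model.device)
        mask = (ids != tokenizer.pad_token_id).long()  # 1 = real token, 0 = pad
        return {"input_ids": ids, "attention_mask": mask}
    inputs = {k: v.to(model.device) for k, v in out.items()}  # dict-like encoding
    if "attention_mask" not in inputs:
        inputs["attention_mask"] = (inputs["input_ids"] != tokenizer.pad_token_id).long()
    return inputs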
@@ -94,10 +110,11 @@ def chat(msg: Message):
         })
 
     with torch.no_grad():
-        output = model.generate(
+        output = model.generate(**inputs, **generate_args)
 
     decoded = tokenizer.decode(output.sequences[0], skip_special_tokens=True)
-
+    input_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
+    answer = decoded.replace(input_text, "").strip()
 
     if output.scores and len(output.scores) > 0:
         first_token_score = output.scores[0][0]
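Removing the prompt with decoded.replace(input_text, "") works in the common case but can clip the answer if the prompt text happens to recur inside it. An alternative worth noting, offered as a suggestion rather than what this commit does, is to slice off the prompt tokens before decoding, since for decoder-only models output.sequences begins with the prompt:

# Decode only the newly generated tokens (assumes `inputs` and `output`
# as built in the code above).
prompt_len = inputs["input_ids"].shape[1]
new_tokens = output.sequences[0][prompt_len:]
answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()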
@@ -119,14 +136,13 @@ def chat(msg: Message):
         return {"error": str(e)}
 
 def detect_env():
-
-    return device
+    return "cuda" if torch.cuda.is_available() else "cpu"
 
 def setup_model():
     global model, tokenizer
     try:
         device = detect_env()
-        dtype = torch.float32
+        dtype = torch.float32  # you can switch to torch.bfloat16 if you wish
 
         if USE_FINE_TUNE:
             log("📦 Downloading fine-tune zip...")
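detect_env previously returned a device name that was never defined; it now returns a concrete device string. A sketch extending the same idea with a matching dtype choice; the mps branch is an extra assumption beyond what the file checks:

import torch

def detect_env():
    # Prefer CUDA, then Apple Metal, then CPU.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

def pick_dtype(device):
    # bfloat16 roughly halves memory on supporting GPUs; float32 is the safe default.
    return torch.bfloat16 if device == "cuda" else torch.float32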
@@ -144,13 +160,13 @@ def setup_model():
             tokenizer = AutoTokenizer.from_pretrained(os.path.join(extract_dir, "output"), use_fast=False)
             base_model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
             model = PeftModel.from_pretrained(base_model, os.path.join(extract_dir, "output")).to(device)
-
         else:
             log("🧠 Downloading base model...")
             tokenizer = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=False)
             model = AutoModelForCausalLM.from_pretrained(MODEL_BASE, torch_dtype=dtype).to(device)
 
         tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
+        model.config.pad_token_id = tokenizer.pad_token_id
         model.eval()
         log("✅ Model loaded successfully.")
     except Exception as e:
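Mistral's tokenizer ships without a pad token, so the code reuses EOS for padding; writing the id back into model.config.pad_token_id keeps generate from guessing. The fine-tune path in miniature, assuming (as the file does) that the downloaded zip extracts a PEFT adapter plus tokenizer under an output/ directory; extract_dir is illustrative:

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# extract_dir/output mirrors the file's assumed adapter layout.
tokenizer = AutoTokenizer.from_pretrained("extract_dir/output", use_fast=False)
base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = PeftModel.from_pretrained(base, "extract_dir/output")  # LoRA adapter on top

tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
model.eval()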