Spaces: Running on Zero
Adding initial eval code
Browse files
app.py
CHANGED
@@ -85,7 +85,21 @@ def build_prompt(desc:str):
85 |   @spaces.GPU
86 |   @torch.no_grad()
87 |   def draw(model, desc:str):
88 | -     ensure_models()
89 |       prompt = build_prompt(desc)
90 |       ids = tok(prompt, return_tensors="pt").to(DEVICE)
91 |       out = model.generate(**ids, max_new_tokens=MAX_NEW,
 85 |   @spaces.GPU
 86 |   @torch.no_grad()
 87 |   def draw(model, desc:str):
 88 | +     # ensure_models()
 89 | +     from unsloth import FastLanguageModel
 90 | +     global base, tok, lora
 91 | +     if base is None:
 92 | +         print("Loading BASE …")
 93 | +         base, tok = FastLanguageModel.from_pretrained(
 94 | +             BASE_MODEL, max_seq_length=2048,
 95 | +             load_in_4bit=True, quantization_config=bnb_cfg, device_map="auto")
 96 | +         tok.pad_token = tok.eos_token
 97 | +
 98 | +         print("Loading LoRA …")
 99 | +         lora, _ = FastLanguageModel.from_pretrained(
100 | +             ADAPTER_DIR, max_seq_length=2048,
101 | +             load_in_4bit=True, quantization_config=bnb_cfg, device_map="auto")
102 | +         print("✔ models loaded")
103 |       prompt = build_prompt(desc)
104 |       ids = tok(prompt, return_tensors="pt").to(DEVICE)
105 |       out = model.generate(**ids, max_new_tokens=MAX_NEW,