Space: Building on Zero
Commit 4b732ce by AbstractPhil (parent: 3d65633)
Commit message: "yes"
app.py (CHANGED)
```diff
@@ -73,8 +73,8 @@ def _hf_login() -> None:
     else:
         print("[HF Auth] No token found in environment variables")
 
-# Login
-_hf_login()
+# Login is handled by Space OAuth/session; avoid explicit CLI login here to prevent OAuth var errors
+# _hf_login()
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
```
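The commit disables the explicit login call because, per the new comment, the Space's OAuth/session already handles authentication and a CLI-style login can trip over the OAuth environment variables. The body of `_hf_login` is only partly visible in this diff; below is a minimal sketch of what such an env-token helper typically looks like. The `HF_TOKEN` / `HUGGING_FACE_HUB_TOKEN` variable names are assumptions; only the printed fallback message comes from app.py.

```python
# Sketch only: a plausible _hf_login, inferred from the visible else-branch.
import os
from huggingface_hub import login

def _hf_login() -> None:
    # Check the common token variable names (assumed, not shown in the diff)
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    if token:
        login(token=token)  # authenticate this process for Hub access
    else:
        print("[HF Auth] No token found in environment variables")
```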
```diff
@@ -364,12 +364,17 @@ def zerogpu_generate(full_prompt,
         out_ids = model.generate(
             **inputs,
             do_sample=bool(gen_kwargs.get("do_sample", True)),
-            temperature=float(gen_kwargs.get("temperature", 0.…
-            top_p=float(gen_kwargs.get("top_p"…
-            top_k=(int(gen_kwargs.get("top_k")) if gen_kwargs.get("top_k")…
+            temperature=float(gen_kwargs.get("temperature", 0.6)),
+            top_p=(float(gen_kwargs.get("top_p")) if gen_kwargs.get("top_p") is not None else None),
+            top_k=(int(gen_kwargs.get("top_k")) if gen_kwargs.get("top_k") else None),
             max_new_tokens=int(gen_kwargs.get("max_new_tokens", MAX_DEF)),
             pad_token_id=model.config.pad_token_id,
             eos_token_id=tokenizer.eos_token_id,
+            repetition_penalty=float(gen_kwargs.get("repetition_penalty", 1.1)),
+            no_repeat_ngram_size=int(gen_kwargs.get("no_repeat_ngram_size", 6)),
+            logits_processor=logits_processor,
+        )
+        eos_token_id=tokenizer.eos_token_id,
 
             logits_processor=logits_processor,
             repetition_penalty=float(gen_kwargs.get("repetition_penalty", 1.2)),
```
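The three removed lines are cut off in the rendered diff view; the `…` marks where each line was truncated, so the old default values are not fully recoverable. The replacement lines gate `top_p` and `top_k` on their presence in `gen_kwargs` and pass None when unset. Note also that the new hunk closes `model.generate(` at the added `)` while a stray `eos_token_id=` line and the earlier `logits_processor=` / `repetition_penalty=` context lines remain dangling below it, which reads as leftover duplicate text from the edit. A minimal, self-contained sketch of the gating pattern, with hypothetical example values:

```python
# Sketch of the None-gating pattern used in the new lines: optional sampling
# knobs are read from gen_kwargs and passed as None when unset, so generate()
# falls back to the model's generation config. Values here are examples.
gen_kwargs = {"temperature": 0.6, "top_p": None, "top_k": 50}

sampling = dict(
    temperature=float(gen_kwargs.get("temperature", 0.6)),
    top_p=(float(gen_kwargs["top_p"]) if gen_kwargs.get("top_p") is not None else None),
    top_k=(int(gen_kwargs["top_k"]) if gen_kwargs.get("top_k") else None),
)

# Equivalent call shape: model.generate(**inputs, do_sample=True, **sampling, ...)
print(sampling)  # {'temperature': 0.6, 'top_p': None, 'top_k': 50}
```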
```diff
@@ -424,7 +429,7 @@ def zerogpu_generate(full_prompt,
 # Simple (non-Harmony) GPU path — matches your minimal example
 # -----------------------
 @spaces.GPU(duration=120)
-def zerogpu_generate_simple(prompt_str: str, gen_kwargs: Dict[str, Any], rose_map: Optional[Dict[str, float]], rose_alpha: float, seed: Optional[int]) -> Dict[str, str]:
+def zerogpu_generate_simple(prompt_str: str, gen_kwargs: Dict[str, Any], rose_map: Optional[Dict[str, float]], rose_alpha: float, rose_score: Optional[float], seed: Optional[int]) -> Dict[str, str]:
     """Straight chat_template path. No Harmony tokens. Slices completion from prompt_len.
     Mirrors the minimal HF example and avoids header loops entirely."""
     model = None
```
```diff
@@ -445,7 +450,8 @@ def zerogpu_generate_simple(prompt_str: str, gen_kwargs: Dict[str, Any], rose_ma
         logits_processor = None
         if rose_map:
             bias = build_bias_from_tokens(tokenizer, rose_map).to(device)
-            …
+            eff_alpha = float(rose_alpha) * (float(rose_score) if rose_score is not None else 1.0)
+            logits_processor = [RoseGuidedLogits(bias, eff_alpha)]
 
         out_ids = model.generate(
             **inputs,
```
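The removed line's text is cut in the diff view, hence the `…`; it presumably built the processor directly from `rose_alpha`. The new lines instead scale the Rose guidance strength by an optional `rose_score` before constructing the processor. `RoseGuidedLogits` itself is defined elsewhere in app.py and not shown; as a sketch, a bias-adding logits processor of this shape usually looks like the following. The class body is an assumption; only the constructor call pattern and the `eff_alpha` formula come from the diff.

```python
# Sketch only: a bias-adding logits processor in the spirit of RoseGuidedLogits.
# `bias` is expected to be a (vocab_size,) tensor, as built by build_bias_from_tokens.
import torch
from transformers import LogitsProcessor

class BiasedLogits(LogitsProcessor):
    def __init__(self, bias: torch.Tensor, alpha: float):
        self.bias = bias
        self.alpha = float(alpha)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # Nudge next-token logits toward the biased tokens, scaled by alpha.
        return scores + self.alpha * self.bias.to(device=scores.device, dtype=scores.dtype)

# Matching the diff: alpha is pre-scaled by an optional quality score.
# eff_alpha = float(rose_alpha) * (float(rose_score) if rose_score is not None else 1.0)
# logits_processor = [BiasedLogits(bias, eff_alpha)]
```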
```diff
@@ -512,7 +518,6 @@ def zerogpu_generate_debug(full_prompt, gen_kwargs: Dict[str, Any]) -> Dict[str,
             max_new_tokens=int(gen_kwargs.get("max_new_tokens", MAX_DEF)),
             pad_token_id=model.config.pad_token_id,
             eos_token_id=tokenizer.eos_token_id,
-            bad_words_ids=bad_words_ids,
             stopping_criteria=sc,
             repetition_penalty=float(gen_kwargs.get("repetition_penalty", 1.15)),
             no_repeat_ngram_size=int(gen_kwargs.get("no_repeat_ngram_size", 6)),
```
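This hunk simply drops `bad_words_ids` from the debug path's `generate()` call, leaving the stopping criteria and repetition controls in place. For context, `bad_words_ids` is a plain `List[List[int]]` of token-id sequences to ban; a hypothetical builder (not from app.py) would look like:

```python
# Hypothetical helper, not from app.py: turn banned strings into the
# List[List[int]] shape that generate(bad_words_ids=...) expects.
from typing import List

def build_bad_words_ids(tokenizer, banned: List[str]) -> List[List[int]]:
    ids: List[List[int]] = []
    for phrase in banned:
        # add_special_tokens=False so only the phrase's own tokens are banned
        toks = tokenizer(phrase, add_special_tokens=False).input_ids
        if toks:
            ids.append(toks)
    return ids
```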
```diff
@@ -653,6 +658,7 @@ def generate_response(message: str, history: List[List[str]], system_prompt: str
             },
             rose_map,
             float(rose_alpha),
+            float(rose_score) if rose_score is not None else None,
             int(seed) if seed is not None else None,
         )
     else:
```
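Taken together, the last three hunks thread the optional `rose_score` end to end: `generate_response` now forwards it (or None) at the call site, `zerogpu_generate_simple` accepts it in its signature, and the Rose branch folds it into `eff_alpha` before building the logits processor.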