Spaces: Running on Zero
AbstractPhil committed
Commit 53d9a8e · 1 Parent(s): ec0268d
yes
app.py
CHANGED
@@ -273,7 +273,7 @@ def build_bias_from_tokens(tokenizer, mapping: Dict[str, float]) -> torch.Tensor
             for t in tid:
                 if isinstance(t, int) and t >= 0:
                     bias[t] += float(w) / max(1, len(tid))
-        elif isinstance(tid, int) and
+        elif isinstance(tid, int) and t >= 0:
            bias[tid] += float(w)
     return bias
 
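Review note on the hunk above: the parent commit left line 276 truncated mid-expression (`elif isinstance(tid, int) and`), a syntax error, and this commit completes it. The rest of build_bias_from_tokens is not part of the diff; the sketch below is one plausible shape for such a logit-bias builder, and everything outside the five diffed lines is an assumption. Note also that the fixed line still tests the loop variable `t` rather than `tid`, which reads like a leftover typo; the sketch uses `tid`.

# Sketch only: the diff shows just the tail of build_bias_from_tokens.
# The tokenizer calls, loop structure, and vocab sizing are assumptions.
from typing import Dict

import torch

def build_bias_from_tokens(tokenizer, mapping: Dict[str, float]) -> torch.Tensor:
    """Turn {token string: weight} into a per-token-id logit-bias vector."""
    bias = torch.zeros(len(tokenizer), dtype=torch.float32)
    for tok, w in mapping.items():
        ids = tokenizer.encode(tok, add_special_tokens=False)  # sub-token ids for this string
        tid = ids[0] if len(ids) == 1 else ids                 # int or list, as in the diff
        if isinstance(tid, list):
            for t in tid:
                if isinstance(t, int) and t >= 0:
                    bias[t] += float(w) / max(1, len(tid))     # spread weight across sub-tokens
        elif isinstance(tid, int) and tid >= 0:                # diff writes `t` here; `tid` is likely intended
            bias[tid] += float(w)
    return bias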
@@ -341,9 +341,6 @@ def zerogpu_generate(full_prompt,
     sc = StoppingCriteriaList([StopOnTokens(HARMONY_STOP_IDS)])
 
     # Generate
-    # Build EOS list: use ONLY Harmony assistant-action stops (per OpenAI docs)
-    eos_ids = HARMONY_STOP_IDS if HARMONY_AVAILABLE else tokenizer.eos_token_id
-
     out_ids = model.generate(
         **inputs,
         do_sample=bool(gen_kwargs.get("do_sample", True)),
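The three deleted lines built an `eos_ids` value that generation apparently no longer consumes; stopping on the Harmony assistant-action tokens is already enforced by the StoppingCriteriaList one line up. StopOnTokens itself is defined elsewhere in app.py and is not part of this diff; below is a minimal sketch of the usual transformers pattern for such a criterion, with placeholder stop ids.

# Sketch of a StopOnTokens criterion; the Space's actual class is not shown here.
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    """Stop as soon as the most recently generated token is one of stop_ids."""
    def __init__(self, stop_ids):
        self.stop_ids = {int(i) for i in stop_ids}

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Batch size 1 is assumed, matching typical single-stream chat generation.
        return int(input_ids[0, -1]) in self.stop_ids

HARMONY_STOP_IDS = [200002, 200012]  # placeholder ids; the real list comes from openai-harmony
sc = StoppingCriteriaList([StopOnTokens(HARMONY_STOP_IDS)])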
@@ -659,15 +656,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         value=""
     )
 
-    # Chat interface - using only valid parameters
     # --- Harmony Inspector UI ---
-    with gr.Accordion("Harmony Inspector", open=False):
-
-
-
-
+    with gr.Accordion("Harmony Inspector", open=False):
+        debug_prompt = gr.Textbox(label="Debug prompt", value="What is 2+2? Reply with just the number.")
+        run_debug = gr.Button("Run Harmony Inspect")
+        debug_out = gr.JSON(label="Parsed Harmony output", value={})
+        run_debug.click(harmony_inspect_handler, inputs=[debug_prompt, system_prompt, reasoning_effort], outputs=[debug_out])
 
-
+    # Chat interface - using only valid parameters
+    chat = gr.ChatInterface(
         fn=generate_response,
         type="messages",
         additional_inputs=[
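The restored `chat = gr.ChatInterface(` line is the substantive fix here: in the parent commit the call had lost its opening line, leaving the keyword arguments below it orphaned. The inspector itself is standard Gradio event wiring; the sketch below is a self-contained version with a stub standing in for app.py's harmony_inspect_handler (whose real implementation, along with the system_prompt and reasoning_effort components, lives elsewhere in the file).

# Stand-alone sketch of the accordion + click wiring; the stub handler is
# hypothetical, standing in for app.py's harmony_inspect_handler.
import gradio as gr

def harmony_inspect_handler(prompt: str) -> dict:
    # Stub: the real handler presumably renders the prompt through Harmony
    # and returns the parsed channel structure.
    return {"prompt": prompt, "channels": {"analysis": "...", "final": "4"}}

with gr.Blocks() as demo:
    with gr.Accordion("Harmony Inspector", open=False):
        debug_prompt = gr.Textbox(label="Debug prompt", value="What is 2+2? Reply with just the number.")
        run_debug = gr.Button("Run Harmony Inspect")
        debug_out = gr.JSON(label="Parsed Harmony output", value={})
        run_debug.click(harmony_inspect_handler, inputs=[debug_prompt], outputs=[debug_out])

demo.launch()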
@@ -686,18 +683,18 @@ chat = gr.ChatInterface(
     cache_examples=False,
 )
 
-    gr.Markdown(
-
-
-
-
-
-
-
-
-
-
-    )
+    gr.Markdown(
+        """
+        ---
+        ### Configuration:
+        - **Model**: Set `MODEL_ID` env var (default: openai/gpt-oss-20b)
+        - **Adapter**: Set `ADAPTER_ID` and optionally `ADAPTER_SUBFOLDER`
+        - **Auth**: Set `HF_TOKEN` in Space secrets for private model access
+        - **Harmony**: Install with `pip install openai-harmony` for proper channel support
+
+        The model uses Harmony format with thinking channels (`thinking`, `analysis`, `final`).
+        """
+    )
 
 if __name__ == "__main__":
     demo.queue(max_size=8 if ZEROGPU else 32).launch(
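The restored Markdown documents four environment variables; below is a sketch of how they are presumably read near the top of app.py, where only the names and the default model id come from the text above. (One caveat on the text itself: openai-harmony's documented channels are `analysis`, `commentary`, and `final`; `thinking` appears to be this app's own label.)

# Assumed config plumbing; variable names and the default model id are taken
# from the Markdown block, everything else is a guess.
import os

MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-oss-20b")
ADAPTER_ID = os.getenv("ADAPTER_ID")                  # optional adapter repo
ADAPTER_SUBFOLDER = os.getenv("ADAPTER_SUBFOLDER")    # optional subfolder within it
HF_TOKEN = os.getenv("HF_TOKEN")                      # Space secret for private repos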