AbstractPhil committed on
Commit
53d9a8e
·
1 Parent(s): ec0268d
Files changed (1) hide show
  1. app.py +20 -23
app.py CHANGED
@@ -273,7 +273,7 @@ def build_bias_from_tokens(tokenizer, mapping: Dict[str, float]) -> torch.Tensor
273
  for t in tid:
274
  if isinstance(t, int) and t >= 0:
275
  bias[t] += float(w) / max(1, len(tid))
276
- elif isinstance(tid, int) and tid >= 0:
277
  bias[tid] += float(w)
278
  return bias
279
 
@@ -341,9 +341,6 @@ def zerogpu_generate(full_prompt,
341
  sc = StoppingCriteriaList([StopOnTokens(HARMONY_STOP_IDS)])
342
 
343
  # Generate
344
- # Build EOS list: use ONLY Harmony assistant-action stops (per OpenAI docs)
345
- eos_ids = HARMONY_STOP_IDS if HARMONY_AVAILABLE else tokenizer.eos_token_id
346
-
347
  out_ids = model.generate(
348
  **inputs,
349
  do_sample=bool(gen_kwargs.get("do_sample", True)),
@@ -659,15 +656,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
659
  value=""
660
  )
661
 
662
- # Chat interface - using only valid parameters
663
  # --- Harmony Inspector UI ---
664
- with gr.Accordion("Harmony Inspector", open=False):
665
- debug_prompt = gr.Textbox(label="Debug prompt", value="What is 2+2? Reply with just the number.")
666
- run_debug = gr.Button("Run Harmony Inspect")
667
- debug_out = gr.JSON(label="Parsed Harmony output", value={})
668
- run_debug.click(harmony_inspect_handler, inputs=[debug_prompt, system_prompt, reasoning_effort], outputs=[debug_out])
669
 
670
- chat = gr.ChatInterface(
 
671
  fn=generate_response,
672
  type="messages",
673
  additional_inputs=[
@@ -686,18 +683,18 @@ chat = gr.ChatInterface(
686
  cache_examples=False,
687
  )
688
 
689
- gr.Markdown(
690
- """
691
- ---
692
- ### Configuration:
693
- - **Model**: Set `MODEL_ID` env var (default: openai/gpt-oss-20b)
694
- - **Adapter**: Set `ADAPTER_ID` and optionally `ADAPTER_SUBFOLDER`
695
- - **Auth**: Set `HF_TOKEN` in Space secrets for private model access
696
- - **Harmony**: Install with `pip install openai-harmony` for proper channel support
697
-
698
- The model uses Harmony format with thinking channels (`thinking`, `analysis`, `final`).
699
- """
700
- )
701
 
702
  if __name__ == "__main__":
703
  demo.queue(max_size=8 if ZEROGPU else 32).launch(
 
273
  for t in tid:
274
  if isinstance(t, int) and t >= 0:
275
  bias[t] += float(w) / max(1, len(tid))
276
+ elif isinstance(tid, int) and t >= 0:
277
  bias[tid] += float(w)
278
  return bias
279
 
 
341
  sc = StoppingCriteriaList([StopOnTokens(HARMONY_STOP_IDS)])
342
 
343
  # Generate
 
 
 
344
  out_ids = model.generate(
345
  **inputs,
346
  do_sample=bool(gen_kwargs.get("do_sample", True)),
 
656
  value=""
657
  )
658
 
 
659
  # --- Harmony Inspector UI ---
660
+ with gr.Accordion("Harmony Inspector", open=False):
661
+ debug_prompt = gr.Textbox(label="Debug prompt", value="What is 2+2? Reply with just the number.")
662
+ run_debug = gr.Button("Run Harmony Inspect")
663
+ debug_out = gr.JSON(label="Parsed Harmony output", value={})
664
+ run_debug.click(harmony_inspect_handler, inputs=[debug_prompt, system_prompt, reasoning_effort], outputs=[debug_out])
665
 
666
+ # Chat interface - using only valid parameters
667
+ chat = gr.ChatInterface(
668
  fn=generate_response,
669
  type="messages",
670
  additional_inputs=[
 
683
  cache_examples=False,
684
  )
685
 
686
+ gr.Markdown(
687
+ """
688
+ ---
689
+ ### Configuration:
690
+ - **Model**: Set `MODEL_ID` env var (default: openai/gpt-oss-20b)
691
+ - **Adapter**: Set `ADAPTER_ID` and optionally `ADAPTER_SUBFOLDER`
692
+ - **Auth**: Set `HF_TOKEN` in Space secrets for private model access
693
+ - **Harmony**: Install with `pip install openai-harmony` for proper channel support
694
+
695
+ The model uses Harmony format with thinking channels (`thinking`, `analysis`, `final`).
696
+ """
697
+ )
698
 
699
  if __name__ == "__main__":
700
  demo.queue(max_size=8 if ZEROGPU else 32).launch(