AbstractPhil committed
Commit 644faa4 · 1 Parent(s): 625e096

more claude not helping

Files changed (1): app.py (+17 -50)
app.py CHANGED
@@ -320,15 +320,15 @@ def chat_to_messages(history: List[Any], system_prompt: str) -> List[Dict[str, s
  msgs.append({"role": "assistant", "content": str(a)})
  return msgs

- def generate_response_streaming(message: Any, history: List[Any], system_prompt: str,
- temperature: float, top_p: float, top_k: int, max_new_tokens: int,
- do_sample: bool, seed: Optional[int],
- rose_enable: bool, rose_alpha: float, rose_score: Optional[float],
- rose_tokens: str, rose_json: str,
- show_thinking: bool = False) -> Iterator[str]:
+ def generate_response(message: Any, history: List[Any], system_prompt: str,
+ temperature: float, top_p: float, top_k: int, max_new_tokens: int,
+ do_sample: bool, seed: Optional[int],
+ rose_enable: bool, rose_alpha: float, rose_score: Optional[float],
+ rose_tokens: str, rose_json: str,
+ show_thinking: bool = False) -> str:
  """
- Streaming generator for ChatInterface.
- Yields partial responses to avoid h11 Content-Length issues.
+ Non-streaming response generator for ChatInterface.
+ Returns a complete response to avoid h11 Content-Length issues.
  """
  try:
  # Normalize message and build Harmony prompt
@@ -338,9 +338,6 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
  msgs = chat_to_messages(history, system_prompt)
  msgs.append({"role": "user", "content": str(message)})

- # Yield initial status
- yield "🤔 Preparing prompt..."
-
  prompt = to_harmony_prompt(msgs)

  # Build Rose map if enabled
@@ -369,9 +366,6 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
  pass
  if not rose_map:
  rose_map = None
-
- # Update status
- yield "💭 Generating response..."

  # Generate with model
  full_output = zerogpu_generate(
@@ -392,19 +386,16 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
  # Extract final response from CoT output
  if show_thinking:
  # Show the full chain-of-thought process
- final_response = f"**Full Output (with thinking):**\n```\n{full_output}\n```\n\n**Final Response:**\n{extract_final_channel(full_output)}"
+ return f"**Full Output (with thinking):**\n```\n{full_output}\n```\n\n**Final Response:**\n{extract_final_channel(full_output)}"
  else:
  # Just show the final response
- final_response = extract_final_channel(full_output)
-
- # Yield the final response
- yield final_response
+ return extract_final_channel(full_output)

  except Exception as e:
  error_msg = f"⚠️ Error: {str(e)}"
- print(f"[Error in generate_response_streaming] {error_msg}")
+ print(f"[Error in generate_response] {error_msg}")
  print(traceback.format_exc())
- yield error_msg
+ return error_msg

  # -----------------------
  # UI
@@ -419,37 +410,16 @@ css = """
  """

  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
- # OAuth status tracking
- login_status = gr.State(value=None)
-
  gr.Markdown(
  """
  # Mirel – Harmony Inference (ZeroGPU-ready)

  Chain-of-thought OSS-20B model with Harmony formatting.
  The model thinks through problems internally before providing a final response.
+
+ **Auth:** Set `HF_TOKEN` in Space secrets or add `hf_oauth: true` to README for browser auth.
  """
  )
-
- # Add OAuth login button for browser-based auth
- with gr.Row():
- with gr.Column(scale=1):
- login_btn = gr.LoginButton(value="Sign in with Hugging Face", size="sm")
- with gr.Column(scale=3):
- auth_status = gr.Markdown("Not signed in - using default access")
-
- # OAuth handler to get user profile
- def update_auth_status(profile: gr.OAuthProfile | None) -> str:
- if profile:
- return f"✅ Signed in as **{profile.name}** (username: {profile.username})"
- else:
- if HF_TOKEN:
- return "✅ Using token from environment"
- else:
- return "Not signed in - using default access"
-
- # Update auth status on load
- demo.load(update_auth_status, inputs=None, outputs=auth_status)

  with gr.Row():
  with gr.Column(scale=3):
@@ -494,7 +464,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:

  # Chat interface
  chat = gr.ChatInterface(
- fn=generate_response_streaming,
+ fn=generate_response,
  chatbot=gr.Chatbot(elem_id="chatbot", height=500, type="messages"),
  additional_inputs=[
  system_prompt, temperature, top_p, top_k, max_new,
@@ -504,7 +474,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
  title=None, # Title already in markdown
  description=None, # Description already in markdown
  cache_examples=False,
- analytics_enabled=False,
  )

  gr.Markdown(
@@ -531,13 +500,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
  )

  if __name__ == "__main__":
- # Configure queue for better performance
+ # Simple queue configuration
  demo.queue(
- max_size=10 if ZEROGPU else 50,
- default_concurrency_limit=1 if ZEROGPU else 2 # Limit concurrent requests
+ max_size=10,
  ).launch(
  server_name="0.0.0.0",
  server_port=7860,
  share=False,
- max_threads=40, # Default FastAPI thread pool size
  )
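
For context, the pattern this commit moves to — a plain handler that returns one complete string per turn instead of yielding partial chunks — looks roughly like the minimal sketch below. This is an illustrative stand-in, not the Space's app.py: `respond`, its toy reply, and the single `system_prompt` extra input are placeholders for the real Harmony prompt building and `zerogpu_generate` call.

```python
import gradio as gr

def respond(message: str, history: list, system_prompt: str) -> str:
    # Stand-in for the real inference path (Harmony prompt + zerogpu_generate).
    # Returning one complete string (no yield) is what sidesteps streamed
    # partial responses and the associated h11 Content-Length errors.
    return f"[{system_prompt}] You said: {message}"

with gr.Blocks() as demo:
    system_prompt = gr.Textbox(value="You are a helpful assistant.", label="System prompt")
    gr.ChatInterface(
        fn=respond,  # plain function, not a generator
        chatbot=gr.Chatbot(type="messages", height=500),
        additional_inputs=[system_prompt],
        cache_examples=False,
    )

if __name__ == "__main__":
    # Mirrors the simplified queue/launch settings adopted in this commit.
    demo.queue(max_size=10).launch(server_name="0.0.0.0", server_port=7860, share=False)
```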