Spaces · Running on Zero

AbstractPhil committed · Commit 644faa4 · Parent(s): 625e096

more claude not helping

app.py CHANGED
@@ -320,15 +320,15 @@ def chat_to_messages(history: List[Any], system_prompt: str) -> List[Dict[str, s
         msgs.append({"role": "assistant", "content": str(a)})
     return msgs
 
-def generate_response_streaming(message: Any, history: List[Any], system_prompt: …
-    …
-    …
-    …
-    …
-    …
+def generate_response(message: Any, history: List[Any], system_prompt: str,
+                      temperature: float, top_p: float, top_k: int, max_new_tokens: int,
+                      do_sample: bool, seed: Optional[int],
+                      rose_enable: bool, rose_alpha: float, rose_score: Optional[float],
+                      rose_tokens: str, rose_json: str,
+                      show_thinking: bool = False) -> str:
     """
-    …
-    …
+    Non-streaming response generator for ChatInterface.
+    Returns a complete response to avoid h11 Content-Length issues.
     """
     try:
         # Normalize message and build Harmony prompt
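The move from a streaming generator to a plain-return function is what the new docstring's h11 Content-Length note refers to. Below is a minimal sketch, with illustrative names that are not from app.py, of the two fn shapes `gr.ChatInterface` accepts: a generator that yields partial strings versus a function that returns one complete string.

```python
import gradio as gr

def reply_streaming(message, history):
    # Generator shape: each yield replaces the assistant bubble with partial text
    yield "thinking..."
    yield f"You said: {message}"

def reply_complete(message, history):
    # Plain-return shape: one complete string is sent back in a single response
    return f"You said: {message}"

# Either shape can be passed as fn=...; this commit moves Mirel to the plain-return shape
demo = gr.ChatInterface(fn=reply_complete)

if __name__ == "__main__":
    demo.launch()
```

Returning one complete string presumably sidesteps the mismatched-framing errors the docstring attributes to h11, since nothing is streamed chunk by chunk.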
@@ -338,9 +338,6 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
         msgs = chat_to_messages(history, system_prompt)
         msgs.append({"role": "user", "content": str(message)})
 
-        # Yield initial status
-        yield "🤔 Preparing prompt..."
-
         prompt = to_harmony_prompt(msgs)
 
         # Build Rose map if enabled
@@ -369,9 +366,6 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
                 pass
         if not rose_map:
             rose_map = None
-
-        # Update status
-        yield "💭 Generating response..."
 
         # Generate with model
         full_output = zerogpu_generate(
@@ -392,19 +386,16 @@ def generate_response_streaming(message: Any, history: List[Any], system_prompt:
         # Extract final response from CoT output
         if show_thinking:
             # Show the full chain-of-thought process
-            …
+            return f"**Full Output (with thinking):**\n```\n{full_output}\n```\n\n**Final Response:**\n{extract_final_channel(full_output)}"
         else:
             # Just show the final response
-            …
-
-        # Yield the final response
-        yield final_response
+            return extract_final_channel(full_output)
 
     except Exception as e:
         error_msg = f"⚠️ Error: {str(e)}"
-        print(f"[Error in …
+        print(f"[Error in generate_response] {error_msg}")
         print(traceback.format_exc())
-        …
+        return error_msg
 
 # -----------------------
 # UI
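`extract_final_channel` is called by the new code but not defined in this diff. The following is a hypothetical sketch of such a helper, assuming Harmony-style channel markers of the form `<|channel|>final<|message|> ... <|return|>`; the actual implementation elsewhere in app.py may differ.

```python
# Hypothetical sketch only; the real extract_final_channel lives elsewhere in app.py.
def extract_final_channel(full_output: str) -> str:
    marker = "<|channel|>final<|message|>"
    if marker in full_output:
        # Take the text after the last final-channel marker
        final = full_output.rsplit(marker, 1)[-1]
        # Strip trailing Harmony stop tokens if they are present
        for stop in ("<|return|>", "<|end|>"):
            final = final.split(stop, 1)[0]
        return final.strip()
    # Fall back to the raw output when no final channel is found
    return full_output.strip()
```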
@@ -419,37 +410,16 @@ css = """
 """
 
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
-    # OAuth status tracking
-    login_status = gr.State(value=None)
-
     gr.Markdown(
         """
         # Mirel – Harmony Inference (ZeroGPU-ready)
 
         Chain-of-thought OSS-20B model with Harmony formatting.
         The model thinks through problems internally before providing a final response.
+
+        **Auth:** Set `HF_TOKEN` in Space secrets or add `hf_oauth: true` to README for browser auth.
         """
     )
-
-    # Add OAuth login button for browser-based auth
-    with gr.Row():
-        with gr.Column(scale=1):
-            login_btn = gr.LoginButton(value="Sign in with Hugging Face", size="sm")
-        with gr.Column(scale=3):
-            auth_status = gr.Markdown("Not signed in - using default access")
-
-    # OAuth handler to get user profile
-    def update_auth_status(profile: gr.OAuthProfile | None) -> str:
-        if profile:
-            return f"✅ Signed in as **{profile.name}** (username: {profile.username})"
-        else:
-            if HF_TOKEN:
-                return "✅ Using token from environment"
-            else:
-                return "Not signed in - using default access"
-
-    # Update auth status on load
-    demo.load(update_auth_status, inputs=None, outputs=auth_status)
 
     with gr.Row():
         with gr.Column(scale=3):
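With the LoginButton/OAuthProfile block removed, the Space is left with the environment-token path the deleted handler already fell back to. A hypothetical illustration of that fallback, with names assumed rather than taken from app.py:

```python
import os

# Assumed fallback: with the OAuth UI removed, access relies on a token
# provided via the Space's secrets (or none at all).
HF_TOKEN = os.getenv("HF_TOKEN")  # set in Space secrets, or None

def auth_status_message() -> str:
    # Mirrors the strings the deleted update_auth_status handler returned
    if HF_TOKEN:
        return "✅ Using token from environment"
    return "Not signed in - using default access"
```

Per the new Markdown text, browser-based sign-in can still be restored without this UI by adding `hf_oauth: true` to the Space's README metadata.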
@@ -494,7 +464,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
 
     # Chat interface
     chat = gr.ChatInterface(
-        fn=…
+        fn=generate_response,
         chatbot=gr.Chatbot(elem_id="chatbot", height=500, type="messages"),
         additional_inputs=[
             system_prompt, temperature, top_p, top_k, max_new,
@@ -504,7 +474,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
         title=None,  # Title already in markdown
         description=None,  # Description already in markdown
         cache_examples=False,
-        analytics_enabled=False,
     )
 
     gr.Markdown(
@@ -531,13 +500,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     )
 
 if __name__ == "__main__":
-    # …
+    # Simple queue configuration
     demo.queue(
-        max_size=10
-        default_concurrency_limit=1 if ZEROGPU else 2  # Limit concurrent requests
+        max_size=10,
     ).launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,
-        max_threads=40,  # Default FastAPI thread pool size
     )
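The deleted queue kwargs were not just noise: `max_size=10` was missing the trailing comma before `default_concurrency_limit=...`, so the old block would not even parse. A sketch of how those kwargs could be restored correctly, if concurrency limiting is still wanted; `ZEROGPU` stands in for the flag app.py defines elsewhere.

```python
import gradio as gr

ZEROGPU = True  # stand-in for the flag app.py derives from the Space hardware

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")

# max_size and default_concurrency_limit are separate keyword arguments and
# need the comma the deleted lines were missing
demo.queue(
    max_size=10,
    default_concurrency_limit=1 if ZEROGPU else 2,  # limit concurrent requests
).launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
)
```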