MoraxCheng committed on
Commit
5b1db8f
·
1 Parent(s): 86b2ecb

Implement runtime mode tracking and retry mechanism for Zero GPU initialization, falling back to CPU mode if necessary

Browse files
Files changed (1) hide show
  1. app.py +47 -8
app.py CHANGED
@@ -39,6 +39,9 @@ except Exception as e:
39
  print(f"Zero GPU initialization warning: {e}")
40
  print("Running without Zero GPU support")
41
 
 
 
 
42
  # Keep-alive state
43
  last_activity = datetime.datetime.now()
44
  activity_lock = threading.Lock()
@@ -437,10 +440,14 @@ with tranception_design:
437
  with gr.Row():
438
  with gr.Column(scale=1):
439
  def get_gpu_status():
 
440
  with activity_lock:
441
  time_since = (datetime.datetime.now() - last_activity).total_seconds()
442
 
443
- status = "🔥 Zero GPU" if SPACES_AVAILABLE else "💻 CPU Mode"
 
 
 
444
  return f"{status} | Last activity: {int(time_since)}s ago"
445
 
446
  gpu_status = gr.Textbox(
@@ -546,10 +553,42 @@ if __name__ == "__main__":
546
  print("Note: Models will be downloaded on first use")
547
  print("Zero GPU spaces may sleep after ~15 minutes of inactivity")
548
 
549
- # Launch with queue for proper Zero GPU support
550
- tranception_design.queue(max_size=20).launch(
551
- server_name="0.0.0.0",
552
- server_port=7860,
553
- show_error=True,
554
- share=False
555
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  print(f"Zero GPU initialization warning: {e}")
40
  print("Running without Zero GPU support")
41
 
42
+ # Runtime mode tracking
43
+ RUNTIME_MODE = "GPU" if SPACES_AVAILABLE else "CPU"
44
+
45
  # Keep-alive state
46
  last_activity = datetime.datetime.now()
47
  activity_lock = threading.Lock()
 
440
  with gr.Row():
441
  with gr.Column(scale=1):
442
  def get_gpu_status():
443
+ global RUNTIME_MODE
444
  with activity_lock:
445
  time_since = (datetime.datetime.now() - last_activity).total_seconds()
446
 
447
+ if RUNTIME_MODE == "GPU":
448
+ status = "🔥 Zero GPU"
449
+ else:
450
+ status = "💻 CPU Mode (GPU initialization failed)"
451
  return f"{status} | Last activity: {int(time_since)}s ago"
452
 
453
  gpu_status = gr.Textbox(
 
553
  print("Note: Models will be downloaded on first use")
554
  print("Zero GPU spaces may sleep after ~15 minutes of inactivity")
555
 
556
+ # Try to launch with ZeroGPU support first
557
+ launch_success = False
558
+ max_retries = 3
559
+ retry_count = 0
560
+
561
+ while not launch_success and retry_count < max_retries:
562
+ try:
563
+ if retry_count > 0:
564
+ print(f"Retry attempt {retry_count}/{max_retries}...")
565
+ time.sleep(2) # Wait before retry
566
+
567
+ # Launch with queue for proper Zero GPU support
568
+ tranception_design.queue(max_size=20).launch(
569
+ server_name="0.0.0.0",
570
+ server_port=7860,
571
+ show_error=True,
572
+ share=False
573
+ )
574
+ launch_success = True
575
+ except RuntimeError as e:
576
+ if "Error while initializing ZeroGPU" in str(e):
577
+ retry_count += 1
578
+ if retry_count >= max_retries:
579
+ print(f"ZeroGPU initialization failed after {max_retries} attempts")
580
+ print("Falling back to CPU mode for stability")
581
+ print("Note: The app will run slower in CPU mode")
582
+ # Update runtime mode
583
+ RUNTIME_MODE = "CPU"
584
+ # Launch without queue which doesn't trigger ZeroGPU initialization
585
+ tranception_design.launch(
586
+ server_name="0.0.0.0",
587
+ server_port=7860,
588
+ show_error=True,
589
+ share=False
590
+ )
591
+ launch_success = True
592
+ else:
593
+ # Re-raise unexpected errors
594
+ raise