Spaces:

akhaliq
/

VibeVoice-1.5B

Sleeping

App Files Community

akhaliq HF Staff committed 8 days ago

Commit

ad908e5

verified ·

1 Parent(s): 8b711a8

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +34 -16

app.py CHANGED Viewed

@@ -101,12 +101,19 @@ class VibeVoiceChat:
     def __init__(self, model_path: str, device: str = "cuda", inference_steps: int = 5):
         """Initialize the VibeVoice chat model."""
         self.model_path = model_path
-        self.device = device
         self.inference_steps = inference_steps
         self.is_generating = False
         self.stop_generation = False
         self.current_streamer = None
         self.load_model()
         self.setup_voice_presets()
@@ -116,11 +123,20 @@ class VibeVoiceChat:
         self.processor = VibeVoiceProcessor.from_pretrained(self.model_path)
-        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
-            self.model_path,
-            torch_dtype=torch.bfloat16,
-            device_map='cuda',
-        )
         self.model.eval()
         # Configure noise scheduler
@@ -244,6 +260,10 @@ class VibeVoiceChat:
                 return_attention_mask=True,
             )
             # Create audio streamer
             audio_streamer = AudioStreamer(
                 batch_size=1,
@@ -297,6 +317,8 @@ class VibeVoiceChat:
         except Exception as e:
             print(f"Error in generation: {e}")
             self.is_generating = False
             self.current_streamer = None
             yield None
@@ -320,6 +342,8 @@ class VibeVoiceChat:
             )
         except Exception as e:
             print(f"Error in generation thread: {e}")
             audio_streamer.end()
     def convert_to_16_bit_wav(self, data):
@@ -385,6 +409,8 @@ def create_chat_interface(chat_instance: VibeVoiceChat):
                 return gr.Audio(value=None)
         except Exception as e:
             print(f"Error in chat_fn: {e}")
             return gr.Audio(value=None)
     # Create additional inputs
@@ -419,21 +445,12 @@ def create_chat_interface(chat_instance: VibeVoiceChat):
         )
     ]
-    # Example conversations - formatted as list of lists when additional_inputs are provided
-    examples = [
-        ["Welcome to our AI podcast! Today we're discussing the future of technology.", default_voice_1, default_voice_2, 2, 1.3],
-        ["Speaker 0: What's your favorite programming language?\nSpeaker 1: I really enjoy Python for its simplicity.", default_voice_1, default_voice_2, 2, 1.3],
-        ["Tell me an interesting fact about space exploration.", default_voice_1, default_voice_1, 1, 1.3],
-        ["Speaker 0: How do you stay productive?\nSpeaker 1: I use the Pomodoro technique and take regular breaks.", default_voice_1, default_voice_2, 2, 1.3],
-    ]
-    # Create the ChatInterface
     interface = gr.ChatInterface(
         fn=chat_fn,
         type="messages",
         title="🎙️ VibeVoice Chat",
         description="Generate natural dialogue audio with AI voices. Type your message or paste a script!",
-        examples=examples,
         additional_inputs=additional_inputs,
         additional_inputs_accordion=gr.Accordion(label="Voice & Generation Settings", open=True),
         submit_btn="🎵 Generate Audio",
@@ -514,6 +531,7 @@ def main():
     print(f"🚀 Launching chat interface on port {args.port}")
     print(f"📁 Model: {args.model_path}")
     print(f"🎭 Available voices: {len(chat_instance.available_voices)}")
     # Launch the interface

     def __init__(self, model_path: str, device: str = "cuda", inference_steps: int = 5):
         """Initialize the VibeVoice chat model."""
         self.model_path = model_path
+        self.device = device if torch.cuda.is_available() else "cpu"
         self.inference_steps = inference_steps
         self.is_generating = False
         self.stop_generation = False
         self.current_streamer = None
+        # Check GPU availability
+        if torch.cuda.is_available():
+            print(f"✓ GPU detected: {torch.cuda.get_device_name(0)}")
+            print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+        else:
+            print("✗ No GPU detected, using CPU (generation will be slower)")
         self.load_model()
         self.setup_voice_presets()
         self.processor = VibeVoiceProcessor.from_pretrained(self.model_path)
+        if torch.cuda.is_available():
+            self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
+                self.model_path,
+                torch_dtype=torch.bfloat16,
+                device_map='cuda',
+                attn_implementation="flash_attention_2",
+            )
+        else:
+            self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
+                self.model_path,
+                torch_dtype=torch.float32,
+                device_map='cpu',
+            )
         self.model.eval()
         # Configure noise scheduler
                 return_attention_mask=True,
             )
+            # Move to device
+            if self.device == "cuda":
+                inputs = {k: v.to(self.device) if torch.is_tensor(v) else v for k, v in inputs.items()}
             # Create audio streamer
             audio_streamer = AudioStreamer(
                 batch_size=1,
         except Exception as e:
             print(f"Error in generation: {e}")
+            import traceback
+            traceback.print_exc()
             self.is_generating = False
             self.current_streamer = None
             yield None
             )
         except Exception as e:
             print(f"Error in generation thread: {e}")
+            import traceback
+            traceback.print_exc()
             audio_streamer.end()
     def convert_to_16_bit_wav(self, data):
                 return gr.Audio(value=None)
         except Exception as e:
             print(f"Error in chat_fn: {e}")
+            import traceback
+            traceback.print_exc()
             return gr.Audio(value=None)
     # Create additional inputs
         )
     ]
+    # Create the ChatInterface without examples to avoid the error
     interface = gr.ChatInterface(
         fn=chat_fn,
         type="messages",
         title="🎙️ VibeVoice Chat",
         description="Generate natural dialogue audio with AI voices. Type your message or paste a script!",
         additional_inputs=additional_inputs,
         additional_inputs_accordion=gr.Accordion(label="Voice & Generation Settings", open=True),
         submit_btn="🎵 Generate Audio",
     print(f"🚀 Launching chat interface on port {args.port}")
     print(f"📁 Model: {args.model_path}")
+    print(f"💻 Device: {chat_instance.device}")
     print(f"🎭 Available voices: {len(chat_instance.available_voices)}")
     # Launch the interface