Spaces:
Runtime error
Upload app.py with huggingface_hub
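A minimal sketch of that kind of upload with huggingface_hub, assuming a placeholder Space id (the real repo id is not shown on this page):

from huggingface_hub import HfApi

api = HfApi()  # reads the access token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="your-username/your-space",  # placeholder, not this Space's actual id
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)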
app.py
ADDED
@@ -0,0 +1,93 @@
"""
FastRTC + Gemma-3 minimal voice chat app
Requirements:
    pip install "fastrtc[vad, stt, tts]" transformers torch torchaudio accelerate
"""

import asyncio
from typing import AsyncGenerator

import numpy as np
import torch
from fastrtc import (
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_tts_model,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ------------------------------------------------------------------
# 1. Load Gemma-3 (4b-it) via transformers
# ------------------------------------------------------------------
MODEL_ID = "google/gemma-3-4b-it"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires the accelerate package
    trust_remote_code=True,
)

# ------------------------------------------------------------------
# 2. Build a simple chat pipeline
# ------------------------------------------------------------------
chat_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# ------------------------------------------------------------------
# 3. Voice pipeline helpers (the models bundled with fastrtc)
# ------------------------------------------------------------------
stt = get_stt_model("moonshine/tiny")  # Moonshine speech-to-text
tts = get_tts_model("kokoro")          # Kokoro text-to-speech


# ------------------------------------------------------------------
# 4. Response generator
# ------------------------------------------------------------------
def response_generator(prompt: str) -> str:
    """Feed the user prompt to Gemma-3 and return the assistant text."""
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = chat_pipeline(prompt_text)[0]["generated_text"]
    # Strip the prompt from the output so only the reply remains.
    return output[len(prompt_text):].strip()


# ------------------------------------------------------------------
# 5. FastRTC streaming handler
# ------------------------------------------------------------------
async def chat_handler(
    audio: tuple[int, np.ndarray],
) -> AsyncGenerator[tuple[int, np.ndarray], None]:
    """Receive one user utterance, transcribe it, answer via Gemma-3,
    and stream TTS audio back to the caller."""
    # ReplyOnPause hands the handler a single (sample_rate, samples) tuple
    # each time the caller stops speaking.
    user_text = stt.stt(audio)
    if not user_text.strip():
        return

    # Generation is blocking, so run it off the event loop.
    reply_text = await asyncio.to_thread(response_generator, user_text)

    # Stream TTS audio chunks back to the user.
    async for chunk in tts.stream_tts(reply_text):
        yield chunk


# ------------------------------------------------------------------
# 6. Launch the app
# ------------------------------------------------------------------
if __name__ == "__main__":
    stream = Stream(
        handler=ReplyOnPause(chat_handler),
        modality="audio",
        mode="send-receive",
    )
    stream.ui.launch(server_name="0.0.0.0", server_port=7860)
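Note that a Space installs its Python dependencies from a requirements.txt in the repo rather than from the pip command in the docstring; a plausible sketch matching the imports above (the package set is an assumption, with no version pins):

# requirements.txt (sketch)
fastrtc[vad,stt,tts]
transformers
torch
torchaudio
accelerate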