Spaces:
Runtime error
Upload app.py with huggingface_hub
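A minimal sketch of that kind of upload with huggingface_hub, assuming a placeholder Space id (the real repo id is not shown on this page):

from huggingface_hub import HfApi

api = HfApi()  # reads the access token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="your-username/your-space",  # placeholder, not this Space's actual id
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)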
app.py
ADDED
@@ -0,0 +1,93 @@
"""
FastRTC + Gemma-3 minimal voice chat app
Requirements:
    pip install "fastrtc[vad, stt, tts]" transformers torch torchaudio accelerate
"""

import asyncio
from typing import AsyncGenerator

import numpy as np
import torch
from fastrtc import (
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_tts_model,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ------------------------------------------------------------------
# 1. Load Gemma-3 (4b-it) via transformers
# ------------------------------------------------------------------
MODEL_ID = "google/gemma-3-4b-it"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires the accelerate package
    trust_remote_code=True,
)

# ------------------------------------------------------------------
# 2. Build a simple chat pipeline
# ------------------------------------------------------------------
chat_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# ------------------------------------------------------------------
# 3. Voice pipeline helpers (the models bundled with fastrtc)
# ------------------------------------------------------------------
stt = get_stt_model("moonshine/tiny")  # Moonshine speech-to-text
tts = get_tts_model("kokoro")          # Kokoro text-to-speech


# ------------------------------------------------------------------
# 4. Response generator
# ------------------------------------------------------------------
def response_generator(prompt: str) -> str:
    """Feed the user prompt to Gemma-3 and return the assistant text."""
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = chat_pipeline(prompt_text)[0]["generated_text"]
    # Strip the prompt from the output so only the reply remains.
    return output[len(prompt_text):].strip()


# ------------------------------------------------------------------
# 5. FastRTC streaming handler
# ------------------------------------------------------------------
async def chat_handler(
    audio: tuple[int, np.ndarray],
) -> AsyncGenerator[tuple[int, np.ndarray], None]:
    """Receive one user utterance, transcribe it, answer via Gemma-3,
    and stream TTS audio back to the caller."""
    # ReplyOnPause hands the handler a single (sample_rate, samples) tuple
    # each time the caller stops speaking.
    user_text = stt.stt(audio)
    if not user_text.strip():
        return

    # Generation is blocking, so run it off the event loop.
    reply_text = await asyncio.to_thread(response_generator, user_text)

    # Stream TTS audio chunks back to the user.
    async for chunk in tts.stream_tts(reply_text):
        yield chunk


# ------------------------------------------------------------------
# 6. Launch the app
# ------------------------------------------------------------------
if __name__ == "__main__":
    stream = Stream(
        handler=ReplyOnPause(chat_handler),
        modality="audio",
        mode="send-receive",
    )
    stream.ui.launch(server_name="0.0.0.0", server_port=7860)
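Note that a Space installs its Python dependencies from a requirements.txt in the repo rather than from the pip command in the docstring; a plausible sketch matching the imports above (the package set is an assumption, with no version pins):

# requirements.txt (sketch)
fastrtc[vad,stt,tts]
transformers
torch
torchaudio
accelerate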