matthartman committed
Commit d83f09b · verified · 1 Parent(s): 15cfdb0

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
"""
FastRTC + Gemma-3 minimal voice chat app

Requirements:
    pip install "fastrtc[vad,stt,tts]" transformers torch torchaudio
"""

import numpy as np
import torch
from fastrtc import (
    ReplyOnPause,
    Stream,
    get_stt_model,
    get_tts_model,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ------------------------------------------------------------------
# 1. Load Gemma-3 (4b-it) via transformers
# ------------------------------------------------------------------
MODEL_ID = "google/gemma-3-4b-it"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# ------------------------------------------------------------------
# 2. Build a simple chat pipeline
# ------------------------------------------------------------------
chat_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# ------------------------------------------------------------------
# 3. Voice pipeline helpers
# ------------------------------------------------------------------
# fastrtc bundles Moonshine for STT and Kokoro for TTS; these are the
# model ids its helpers accept ("tiny" / "coqui/XTTS-v2" are not).
stt = get_stt_model("moonshine/tiny")
tts = get_tts_model("kokoro")


# ------------------------------------------------------------------
# 4. Response generator
# ------------------------------------------------------------------
def response_generator(prompt: str) -> str:
    """Feed the user prompt to Gemma-3 and return the assistant text."""
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = chat_pipeline(prompt_text)[0]["generated_text"]
    # The pipeline echoes the prompt, so strip it from the output.
    return output[len(prompt_text):].strip()


# ------------------------------------------------------------------
# 5. FastRTC streaming handler
# ------------------------------------------------------------------
def chat_handler(audio: tuple[int, np.ndarray]):
    """Receive user voice, transcribe, answer via Gemma-3, stream back TTS audio."""
    # ReplyOnPause delivers one complete utterance as (sample_rate, samples)
    # once the user stops talking, so the handler is a plain generator.
    user_text = stt.stt(audio)
    if not user_text.strip():
        return

    # Generate response
    reply_text = response_generator(user_text)

    # Stream TTS audio back to the user, chunk by chunk
    for chunk in tts.stream_tts_sync(reply_text):
        yield chunk


# ------------------------------------------------------------------
# 6. Launch the app
# ------------------------------------------------------------------
if __name__ == "__main__":
    stream = Stream(
        handler=ReplyOnPause(chat_handler),
        modality="audio",
        mode="send-receive",
    )
    stream.ui.launch(server_name="0.0.0.0", server_port=7860)
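
A side note on deployment: fastrtc streams can also be served without the built-in Gradio UI by mounting them on a FastAPI app via `stream.mount()`. The sketch below is an illustration, not part of this commit; the file name `serve_api.py` is hypothetical, it rebuilds the Stream (app.py only constructs its own under `__main__`), and importing `chat_handler` from app.py triggers the Gemma-3 model load.

# serve_api.py -- illustrative alternative entrypoint (not in this commit)
# Assumes app.py above is importable; importing it loads the Gemma-3 model.
from fastapi import FastAPI
import uvicorn
from fastrtc import ReplyOnPause, Stream

from app import chat_handler  # the STT -> Gemma-3 -> TTS generator above

api = FastAPI()
stream = Stream(
    handler=ReplyOnPause(chat_handler),
    modality="audio",
    mode="send-receive",
)
stream.mount(api)  # fastrtc attaches its WebRTC routes to the FastAPI app

if __name__ == "__main__":
    uvicorn.run(api, host="0.0.0.0", port=7860)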