Merge branch 'feature/unique-session-ids' of https://github.com/DeltaZN/gradio-mcp-hackaton into feature/unique-session-ids
- src/audio/audio_generator.py +39 -30
- src/config.py +1 -1
- src/images/image_generator.py +3 -3
- src/main.py +9 -7
src/audio/audio_generator.py
CHANGED
@@ -5,18 +5,23 @@ import queue
 import logging
 import io
 import time
+from config import settings
+from services.google import GoogleClientFactory
 
 logger = logging.getLogger(__name__)
 
+
+
 
 async def generate_music(user_hash: str, music_tone: str, receive_audio):
     if user_hash in sessions:
+        logger.info(
+            f"Music generation already started for user hash {user_hash}, skipping new generation"
+        )
         return
     async with GoogleClientFactory.audio() as client:
         async with (
+            client.live.music.connect(model="models/lyria-realtime-exp") as session,
             asyncio.TaskGroup() as tg,
         ):
             # Set up task to receive server messages.
@@ -27,26 +32,24 @@ async def generate_music(user_hash: str, music_tone: str, receive_audio):
                 session.set_weighted_prompts(
                     prompts=[types.WeightedPrompt(text=music_tone, weight=1.0)]
                 ),
+                settings.request_timeout,
             )
             await asyncio.wait_for(
                 session.set_music_generation_config(
                     config=types.LiveMusicGenerationConfig(bpm=90, temperature=1.0)
+                ),
+                settings.request_timeout,
             )
+            await asyncio.wait_for(session.play(), settings.request_timeout)
             logger.info(
                 f"Started music generation for user hash {user_hash}, music tone: {music_tone}"
             )
+            sessions[user_hash] = {"session": session, "queue": queue.Queue()}
+
+
 async def change_music_tone(user_hash: str, new_tone):
     logger.info(f"Changing music tone to {new_tone}")
+    session = sessions.get(user_hash, {}).get("session")
     if not session:
         logger.error(f"No session found for user hash {user_hash}")
         return
@@ -54,14 +57,15 @@ async def change_music_tone(user_hash: str, new_tone):
         session.set_weighted_prompts(
             prompts=[types.WeightedPrompt(text=new_tone, weight=1.0)]
         ),
+        settings.request_timeout,
     )
+
 
 SAMPLE_RATE = 48000
 NUM_CHANNELS = 2  # Stereo
 SAMPLE_WIDTH = 2  # 16-bit audio -> 2 bytes per sample
 
+
 async def receive_audio(session, user_hash):
     """Process incoming audio from the music generation."""
     while True:
@@ -69,7 +73,7 @@ async def receive_audio(session, user_hash):
             async for message in session.receive():
                 if message.server_content and message.server_content.audio_chunks:
                     audio_data = message.server_content.audio_chunks[0].data
+                    queue = sessions[user_hash]["queue"]
                     # audio_data is already bytes (raw PCM)
                     await asyncio.to_thread(queue.put, audio_data)
                     await asyncio.sleep(10**-12)
@@ -77,42 +81,47 @@ async def receive_audio(session, user_hash):
             logger.error(f"Error in receive_audio: {e}")
             break
 
+
 sessions = {}
 
+
 async def start_music_generation(user_hash: str, music_tone: str):
     """Start the music generation in a separate thread."""
     await generate_music(user_hash, music_tone, receive_audio)
+
+
 async def cleanup_music_session(user_hash: str):
     if user_hash in sessions:
         logger.info(f"Cleaning up music session for user hash {user_hash}")
+        session = sessions[user_hash]["session"]
+        await asyncio.wait_for(session.stop(), settings.request_timeout)
+        await asyncio.wait_for(session.close(), settings.request_timeout)
         del sessions[user_hash]
+
 
 def update_audio(user_hash):
     """Continuously stream audio from the queue as WAV bytes."""
     if user_hash == "":
         return
+
     logger.info(f"Starting audio update loop for user hash: {user_hash}")
     while True:
         if user_hash not in sessions:
             time.sleep(0.5)
             continue
+        queue = sessions[user_hash]["queue"]
+        pcm_data = queue.get()  # This is raw PCM audio bytes
+
         if not isinstance(pcm_data, bytes):
+            logger.warning(
+                f"Expected bytes from audio_queue, got {type(pcm_data)}. Skipping."
+            )
             continue
 
         # Lyria provides stereo, 16-bit PCM at 48kHz.
         # Ensure the number of bytes is consistent with stereo 16-bit audio.
         # Each frame = NUM_CHANNELS * SAMPLE_WIDTH bytes.
+        # If len(pcm_data) is not a multiple of (NUM_CHANNELS * SAMPLE_WIDTH),
         # it might indicate an incomplete chunk or an issue.
         bytes_per_frame = NUM_CHANNELS * SAMPLE_WIDTH
         if len(pcm_data) % bytes_per_frame != 0:
@@ -121,12 +130,12 @@ def update_audio(user_hash):
                 f"bytes_per_frame ({bytes_per_frame}). This might cause issues with WAV formatting."
             )
             # Depending on strictness, you might want to skip this chunk:
+            # continue
 
         wav_buffer = io.BytesIO()
+        with wave.open(wav_buffer, "wb") as wf:
             wf.setnchannels(NUM_CHANNELS)
+            wf.setsampwidth(SAMPLE_WIDTH)  # Corresponds to 16-bit audio
             wf.setframerate(SAMPLE_RATE)
             wf.writeframes(pcm_data)
         wav_bytes = wav_buffer.getvalue()
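The pattern behind these changes is a per-user registry (sessions[user_hash]) that holds the live Lyria session plus a thread-safe queue.Queue bridging the async receiver and the synchronous Gradio streaming loop, with every session call now bounded by asyncio.wait_for(..., settings.request_timeout). A minimal sketch of that bridge, under the same message shape as the diff; REGISTRY, produce_pcm and consume_as_wav are illustrative names, not part of the repository:

import asyncio
import io
import queue
import wave

REGISTRY: dict[str, dict] = {}   # user_hash -> {"session": ..., "queue": queue.Queue()}

async def produce_pcm(user_hash: str, session) -> None:
    """Async side: push raw PCM chunks from the live session into the user's queue."""
    q = REGISTRY[user_hash]["queue"]
    async for message in session.receive():
        if message.server_content and message.server_content.audio_chunks:
            pcm = message.server_content.audio_chunks[0].data
            await asyncio.to_thread(q.put, pcm)  # never block the event loop on put()

def consume_as_wav(user_hash: str) -> bytes:
    """Sync side (Gradio worker thread): block on the queue and wrap one chunk as WAV."""
    pcm = REGISTRY[user_hash]["queue"].get()
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(2)      # stereo
        wf.setsampwidth(2)      # 16-bit samples
        wf.setframerate(48000)  # Lyria output rate
        wf.writeframes(pcm)
    return buf.getvalue()

Using queue.Queue rather than asyncio.Queue is the point of the design: the consumer runs in a plain worker thread driven by Gradio's streaming generator, so a blocking get() is safe there, while asyncio.to_thread keeps the producer's event loop responsive.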
src/config.py
CHANGED
@@ -29,6 +29,6 @@ class AppSettings(BaseAppSettings):
     top_p: float = 0.95
     temperature: float = 0.5
     pregenerate_next_scene: bool = True
+    request_timeout: int = 20
 
 settings = AppSettings()
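The single addition, request_timeout: int = 20, is passed straight to asyncio.wait_for at the call sites above, so its unit is seconds. A minimal sketch of that contract; bounded is an illustrative helper, not part of the repository:

import asyncio
from config import settings

async def bounded(coro):
    # settings.request_timeout defaults to 20; asyncio.wait_for interprets it as seconds
    # and raises asyncio.TimeoutError if the wrapped call does not finish in time.
    return await asyncio.wait_for(coro, settings.request_timeout)

# Usage at a call site, e.g.: await bounded(session.play())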
src/images/image_generator.py
CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime
 import logging
 import asyncio
 import gradio as gr
+from config import settings
 from services.google import GoogleClientFactory
 
 logger = logging.getLogger(__name__)
@@ -58,7 +58,7 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
                 safety_settings=safety_settings,
             ),
         ),
+        settings.request_timeout,
     )
 
     # Process the response parts
@@ -125,7 +125,7 @@ async def modify_image(image_path: str, modification_prompt: str) -> str | None:
                 safety_settings=safety_settings,
             ),
         ),
+        settings.request_timeout,
     )
 
     # Process the response parts
src/main.py
CHANGED
@@ -136,7 +136,7 @@ with gr.Blocks(
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
 
+    ls_user_hash = gr.BrowserState("", "user_hash")
 
     # Constructor Interface (visible by default)
     with gr.Column(
@@ -313,7 +313,7 @@ with gr.Blocks(
     start_btn.click(
         fn=start_game_with_music,
         inputs=[
+            ls_user_hash,
             setting_description,
             char_name,
             char_age,
@@ -330,13 +330,14 @@ with gr.Blocks(
             game_image,
             game_choices,
             custom_choice,
+            ls_user_hash,
         ],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
 
     back_btn.click(
         fn=return_to_constructor,
+        inputs=[ls_user_hash],
         outputs=[
             loading_indicator,
             constructor_interface,
@@ -347,7 +348,7 @@ with gr.Blocks(
 
     custom_choice.submit(
         fn=update_scene,
+        inputs=[ls_user_hash, custom_choice],
         outputs=[game_text, game_image, game_choices, custom_choice],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
@@ -356,14 +357,15 @@ with gr.Blocks(
     demo.load(
         fn=generate_user_hash,
         inputs=[],
+        outputs=[ls_user_hash],
     )
+    ls_user_hash.change(
         fn=update_audio,
+        inputs=[ls_user_hash],
         outputs=[audio_out],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
 
+
 demo.queue()
 demo.launch(ssr_mode=False)
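The wiring above generates a hash on page load, stores it client-side in the browser's local storage under the "user_hash" key via gr.BrowserState, and uses its change event (fired when demo.load writes the new value) to start the per-user audio stream. A minimal, self-contained sketch of that pattern, assuming a Gradio release that provides gr.BrowserState; generate_user_hash and update_audio are stand-ins for the app's real callbacks:

# Sketch only: mirrors the BrowserState -> change-event wiring from the diff,
# with placeholder callbacks instead of the app's real ones.
import uuid
import gradio as gr

def generate_user_hash() -> str:
    return uuid.uuid4().hex

def update_audio(user_hash: str):
    # Placeholder for the real generator that yields WAV-encoded chunks for this user.
    yield from ()

with gr.Blocks() as demo:
    audio_out = gr.Audio(streaming=True, autoplay=True)
    # Value is kept in the browser's local storage under the key "user_hash".
    ls_user_hash = gr.BrowserState("", "user_hash")

    demo.load(fn=generate_user_hash, inputs=[], outputs=[ls_user_hash])
    # Writing the hash on load fires .change, which starts this browser's audio stream.
    ls_user_hash.change(fn=update_audio, inputs=[ls_user_hash], outputs=[audio_out])

demo.queue()
demo.launch()

Routing the stream through the change event, rather than a second demo.load, ties the audio generator to the per-browser hash value, so each tab pulls from its own queue in sessions.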