Spaces:

Agents-MCP-Hackathon
/

LLMGameHub

Running

App Files Files Community

gsavin committed on Jun 4

Commit

4310b90

1 Parent(s): d9ec72e

feat: improve image generation

Browse files

Files changed (10) hide show

src/agent/llm.py +32 -1
src/agent/llm_agent.py +58 -23
src/audio/audio_generator.py +5 -5
src/config.py +3 -0
src/css.py +6 -7
src/game_constructor.py +29 -12
src/game_setting.py +13 -0
src/game_state.py +3 -3
src/images/image_generator.py +55 -27
src/main.py +41 -15

src/agent/llm.py CHANGED Viewed

@@ -12,7 +12,7 @@ def create_llm(temperature: float = settings.temperature, top_p: float = setting
     global _google_api_keys_list, _current_google_key_idx
     if not _google_api_keys_list:
-        api_keys_str = settings.gemini_api_key.get_secret_value()
         if api_keys_str:
             _google_api_keys_list = [key.strip() for key in api_keys_str.split(',') if key.strip()]
@@ -38,6 +38,37 @@ def create_llm(temperature: float = settings.temperature, top_p: float = setting
         top_p=top_p,
         thinking_budget=1024
     )
 def create_precise_llm():
     return create_llm(temperature=0, top_p=1)

     global _google_api_keys_list, _current_google_key_idx
     if not _google_api_keys_list:
+        api_keys_str = settings.gemini_api_keys.get_secret_value()
         if api_keys_str:
             _google_api_keys_list = [key.strip() for key in api_keys_str.split(',') if key.strip()]
         top_p=top_p,
         thinking_budget=1024
     )
def create_light_llm(temperature: float = settings.temperature, top_p: float = settings.top_p):
    """Create a ``gemini-2.0-flash`` chat model using the next API key in rotation.

    The comma-separated key list is read from ``settings.gemini_api_keys`` the
    first time it is needed and cached in the module-level
    ``_google_api_keys_list``. Each call then advances
    ``_current_google_key_idx`` so successive models use successive keys.

    Args:
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling cutoff forwarded to the model.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance bound to ``gemini-2.0-flash``.

    Raises:
        ValueError: If no non-empty API key is configured.
    """
    global _google_api_keys_list, _current_google_key_idx

    if not _google_api_keys_list:
        api_keys_str = settings.gemini_api_keys.get_secret_value()
        if api_keys_str:
            _google_api_keys_list = [key.strip() for key in api_keys_str.split(',') if key.strip()]
        if not _google_api_keys_list:
            logger.error("Google API keys are not configured or are empty in settings.")
            raise ValueError("Google API keys are not configured or are invalid for round-robin.")

    # The list is guaranteed non-empty from here on: the block above either
    # populated it or raised, so no second emptiness check is needed.
    key_index_to_use = _current_google_key_idx
    selected_api_key = _google_api_keys_list[key_index_to_use]
    _current_google_key_idx = (key_index_to_use + 1) % len(_google_api_keys_list)

    # Only a short suffix is ever logged. A key of four characters or fewer is
    # masked completely instead of being written to the log in full.
    key_suffix = selected_api_key[-4:] if len(selected_api_key) > 4 else "****"
    logger.debug(
        "Using Google API key at index %s (ending with ...%s) for round-robin.",
        key_index_to_use,
        key_suffix,
    )

    return ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=selected_api_key,
        temperature=temperature,
        top_p=top_p,
    )
 def create_precise_llm():
     return create_llm(temperature=0, top_p=1)

src/agent/llm_agent.py CHANGED Viewed

@@ -1,38 +1,73 @@
 from agent.llm import create_llm
 from pydantic import BaseModel, Field
-from typing import Optional, List
 import logging
 logger = logging.getLogger(__name__)
-class ChangeScene(BaseModel):
-    change_scene: bool = Field(description="Whether the scene should be changed")
-    scene_description: Optional[str] = None
-class ChangeMusic(BaseModel):
-    change_music: bool = Field(description="Whether the music should be changed")
-    music_description: Optional[str] = None
 class PlayerOption(BaseModel):
-    option_description: str = Field(description="The description of the option, Examples: [Change location] Go to the forest; [Say] Hello!")
 class LLMOutput(BaseModel):
-    change_scene: ChangeScene
-    change_music: ChangeMusic
-    game_message: str = Field(description="The message to the player, Example: You entered the forest, and you see unknown scary creatures. What do you do?")
-    player_options: List[PlayerOption] = Field(description="The list of up to 3 options for the player to choose from.")
-llm = create_llm().with_structured_output(LLMOutput)
-async def process_user_input(input: str) -> LLMOutput:
     """
     Process user input and update the state.
     """
-    logger.info(f"User's choice: {input}")
     response: LLMOutput = await llm.ainvoke(input)
-    logger.info(f"LLM response: {response}")
-    return response

 from agent.llm import create_llm
 from pydantic import BaseModel, Field
+from typing import List
 import logging
+from agent.image_agent import ChangeScene
+import asyncio
+from agent.music_agent import generate_music_prompt
+from agent.image_agent import generate_scene_image
+import uuid
 logger = logging.getLogger(__name__)
 class PlayerOption(BaseModel):
+    option_description: str = Field(
+        description="The description of the option, Examples: [Change location] Go to the forest; [Say] Hello!"
+    )
 class LLMOutput(BaseModel):
+    game_message: str = Field(
+        description="The message to the player, Example: You entered the forest, and you see unknown scary creatures. What do you do?"
+    )
+    player_options: List[PlayerOption] = Field(
+        description="The list of up to 3 options for the player to choose from."
+    )
+class MultiAgentResponse(BaseModel):
+    game_message: str = Field(
+        description="The message to the player, Example: You entered the forest, and you see unknown scary creatures. What do you do?"
+    )
+    player_options: List[PlayerOption] = Field(
+        description="The list of up to 3 options for the player to choose from."
+    )
+    music_prompt: str = Field(description="The prompt for the music generation model.")
+    change_scene: ChangeScene = Field(description="The change to the scene.")
# The narrator model only has to produce the message and the player options.
# The music prompt and the scene change come from the dedicated agents called
# in process_user_input, so asking this model for them would be wasted output.
llm = create_llm().with_structured_output(LLMOutput)


async def process_user_input(input: str) -> MultiAgentResponse:
    """Run one game turn and assemble the combined multi-agent response.

    The narrator model is invoked first. Its message and options are then
    appended to the input text, and that combined text is handed to the music
    and scene-image agents, which run concurrently.

    Args:
        input: Text passed verbatim to the narrator model.

    Returns:
        A ``MultiAgentResponse`` carrying the narrator's message and options
        together with the music prompt and scene change from the two agents.
    """
    # The request id is passed to both agents and written to the log lines
    # that bracket this turn.
    request_id = str(uuid.uuid4())
    logger.info(f"LLM input received: {request_id}")

    response: LLMOutput = await llm.ainvoke(input)

    current_state = f"""{input}
    Game reaction: {response.game_message}
    Player options: {response.player_options}
    """

    # The two agents do not depend on each other, so await them together.
    music_prompt, change_scene = await asyncio.gather(
        generate_music_prompt(current_state, request_id),
        generate_scene_image(current_state, request_id),
    )

    multi_agent_response = MultiAgentResponse(
        game_message=response.game_message,
        player_options=response.player_options,
        music_prompt=music_prompt,
        change_scene=change_scene,
    )
    logger.info(f"LLM responded: {request_id}")
    return multi_agent_response

src/audio/audio_generator.py CHANGED Viewed

@@ -13,10 +13,12 @@ logger = logging.getLogger(__name__)
 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value(), http_options={'api_version': 'v1alpha'})
 async def generate_music(user_hash: str, music_tone: str, receive_audio):
-      async with (
         client.aio.live.music.connect(model='models/lyria-realtime-exp') as session,
         asyncio.TaskGroup() as tg,
-      ):
         # Set up task to receive server messages.
         tg.create_task(receive_audio(session, user_hash))
@@ -31,10 +33,9 @@ async def generate_music(user_hash: str, music_tone: str, receive_audio):
         )
         await session.play()
         logger.info(f"Started music generation for user hash {user_hash}, music tone: {music_tone}")
-        await cleanup_music_session(user_hash)
         sessions[user_hash] = {
             'session': session,
-            'queue': queue.Queue(maxsize=3)
         }
 async def change_music_tone(user_hash: str, new_tone):
@@ -43,7 +44,6 @@ async def change_music_tone(user_hash: str, new_tone):
     if not session:
         logger.error(f"No session found for user hash {user_hash}")
         return
-    await session.reset_context()
     await session.set_weighted_prompts(
         prompts=[types.WeightedPrompt(text=new_tone, weight=1.0)]
     )

 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value(), http_options={'api_version': 'v1alpha'})
 async def generate_music(user_hash: str, music_tone: str, receive_audio):
+    if user_hash in sessions:
+        return
+    async with (
         client.aio.live.music.connect(model='models/lyria-realtime-exp') as session,
         asyncio.TaskGroup() as tg,
+    ):
         # Set up task to receive server messages.
         tg.create_task(receive_audio(session, user_hash))
         )
         await session.play()
         logger.info(f"Started music generation for user hash {user_hash}, music tone: {music_tone}")
         sessions[user_hash] = {
             'session': session,
+            'queue': queue.Queue()
         }
 async def change_music_tone(user_hash: str, new_tone):
     if not session:
         logger.error(f"No session found for user hash {user_hash}")
         return
     await session.set_weighted_prompts(
         prompts=[types.WeightedPrompt(text=new_tone, weight=1.0)]
     )

src/config.py CHANGED Viewed

@@ -21,8 +21,11 @@ class BaseAppSettings(BaseSettings):
 class AppSettings(BaseAppSettings):
     gemini_api_key: SecretStr
     top_p: float = 0.95
     temperature: float = 0.5
 settings = AppSettings()

 class AppSettings(BaseAppSettings):
     gemini_api_key: SecretStr
+    gemini_api_keys: SecretStr
+    # assistant_api_key: SecretStr
     top_p: float = 0.95
     temperature: float = 0.5
+    pregenerate_next_scene: bool = True
 settings = AppSettings()

src/css.py CHANGED Viewed

@@ -33,11 +33,11 @@ custom_css = """
     background: rgba(0,0,0,0.7) !important;
     border: none !important;
     color: white !important;
-    font-size: 18px !important;
     line-height: 1.5 !important;
-    padding: 20px !important;
     border-radius: 10px !important;
-    margin-bottom: 20px !important;
 }
 img {
@@ -49,7 +49,7 @@ img {
     border: none !important;
     color: white !important;
     -webkit-text-fill-color: white !important;
-    font-size: 18px !important;
     resize: none !important;
 }
@@ -57,13 +57,12 @@ img {
 .choice-buttons {
     background: rgba(0,0,0,0.7) !important;
     border-radius: 10px !important;
-    padding: 15px !important;
 }
 .choice-buttons label {
     color: white !important;
-    font-size: 16px !important;
-    margin-bottom: 10px !important;
 }
 /* Fix radio button backgrounds */

     background: rgba(0,0,0,0.7) !important;
     border: none !important;
     color: white !important;
+    font-size: 15px !important;
     line-height: 1.5 !important;
+    padding: 10px !important;
     border-radius: 10px !important;
+    margin-bottom: 10px !important;
 }
 img {
     border: none !important;
     color: white !important;
     -webkit-text-fill-color: white !important;
+    font-size: 15px !important;
     resize: none !important;
 }
 .choice-buttons {
     background: rgba(0,0,0,0.7) !important;
     border-radius: 10px !important;
+    padding: 10px !important;
 }
 .choice-buttons label {
     color: white !important;
+    font-size: 14px !important;
 }
 /* Fix radio button backgrounds */

src/game_constructor.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import gradio as gr
 import json
 import uuid
-from game_setting import Character, GameSetting
 from game_state import story, state, get_current_scene
 from agent.llm_agent import process_user_input
 from images.image_generator import generate_image
 from audio.audio_generator import start_music_generation
 import asyncio
 # Predefined suggestions for demo
 SETTING_SUGGESTIONS = [
@@ -107,6 +109,7 @@ def save_game_config(
     except Exception as e:
         return f"❌ Error saving configuration: {str(e)}"
 async def start_game_with_settings(
     user_hash: str,
     setting_desc: str,
@@ -155,27 +158,41 @@ Genre: {game_setting.genre}
 You find yourself at the beginning of your adventure. The world around you feels alive with possibilities. What do you choose to do first?
-NOTE FOR THE ASSISTANT: YOU HAVE TO GENERATE THE IMAGE FOR THE START SCENE.
 """
     response = await process_user_input(initial_story)
-    music_tone = response.change_music.music_description or "neutral"
     asyncio.create_task(start_music_generation(user_hash, music_tone))
     img = "forest.jpg"
-    if response.change_scene.change_scene:
-        img_path, _ = await generate_image(response.change_scene.scene_description)
-        if img_path:
-            img = img_path
     story["start"] = {
         "text": response.game_message,
         "image": img,
-        "choices": [option.option_description for option in response.player_options],
-        "music_tone": response.change_music.music_description,
     }
     state["scene"] = "start"

 import gradio as gr
 import json
 import uuid
+from game_setting import Character, GameSetting, get_user_story
 from game_state import story, state, get_current_scene
 from agent.llm_agent import process_user_input
 from images.image_generator import generate_image
 from audio.audio_generator import start_music_generation
 import asyncio
+from config import settings
 # Predefined suggestions for demo
 SETTING_SUGGESTIONS = [
     except Exception as e:
         return f"❌ Error saving configuration: {str(e)}"
 async def start_game_with_settings(
     user_hash: str,
     setting_desc: str,
 You find yourself at the beginning of your adventure. The world around you feels alive with possibilities. What do you choose to do first?
+NOTE FOR THE ASSISTANT: YOU HAVE TO GENERATE A NEW IMAGE FOR THE START SCENE.
 """
     response = await process_user_input(initial_story)
+    music_tone = response.music_prompt
     asyncio.create_task(start_music_generation(user_hash, music_tone))
     img = "forest.jpg"
+    img_description = ""
+    img_path, img_description = await generate_image(
+        response.change_scene.scene_description
+    )
+    if img_path:
+        img = img_path
     story["start"] = {
         "text": response.game_message,
         "image": img,
+        "choices": {
+            option.option_description: asyncio.create_task(
+                process_user_input(
+                    get_user_story(
+                        response.game_message,
+                        response.change_scene.scene_description,
+                        option.option_description,
+                    )
+                )
+            ) if settings.pregenerate_next_scene else None
+            for option in response.player_options
+        },
+        "music_tone": response.music_prompt,
+        "img_description": img_description,
     }
     state["scene"] = "start"

src/game_setting.py CHANGED Viewed

@@ -1,12 +1,25 @@
 from pydantic import BaseModel
 class Character(BaseModel):
     name: str
     age: str
     background: str
     personality: str
 class GameSetting(BaseModel):
     character: Character
     setting: str
     genre: str

 from pydantic import BaseModel
 class Character(BaseModel):
     name: str
     age: str
     background: str
     personality: str
 class GameSetting(BaseModel):
     character: Character
     setting: str
     genre: str
def get_user_story(
    scene_description: str, scene_image_description: str, user_choice: str
) -> str:
    """Format the current scene and the player's choice as one prompt string.

    Args:
        scene_description: Text describing the current scene.
        scene_image_description: Text describing the current scene image.
        user_choice: The option the player picked.

    Returns:
        A multi-line string with a fixed header line, three lines padded with
        twelve spaces, and a final line of eight spaces.
    """
    # The padding is part of the emitted text, so it is spelled out here.
    body_pad = " " * 12
    tail_pad = " " * 8
    prompt_lines = [
        "Current scene description:",
        body_pad + f"{scene_description}",
        body_pad + f"Current scene image description: {scene_image_description}",
        body_pad + f"User's choice: {user_choice}",
        tail_pad,
    ]
    return "\n".join(prompt_lines)

src/game_state.py CHANGED Viewed

@@ -1,10 +1,10 @@
 story = {
     "start": {
         "text": "You wake up in a mysterious forest. What do you do?",
         "image": "forest.jpg",
-        "choices": ["Explore", "Wait"],
         "music_tone": "neutral",
     },
 }
@@ -12,4 +12,4 @@ state = {"scene": "start"}
 def get_current_scene():
     scene = story[state["scene"]]
-    return scene["text"], scene["image"], scene["choices"]

 story = {
     "start": {
         "text": "You wake up in a mysterious forest. What do you do?",
         "image": "forest.jpg",
+        "choices": {"Explore": None, "Wait": None},
         "music_tone": "neutral",
+        "img_description": "forest in the fog",
     },
 }
 def get_current_scene():
     scene = story[state["scene"]]
+    return scene["text"], scene["image"], scene["choices"].keys()

src/images/image_generator.py CHANGED Viewed

@@ -6,25 +6,47 @@ from io import BytesIO
 from datetime import datetime
 from config import settings
 import logging
 logger = logging.getLogger(__name__)
 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value()).aio
 async def generate_image(prompt: str) -> tuple[str, str] | None:
     """
     Generate an image using Google's Gemini model and save it to generated/images directory.
     Args:
         prompt (str): The text prompt to generate the image from
     Returns:
         str: Path to the generated image file, or None if generation failed
     """
     # Ensure the generated/images directory exists
     output_dir = "generated/images"
     os.makedirs(output_dir, exist_ok=True)
     logger.info(f"Generating image with prompt: {prompt}")
     try:
@@ -32,8 +54,9 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
             model="gemini-2.0-flash-preview-image-generation",
             contents=prompt,
             config=types.GenerateContentConfig(
-                response_modalities=['TEXT', 'IMAGE'],
-            )
         )
         # Process the response parts
@@ -44,19 +67,20 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"gemini_{timestamp}.png"
                 filepath = os.path.join(output_dir, filename)
                 # Save the image
                 image = Image.open(BytesIO(part.inline_data.data))
-                image.save(filepath, "PNG")
                 logger.info(f"Image saved to: {filepath}")
                 image_saved = True
-                return filepath, part.text
         if not image_saved:
             logger.error("No image was generated in the response.")
             return None, None
     except Exception as e:
         logger.error(f"Error generating image: {e}")
         return None, None
@@ -65,38 +89,41 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
 async def modify_image(image_path: str, modification_prompt: str) -> str | None:
     """
     Modify an existing image using Google's Gemini model based on a text prompt.
     Args:
         image_path (str): Path to the existing image file
         modification_prompt (str): The text prompt describing how to modify the image
     Returns:
         str: Path to the modified image file, or None if modification failed
     """
     # Ensure the generated/images directory exists
     output_dir = "generated/images"
     os.makedirs(output_dir, exist_ok=True)
     # Check if the input image exists
     if not os.path.exists(image_path):
         logger.error(f"Error: Image file not found at {image_path}")
         return None
     key = settings.gemini_api_key.get_secret_value()
     client = genai.Client(api_key=key).aio
     try:
         # Load the input image
         input_image = Image.open(image_path)
         # Make the API call with both text and image
         response = await client.models.generate_content(
             model="gemini-2.0-flash-preview-image-generation",
             contents=[modification_prompt, input_image],
             config=types.GenerateContentConfig(
-                response_modalities=['TEXT', 'IMAGE']
-            )
         )
         # Process the response parts
@@ -107,19 +134,20 @@ async def modify_image(image_path: str, modification_prompt: str) -> str | None:
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"gemini_modified_{timestamp}.png"
                 filepath = os.path.join(output_dir, filename)
                 # Save the modified image
                 modified_image = Image.open(BytesIO(part.inline_data.data))
-                modified_image.save(filepath, "PNG")
                 logger.info(f"Modified image saved to: {filepath}")
                 image_saved = True
-                return filepath, part.text
         if not image_saved:
             logger.error("No modified image was generated in the response.")
             return None, None
     except Exception as e:
         logger.error(f"Error modifying image: {e}")
         return None, None
@@ -129,10 +157,10 @@ if __name__ == "__main__":
     # Example usage
     sample_prompt = "A Luke Skywalker half height sprite with white background for visual novel game"
     generated_image_path = generate_image(sample_prompt)
     # if generated_image_path:
     #     # Example modification
     #     modification_prompt = "Now the house is destroyed, and the jawas are running away"
     #     modified_image_path = modify_image(generated_image_path, modification_prompt)
     #     if modified_image_path:
-    #         print(f"Successfully modified image: {modified_image_path}")

 from datetime import datetime
 from config import settings
 import logging
+import asyncio
+import gradio as gr
 logger = logging.getLogger(__name__)
 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value()).aio
# One SafetySetting per harm category, each with the threshold set to
# "BLOCK_NONE". The same list is passed to both the image-generation and the
# image-modification requests.
safety_settings = [
    types.SafetySetting(category=harm_category, threshold="BLOCK_NONE")
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]
 async def generate_image(prompt: str) -> tuple[str, str] | None:
     """
     Generate an image using Google's Gemini model and save it to generated/images directory.
     Args:
         prompt (str): The text prompt to generate the image from
     Returns:
         str: Path to the generated image file, or None if generation failed
     """
     # Ensure the generated/images directory exists
     output_dir = "generated/images"
     os.makedirs(output_dir, exist_ok=True)
     logger.info(f"Generating image with prompt: {prompt}")
     try:
             model="gemini-2.0-flash-preview-image-generation",
             contents=prompt,
             config=types.GenerateContentConfig(
+                response_modalities=["TEXT", "IMAGE"],
+                safety_settings=safety_settings,
+            ),
         )
         # Process the response parts
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"gemini_{timestamp}.png"
                 filepath = os.path.join(output_dir, filename)
                 # Save the image
                 image = Image.open(BytesIO(part.inline_data.data))
+                await asyncio.to_thread(image.save, filepath, "PNG")
                 logger.info(f"Image saved to: {filepath}")
                 image_saved = True
+                return filepath, prompt
         if not image_saved:
+            gr.Warning("Image was censored by Google!")
             logger.error("No image was generated in the response.")
             return None, None
     except Exception as e:
         logger.error(f"Error generating image: {e}")
         return None, None
 async def modify_image(image_path: str, modification_prompt: str) -> str | None:
     """
     Modify an existing image using Google's Gemini model based on a text prompt.
     Args:
         image_path (str): Path to the existing image file
         modification_prompt (str): The text prompt describing how to modify the image
     Returns:
         str: Path to the modified image file, or None if modification failed
     """
     # Ensure the generated/images directory exists
     output_dir = "generated/images"
     os.makedirs(output_dir, exist_ok=True)
+    logger.info(f"Modifying current scene image with prompt: {modification_prompt}")
     # Check if the input image exists
     if not os.path.exists(image_path):
         logger.error(f"Error: Image file not found at {image_path}")
         return None
     key = settings.gemini_api_key.get_secret_value()
     client = genai.Client(api_key=key).aio
     try:
         # Load the input image
         input_image = Image.open(image_path)
         # Make the API call with both text and image
         response = await client.models.generate_content(
             model="gemini-2.0-flash-preview-image-generation",
             contents=[modification_prompt, input_image],
             config=types.GenerateContentConfig(
+                response_modalities=["TEXT", "IMAGE"],
+                safety_settings=safety_settings,
+            ),
         )
         # Process the response parts
                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 filename = f"gemini_modified_{timestamp}.png"
                 filepath = os.path.join(output_dir, filename)
                 # Save the modified image
                 modified_image = Image.open(BytesIO(part.inline_data.data))
+                await asyncio.to_thread(modified_image.save, filepath, "PNG")
                 logger.info(f"Modified image saved to: {filepath}")
                 image_saved = True
+                return filepath, modification_prompt
         if not image_saved:
+            gr.Warning("Updated image was censored by Google!")
             logger.error("No modified image was generated in the response.")
             return None, None
     except Exception as e:
         logger.error(f"Error modifying image: {e}")
         return None, None
     # Example usage
     sample_prompt = "A Luke Skywalker half height sprite with white background for visual novel game"
     generated_image_path = generate_image(sample_prompt)
     # if generated_image_path:
     #     # Example modification
     #     modification_prompt = "Now the house is destroyed, and the jawas are running away"
     #     modified_image_path = modify_image(generated_image_path, modification_prompt)
     #     if modified_image_path:
+    #         print(f"Successfully modified image: {modified_image_path}")

src/main.py CHANGED Viewed

@@ -7,7 +7,7 @@ from audio.audio_generator import (
 )
 import logging
 from agent.llm_agent import process_user_input
-from images.image_generator import generate_image
 import uuid
 from game_state import story, state
 from game_constructor import (
@@ -19,6 +19,8 @@ from game_constructor import (
     start_game_with_settings,
 )
 import asyncio
 logger = logging.getLogger(__name__)
@@ -43,29 +45,53 @@ async def update_scene(user_hash: str, choice):
         }
         state["scene"] = new_scene
-        user_story = f"""Current scene description:
-            {story[old_scene]["text"]}
-            User's choice: {choice}
-        """
-        response = await process_user_input(user_story)
         story[new_scene]["text"] = response.game_message
-        story[new_scene]["choices"] = [
-            option.option_description for option in response.player_options
-        ]
         # run both tasks in parallel
         img_res, _ = await asyncio.gather(
-            generate_image(response.change_scene.scene_description) if response.change_scene.change_scene else asyncio.sleep(0),
-            change_music_tone(user_hash, response.change_music.music_description) if response.change_music.change_music else asyncio.sleep(0)
         )
         if img_res and response.change_scene.change_scene:
-            img_path, _ = img_res
             if img_path:
                 story[new_scene]["image"] = img_path
     scene = story[state["scene"]]
     return (
@@ -136,7 +162,7 @@ with gr.Blocks(
     # Fullscreen Loading Indicator (hidden by default)
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
     local_storage = gr.BrowserState(str(uuid.uuid4()), "user_hash")
     # Constructor Interface (visible by default)

 )
 import logging
 from agent.llm_agent import process_user_input
+from images.image_generator import modify_image
 import uuid
 from game_state import story, state
 from game_constructor import (
     start_game_with_settings,
 )
 import asyncio
+from game_setting import get_user_story
+from config import settings
 logger = logging.getLogger(__name__)
         }
         state["scene"] = new_scene
+        user_story = get_user_story(
+            story[old_scene]["text"], story[old_scene]["img_description"], choice
+        )
+        response = await (
+            story[old_scene]["choices"][choice] or process_user_input(user_story)
+        )
         story[new_scene]["text"] = response.game_message
+        story[new_scene]["choices"] = {
+            option.option_description: asyncio.create_task(
+                process_user_input(
+                    get_user_story(
+                        response.game_message,
+                        response.change_scene.scene_description,
+                        option.option_description,
+                    )
+                )
+            )
+            if settings.pregenerate_next_scene
+            else None
+            for option in response.player_options
+        }
+        img_task = None
+        # always modify the image to avoid hallucinations in which image is being generated in entirely different style
+        if (
+            response.change_scene.change_scene == "change_completely"
+            or response.change_scene.change_scene == "modify"
+        ):
+            img_task = modify_image(
+                story[old_scene]["image"], response.change_scene.scene_description
+            )
+        else:
+            img_task = asyncio.sleep(0)
         # run both tasks in parallel
         img_res, _ = await asyncio.gather(
+            img_task, change_music_tone(user_hash, response.music_prompt)
         )
         if img_res and response.change_scene.change_scene:
+            img_path, img_description = img_res
             if img_path:
                 story[new_scene]["image"] = img_path
+                story[new_scene]["img_description"] = img_description
     scene = story[state["scene"]]
     return (
     # Fullscreen Loading Indicator (hidden by default)
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
     local_storage = gr.BrowserState(str(uuid.uuid4()), "user_hash")
     # Constructor Interface (visible by default)