Spaces:

Bils
/

AIPromoStudio

Sleeping

App Files Files Community

Bils committed on Jan 12

Commit

dfa5d3e

verified ·

1 Parent(s): b5ad742

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -5

app.py CHANGED Viewed

@@ -11,11 +11,15 @@ from transformers import (
 from scipy.io.wavfile import write
 import tempfile
 from dotenv import load_dotenv
-import spaces
 load_dotenv()
 hf_token = os.getenv("HF_TOKEN")
 @spaces.GPU(duration=120)
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
     try:
@@ -24,20 +28,50 @@ def load_llama_pipeline_zero_gpu(model_id: str, token: str):
             model_id,
             use_auth_token=token,
             torch_dtype=torch.float16,
-            device_map="auto",
             trust_remote_code=True
         )
         return pipeline("text-generation", model=model, tokenizer=tokenizer)
     except Exception as e:
         return str(e)
 @spaces.GPU(duration=120)
 def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
     try:
-        mg_model.to("cuda")
         inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
         outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
-        mg_model.to("cpu")
         sr = mg_model.config.audio_encoder.sampling_rate
         audio_data = outputs[0, 0].cpu().numpy()
@@ -49,6 +83,33 @@ def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
     except Exception as e:
         return f"Error generating audio: {e}"
 with gr.Blocks() as demo:
     gr.Markdown("# 🎧 AI Radio Imaging with Llama 3 + MusicGen (Zero GPU)")
     user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show.")
@@ -61,7 +122,7 @@ with gr.Blocks() as demo:
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     generate_button.click(
-        fn=lambda prompt, model_id, token, length: (prompt, None),  # Simplify for demo
         inputs=[user_prompt, llama_model_id, hf_token, audio_length],
         outputs=[script_output, audio_output]
     )

 from scipy.io.wavfile import write
 import tempfile
 from dotenv import load_dotenv
+import spaces  # Assumes Hugging Face Spaces library supports `@spaces.GPU`
+# Load environment variables (e.g., Hugging Face token)
 load_dotenv()
 hf_token = os.getenv("HF_TOKEN")
+# ---------------------------------------------------------------------
+# Load Llama 3 Model with Zero GPU
+# ---------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
     try:
             model_id,
             use_auth_token=token,
             torch_dtype=torch.float16,
+            device_map="auto",  # Automatically handles GPU allocation
             trust_remote_code=True
         )
         return pipeline("text-generation", model=model, tokenizer=tokenizer)
     except Exception as e:
         return str(e)
+# ---------------------------------------------------------------------
+# Generate Radio Script
+# ---------------------------------------------------------------------
+def generate_script(user_input: str, pipeline_llama):
+    """Turn a promo concept into a short script via a text-generation callable.
+
+    Args:
+        user_input: The user's promo idea; embedded verbatim in the prompt.
+        pipeline_llama: Callable invoked as
+            ``pipeline_llama(prompt, max_new_tokens=..., do_sample=..., temperature=...)``
+            whose result is indexed as ``result[0]["generated_text"]``.
+
+    Returns:
+        The text following the last "Refined script:" marker, stripped of
+        surrounding whitespace, or an "Error generating script: ..." message
+        if anything raised along the way.
+    """
+    marker = "Refined script:"
+    instructions = (
+        "You are a top-tier radio imaging producer using Llama 3. "
+        "Take the user's concept and craft a short, creative promo script."
+    )
+    try:
+        prompt = f"{instructions}\nUser concept: {user_input}\n{marker}"
+        generations = pipeline_llama(
+            prompt,
+            max_new_tokens=200,
+            do_sample=True,
+            temperature=0.9,
+        )
+        full_text = generations[0]["generated_text"]
+        # Keep only what follows the last marker; if the marker is absent the
+        # whole generated text is kept.
+        _, _, script = full_text.rpartition(marker)
+        return script.strip()
+    except Exception as err:
+        return f"Error generating script: {err}"
+# ---------------------------------------------------------------------
+# Load MusicGen Model
+# ---------------------------------------------------------------------
+@spaces.GPU(duration=120)
+def load_musicgen_model():
+    """Load the "facebook/musicgen-small" model and its processor.
+
+    Returns:
+        ``(model, processor)`` on success, or ``(None, error_message)`` when
+        either ``from_pretrained`` call raises.
+    """
+    checkpoint = "facebook/musicgen-small"
+    try:
+        mg_model = MusicgenForConditionalGeneration.from_pretrained(checkpoint)
+        mg_processor = AutoProcessor.from_pretrained(checkpoint)
+    except Exception as err:
+        return None, str(err)
+    return mg_model, mg_processor
+# ---------------------------------------------------------------------
+# Generate Audio
+# ---------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
     try:
+        mg_model.to("cuda")  # Move the model to GPU
         inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
         outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
+        mg_model.to("cpu")  # Return the model to CPU
         sr = mg_model.config.audio_encoder.sampling_rate
         audio_data = outputs[0, 0].cpu().numpy()
     except Exception as e:
         return f"Error generating audio: {e}"
+# ---------------------------------------------------------------------
+# Gradio Interface
+# ---------------------------------------------------------------------
+def radio_imaging_app(user_prompt, llama_model_id, hf_token, audio_length):
+    """Run the script-then-audio flow behind the Generate button.
+
+    Args:
+        user_prompt: The promo idea typed by the user.
+        llama_model_id: Model id passed to ``load_llama_pipeline_zero_gpu``.
+        hf_token: Token passed to ``load_llama_pipeline_zero_gpu``.
+        audio_length: Passed to ``generate_audio`` as the generation length.
+
+    Returns:
+        A ``(text, audio)`` pair for the script and audio outputs. On any
+        failure the error message is placed in the text slot and ``audio`` is
+        ``None``: the audio output is declared with ``type="filepath"``, so an
+        error string returned there would be treated as a file path.
+    """
+    # Load Llama 3 Pipeline with Zero GPU (a str result is an error message)
+    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
+    if isinstance(pipeline_llama, str):
+        return pipeline_llama, None
+
+    # Generate Script; do not feed an error message to MusicGen as a prompt
+    script = generate_script(user_prompt, pipeline_llama)
+    if script.startswith("Error generating script:"):
+        return script, None
+
+    # Load MusicGen; on failure the second element carries the error text
+    mg_model, mg_processor = load_musicgen_model()
+    if mg_model is None or isinstance(mg_processor, str):
+        return f"{script}\n\nError loading MusicGen: {mg_processor}", None
+
+    # Generate Audio; failures come back as an "Error generating audio: ..." string
+    audio_data = generate_audio(script, audio_length, mg_model, mg_processor)
+    if isinstance(audio_data, str) and audio_data.startswith("Error generating audio:"):
+        return f"{script}\n\n{audio_data}", None
+    return script, audio_data
+# ---------------------------------------------------------------------
+# Interface
+# ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# 🎧 AI Radio Imaging with Llama 3 + MusicGen (Zero GPU)")
     user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show.")
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     generate_button.click(
+        fn=radio_imaging_app,
         inputs=[user_prompt, llama_model_id, hf_token, audio_length],
         outputs=[script_output, audio_output]
     )