import gradio as gr
import os
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline,
AutoProcessor,
MusicgenForConditionalGeneration,
)
import numpy as np
from scipy.io.wavfile import write
import tempfile
from dotenv import load_dotenv
import spaces

load_dotenv()
hf_token = os.getenv("HF_TOKEN")
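# Note: HF_TOKEN is expected as a Space secret (or in a local .env file);
# gated checkpoints such as Meta-Llama-3 cannot be downloaded without it.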
# ---------------------------------------------------------------------
# Load Llama 3 Pipeline with Zero GPU (Encapsulated)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300) # GPU allocation for 300 seconds
def generate_script(user_prompt: str, model_id: str, token: str):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        system_prompt = (
            "You are an expert radio imaging producer specializing in sound design and music. "
            "Take the user's concept and craft a concise, creative promo script with a strong focus on auditory elements and musical appeal."
        )
        combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"
        result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
        return result[0]["generated_text"].split("Refined script:")[-1].strip()
    except Exception as e:
        return f"Error generating script: {e}"
# ---------------------------------------------------------------------
# Load MusicGen Model (Encapsulated)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_audio(prompt: str, audio_length: int):
    try:
        musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

        # Ensure everything is on the same device (GPU or CPU)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        musicgen_model.to(device)

        inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt").to(device)
        outputs = musicgen_model.generate(**inputs, max_new_tokens=int(audio_length))

        # Move outputs to CPU for further processing
        audio_data = outputs[0, 0].cpu().numpy()

        # Normalize to 16-bit PCM and save the audio file
        normalized_audio = (audio_data / np.max(np.abs(audio_data)) * 32767).astype("int16")
        output_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
        write(output_path, musicgen_model.config.audio_encoder.sampling_rate, normalized_audio)
        return output_path
    except Exception as e:
        return f"Error generating audio: {e}"
# ---------------------------------------------------------------------
# Gradio Interface Functions
# ---------------------------------------------------------------------
def interface_generate_script(user_prompt, llama_model_id):
    return generate_script(user_prompt, llama_model_id, hf_token)


def interface_generate_audio(script, audio_length):
    return generate_audio(script, audio_length)
# ---------------------------------------------------------------------
# Interface
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    # Header
    gr.Markdown(
        """
        # 🎧 AI-Powered Radio Imaging Studio 🚀
        ### Create stunning **radio promos** with **Llama 3** and **MusicGen**
        🔥 **Zero GPU** integration for efficiency and ease!
        """
    )

    # Script Generation Section
    gr.Markdown("## ✍️ Step 1: Generate Your Promo Script")
    with gr.Row():
        user_prompt = gr.Textbox(
            label="🎤 Enter Promo Idea",
            placeholder="E.g., A 15-second energetic jingle for a morning talk show.",
            lines=2,
            info="Describe your promo idea clearly to generate a creative script.",
        )
        llama_model_id = gr.Textbox(
            label="🌿 Llama 3 Model ID",
            value="meta-llama/Meta-Llama-3-8B-Instruct",
            info="Enter the Hugging Face model ID for Llama 3.",
        )
    generate_script_button = gr.Button("Generate Script ✨")
    script_output = gr.Textbox(
        label="🎙️ Generated Promo Script",
        lines=4,
        interactive=False,
        info="Your generated promo script will appear here.",
    )
    # Audio Generation Section
    gr.Markdown("## 🎵 Step 2: Generate Audio from Your Script")
    with gr.Row():
        audio_length = gr.Slider(
            label="🎶 Audio Length (tokens)",
            minimum=128,
            maximum=1024,
            step=64,
            value=512,
            info="Select the desired audio token length.",
        )
    generate_audio_button = gr.Button("Generate Audio 🎶")
    audio_output = gr.Audio(
        label="🎵 Generated Audio File",
        type="filepath",
        interactive=False,
    )
    # Footer
    gr.Markdown(
        """
        <br><hr>
        <p style="text-align: center; font-size: 0.9em;">
            Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
        </p>
        """,
        elem_id="footer",
    )
    # Button Actions
    generate_script_button.click(
        fn=interface_generate_script,
        inputs=[user_prompt, llama_model_id],
        outputs=script_output,
    )
    generate_audio_button.click(
        fn=interface_generate_audio,
        inputs=[script_output, audio_length],
        outputs=audio_output,
    )
# ---------------------------------------------------------------------
# Launch App
# ---------------------------------------------------------------------
demo.launch(debug=True)