Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,999 Bytes
17d10a7 a15d204 d448add db46bfb 1c1b50f db46bfb 1c1b50f db8ba25 db46bfb cf3593c 3168a3e cf3593c 1c1b50f db8ba25 1c1b50f 6f08234 db8ba25 dfa5d3e db8ba25 dfa5d3e 3168a3e 60b6e41 db8ba25 dfa5d3e 8b6a33e dfa5d3e db8ba25 dfa5d3e 3b58485 e7b189b 17d10a7 db8ba25 8b6a33e 6f08234 cf3593c 8b6a33e 17d10a7 d448add 3b58485 6f08234 cf3593c d448add db8ba25 dfa5d3e 3168a3e dfa5d3e db8ba25 dfa5d3e 16060e9 db8ba25 dfa5d3e 5080bd7 60b6e41 6f08234 60b6e41 07c07fa db8ba25 60b6e41 db8ba25 60b6e41 db8ba25 60b6e41 3168a3e 6f08234 3168a3e 07c07fa 60b6e41 6f08234 60b6e41 07c07fa 60b6e41 07c07fa db8ba25 07c07fa db8ba25 07c07fa db8ba25 07c07fa db8ba25 07c07fa 3fe530b 1a0bb5e a8c9cb5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import gradio as gr
import os
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline,
AutoProcessor,
MusicgenForConditionalGeneration,
)
from scipy.io.wavfile import write
import tempfile
from dotenv import load_dotenv
import spaces
# Call python-dotenv's loader before reading the environment
# (presumably picks up a local .env file when one exists - confirm for the deployment).
load_dotenv()
# Hugging Face access token read from the HF_TOKEN environment variable;
# os.getenv returns None when the variable is not set.
hf_token = os.getenv("HF_TOKEN")
# ---------------------------------------------------------------------
# Load Llama 3 Pipeline with Zero GPU (Encapsulated)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_script(user_prompt: str, model_id: str, token: str):
    """Generate a radio promo script from a user concept with a causal LM.

    The tokenizer and model are loaded inside the call, so every invocation
    builds a fresh text-generation pipeline within the ``spaces.GPU`` scope.

    Args:
        user_prompt: Free-text description of the promo idea.
        model_id: Hugging Face Hub ID of the text-generation model to load.
        token: Hugging Face access token forwarded to ``from_pretrained``.

    Returns:
        The generated script text. On any failure (including an empty
        prompt) a string starting with ``"Error generating script:"`` is
        returned instead of raising.
    """
    # Skip the expensive model load when there is nothing to work from.
    if not user_prompt or not user_prompt.strip():
        return "Error generating script: please enter a promo idea first."

    try:
        # `token` is the current name of the deprecated `use_auth_token` argument.
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.float16,
            device_map="auto",
            # NOTE(security): trust_remote_code=True lets the selected Hub
            # repository run its own Python code. If `model_id` can come from
            # untrusted users, restrict it to an allow-list or disable this.
            trust_remote_code=True,
        )
        llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

        system_prompt = (
            "You are an expert radio imaging producer specializing in sound design and music. "
            "Take the user's concept and craft a concise, creative promo script with a strong focus on auditory elements and musical appeal."
        )
        combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"

        # return_full_text=False yields only the continuation. Splitting the
        # full text on "Refined script:" would discard most of the output
        # whenever the model repeats that marker.
        result = llama_pipeline(
            combined_prompt,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.9,
            return_full_text=False,
        )
        return result[0]["generated_text"].strip()
    except Exception as e:
        # Errors are reported as text rather than raised.
        return f"Error generating script: {e}"
# ---------------------------------------------------------------------
# Load MusicGen Model (Encapsulated)
# ---------------------------------------------------------------------
@spaces.GPU(duration=300)
def generate_audio(prompt: str, audio_length: int):
    """Generate audio for a text prompt with MusicGen and save it as a WAV file.

    The ``facebook/musicgen-small`` model and processor are loaded inside the
    call, within the ``spaces.GPU`` scope.

    Args:
        prompt: Text used to condition the audio generation.
        audio_length: Number of new tokens to generate (``max_new_tokens``).

    Returns:
        Path of a newly created 16-bit WAV file in the system temp directory.

    Raises:
        gr.Error: If the prompt is empty or if loading/generation/writing
            fails. Raising keeps an error message from being handed to an
            audio output as if it were a file path.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Error generating audio: the script is empty.")

    try:
        musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        musicgen_model.to("cuda")

        # The inputs must be on the same device as the model, otherwise
        # generate() fails with a device-mismatch error.
        inputs = musicgen_processor(
            text=[prompt], padding=True, return_tensors="pt"
        ).to("cuda")
        # Cast defensively: UI sliders may deliver the value as a float.
        outputs = musicgen_model.generate(**inputs, max_new_tokens=int(audio_length))
        musicgen_model.to("cpu")

        sr = musicgen_model.config.audio_encoder.sampling_rate
        audio_data = outputs[0, 0].cpu().numpy()

        # Peak-normalise to the int16 range; skip the division for empty or
        # all-zero output so no NaNs are produced.
        peak = float(abs(audio_data).max()) if audio_data.size else 0.0
        if peak > 0:
            audio_data = audio_data / peak
        normalized_audio = (audio_data * 32767).astype("int16")

        # Unique file per call so concurrent requests cannot overwrite each
        # other's result.
        with tempfile.NamedTemporaryFile(
            prefix="generated_audio_", suffix=".wav", delete=False
        ) as tmp_file:
            output_path = tmp_file.name
        write(output_path, sr, normalized_audio)
        return output_path
    except Exception as e:
        raise gr.Error(f"Error generating audio: {e}") from e
# ---------------------------------------------------------------------
# Gradio Interface Functions
# ---------------------------------------------------------------------
def interface_generate_script(user_prompt, llama_model_id):
    """Gradio callback: build a promo script for the given idea and model ID.

    Delegates to ``generate_script``, supplying the module-level ``hf_token``
    as the access token, and returns whatever that call returns.
    """
    script_text = generate_script(user_prompt, llama_model_id, hf_token)
    return script_text
def interface_generate_audio(script, audio_length):
    """Gradio callback: turn a script into audio of the requested token length.

    Delegates to ``generate_audio`` and returns its result unchanged.
    """
    audio_result = generate_audio(script, audio_length)
    return audio_result
# ---------------------------------------------------------------------
# Interface
# ---------------------------------------------------------------------
# Build the two-step UI: (1) promo idea -> script, (2) script -> audio.
# NOTE(review): the nesting of the gr.Row() sections below was reconstructed;
# confirm which widgets are meant to sit inside each row.
with gr.Blocks() as demo:
    # Header
    gr.Markdown("""
# ποΈ AI-Powered Radio Imaging Studio π
### Create stunning **radio promos** with **Llama 3** and **MusicGen**
π₯ **Zero GPU** integration for efficiency and ease!
β€οΈ A huge thanks to the **Hugging Face community** for making this possible.
""")
    # Script Generation Section
    gr.Markdown("## βοΈ Step 1: Generate Your Promo Script")
    with gr.Row():
        # Free-text promo concept entered by the user.
        user_prompt = gr.Textbox(
            label="π€ Enter Promo Idea",
            placeholder="E.g., A 15-second energetic jingle for a morning talk show.",
            lines=2,
            info="Describe your promo idea clearly to generate a creative script."
        )
        # Model ID is editable, so the user can point at a different Hub model.
        llama_model_id = gr.Textbox(
            label="ποΈ Llama 3 Model ID",
            value="meta-llama/Meta-Llama-3-8B-Instruct",
            info="Enter the Hugging Face model ID for Llama 3."
        )
        generate_script_button = gr.Button("Generate Script β¨")
    # Read-only box that receives the generated script; it is also the text
    # input of the audio step wired up below.
    script_output = gr.Textbox(
        label="π Generated Promo Script",
        lines=4,
        interactive=False,
        info="Your generated promo script will appear here."
    )
    # Audio Generation Section
    gr.Markdown("## π§ Step 2: Generate Audio from Your Script")
    with gr.Row():
        # Length is expressed in generation tokens, not seconds.
        audio_length = gr.Slider(
            label="π΅ Audio Length (tokens)",
            minimum=128,
            maximum=1024,
            step=64,
            value=512,
            info="Select the desired audio token length."
        )
        generate_audio_button = gr.Button("Generate Audio πΆ")
    # Output player; type="filepath" means the callback result is handled as a
    # path to an audio file.
    audio_output = gr.Audio(
        label="πΆ Generated Audio File",
        type="filepath",
        interactive=False
    )
    # Footer
    gr.Markdown("""
<br><hr>
<p style="text-align: center; font-size: 0.9em;">
Created with β€οΈ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
Special thanks to the <strong>Hugging Face community</strong> for their incredible support and tools!
</p>
""", elem_id="footer")
    # Button Actions
    # Step 1: promo idea + model ID -> script textbox.
    generate_script_button.click(
        fn=interface_generate_script,
        inputs=[user_prompt, llama_model_id],
        outputs=script_output,
    )
    # Step 2: generated script + token length -> audio player.
    generate_audio_button.click(
        fn=interface_generate_audio,
        inputs=[script_output, audio_length],
        outputs=audio_output,
    )
# ---------------------------------------------------------------------
# Launch App
# ---------------------------------------------------------------------
# Start serving the `demo` Blocks app, with Gradio's debug flag enabled.
demo.launch(debug=True)
|