AIPromoStudio / app.py
Bils's picture
Update app.py
3fe530b verified
raw
history blame
11.9 kB
import streamlit as st
import torch
import scipy.io.wavfile
import requests
from io import BytesIO
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
pipeline,
AutoProcessor,
MusicgenForConditionalGeneration
)
from streamlit_lottie import st_lottie # pip install streamlit-lottie
# ---------------------------------------------------------------------
# 1) Page Configuration
# ---------------------------------------------------------------------
st.set_page_config(
page_title="Modern Radio Imaging Generator - Llama 3 & MusicGen",
page_icon="🎧",
layout="wide"
)
# ---------------------------------------------------------------------
# 2) Custom CSS for a Sleek, Modern Look
# ---------------------------------------------------------------------
MODERN_CSS = """
<style>
/* Body styling */
body {
background: linear-gradient(to bottom right, #ffffff, #f3f4f6);
font-family: 'Helvetica Neue', Arial, sans-serif;
color: #1F2937;
}
/* Make the container narrower for a sleek look */
.block-container {
max-width: 1100px;
}
/* Heading style */
h1, h2, h3, h4, h5, h6 {
color: #3B82F6;
margin-bottom: 0.5em;
}
/* Buttons */
.stButton>button {
background-color: #3B82F6 !important;
color: #FFFFFF !important;
border-radius: 0.8rem !important;
font-size: 1rem !important;
padding: 0.6rem 1.2rem !important;
}
/* Sidebar customization */
.sidebar .sidebar-content {
background: #E0F2FE;
}
/* Text input areas */
textarea, input, select {
border-radius: 0.5rem !important;
}
/* Animate some elements on hover (just an example) */
.stButton>button:hover {
background-color: #2563EB !important;
transition: background-color 0.3s ease-in-out;
}
/* Lottie container style */
.lottie-container {
display: flex;
justify-content: center;
margin: 1rem 0;
}
/* Footer note */
.footer-note {
text-align: center;
opacity: 0.7;
font-size: 14px;
margin-top: 2rem;
}
/* Hide default Streamlit branding if desired */
#MainMenu, footer {visibility: hidden;}
</style>
"""
st.markdown(MODERN_CSS, unsafe_allow_html=True)
# ---------------------------------------------------------------------
# 3) Lottie Animation Loader
# ---------------------------------------------------------------------
@st.cache_data
def load_lottie_url(url: str):
"""
Loads a Lottie animation JSON from a given URL.
"""
r = requests.get(url)
if r.status_code != 200:
return None
return r.json()
# Example Lottie animations (feel free to replace with your own):
LOTTIE_URL_HEADER = "https://assets1.lottiefiles.com/packages/lf20_amhnytsm.json" # music-themed animation
lottie_music = load_lottie_url(LOTTIE_URL_HEADER)
# ---------------------------------------------------------------------
# 4) Header & Intro with a Lottie Animation
# ---------------------------------------------------------------------
col_header1, col_header2 = st.columns([3, 2], gap="medium")
with col_header1:
st.markdown(
"""
<h1>πŸŽ™ Radio Imaging Generator (Beta)</h1>
<p style='font-size:18px;'>
Create catchy radio promos, ads, and station jingles with
a modern UI, Llama 3 text generation, and MusicGen audio!
</p>
""",
unsafe_allow_html=True
)
with col_header2:
if lottie_music:
with st.container():
st_lottie(lottie_music, height=180, key="header_lottie")
else:
# Fallback if Lottie fails to load
st.markdown("*(Animation unavailable)*")
st.markdown("---")
# ---------------------------------------------------------------------
# 5) Explanation in an Expander
# ---------------------------------------------------------------------
with st.expander("πŸ“˜ How to Use This App"):
st.markdown(
"""
**Steps**:
1. **Model & Language**: In the sidebar, choose the Llama model ID (e.g. a real Llama 2) and the device.
2. **Enter Concept**: Provide a short description of the ad or jingle you want.
3. **Refine**: Click on "Refine with Llama 3" to get a polished script in your chosen language or style.
4. **Generate Audio**: Use MusicGen to create a short audio snippet from that refined script.
5. **Listen & Download**: Enjoy or download the result as a WAV file.
**Note**:
- If "Llama 3.3" doesn't exist, you'll get errors. Use a real model from [Hugging Face](https://huggingface.co/models)
like `meta-llama/Llama-2-7b-chat-hf`.
- Some large models require GPU (or specialized hardware) for feasible speeds.
- This example uses [streamlit-lottie](https://github.com/andfanilo/streamlit-lottie) for animation.
"""
)
# ---------------------------------------------------------------------
# 6) Sidebar Configuration
# ---------------------------------------------------------------------
with st.sidebar:
st.header("πŸ”§ Llama 3 & Audio Settings")
# Model input
llama_model_id = st.text_input(
"Llama Model ID",
value="meta-llama/Llama-3.3-70B-Instruct", # Fictitious, please replace with a real model
help="Replace with a real model, e.g. meta-llama/Llama-2-7b-chat-hf"
)
device_option = st.selectbox(
"Hardware Device",
["auto", "cpu"],
index=0,
help="If local GPU is available, choose 'auto'. CPU might be slow for large models."
)
# Multi-language or style
language_choice = st.selectbox(
"Choose Language",
["English", "Spanish", "French", "German", "Other (describe in prompt)"]
)
# Music style & max tokens
music_style = st.selectbox(
"Preferred Music Style",
["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
)
audio_tokens = st.slider("MusicGen Max Tokens (Track Length)", 128, 1024, 512, 64)
# ---------------------------------------------------------------------
# 7) Prompt for the Radio Imaging Concept
# ---------------------------------------------------------------------
st.markdown("## ✍️ Your Radio Concept")
prompt = st.text_area(
"Describe the theme, audience, length, energy level, etc.",
placeholder="E.g. 'A high-energy 10-second pop jingle for a morning radio show...'"
)
# ---------------------------------------------------------------------
# 8) Load Llama Pipeline
# ---------------------------------------------------------------------
@st.cache_resource
def load_llama_pipeline(model_id: str, device: str):
"""
Loads the specified Llama or other HF model as a text-generation pipeline.
This references a hypothetical Llama 3.3.
"""
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float16 if device == "auto" else torch.float32,
device_map=device
)
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
device_map=device
)
return pipe
def refine_description_with_llama(user_prompt: str, pipeline_llama, lang: str):
"""
Create a polished script using Llama.
Incorporate a language preference or style instructions.
"""
system_msg = (
"You are an expert radio imaging script writer. "
"Refine the user's concept into a concise, compelling piece. "
"Ensure to reflect any language or style requests."
)
combined = f"{system_msg}\nLanguage: {lang}\nUser Concept: {user_prompt}\nRefined Script:"
result = pipeline_llama(
combined,
max_new_tokens=300,
do_sample=True,
temperature=0.8
)
text = result[0]["generated_text"]
# Attempt to isolate the final portion
if "Refined Script:" in text:
text = text.split("Refined Script:")[-1].strip()
text += "\n\n(Generated with Llama 3 - Modern Radio Generator)"
return text
# ---------------------------------------------------------------------
# 9) Buttons & Outputs
# ---------------------------------------------------------------------
col_gen1, col_gen2 = st.columns(2)
with col_gen1:
if st.button("πŸ“„ Refine with Llama 3"):
if not prompt.strip():
st.error("Please provide a brief concept first.")
else:
with st.spinner("Refining your script..."):
try:
pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
refined_text = refine_description_with_llama(prompt, pipeline_llama, language_choice)
st.session_state['refined_prompt'] = refined_text
st.success("Refined text generated!")
st.write(refined_text)
st.download_button(
"πŸ’Ύ Download Script",
refined_text,
file_name="refined_jingle_script.txt"
)
except Exception as e:
st.error(f"Error: {e}")
with col_gen2:
if st.button("β–Ά Generate Audio with MusicGen"):
if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
st.error("No refined prompt found. Please generate/refine your script first.")
else:
final_text_for_music = st.session_state['refined_prompt']
final_text_for_music += f"\nPreferred style: {music_style}"
with st.spinner("Generating audio..."):
try:
mg_model, mg_processor = None, None
# Load MusicGen model once
mg_model, mg_processor = load_musicgen_model()
inputs = mg_processor(
text=[final_text_for_music],
padding=True,
return_tensors="pt"
)
audio_output = mg_model.generate(**inputs, max_new_tokens=audio_tokens)
sr = mg_model.config.audio_encoder.sampling_rate
audio_filename = f"radio_imaging_{music_style.lower()}.wav"
scipy.io.wavfile.write(
audio_filename,
rate=sr,
data=audio_output[0, 0].numpy()
)
st.success("Audio generated! Listen below:")
st.audio(audio_filename)
# Optional Save/Upload prompt
if st.checkbox("Upload this WAV to a cloud (demo)?"):
with st.spinner("Uploading..."):
# Placeholder for your own S3 or cloud logic
st.success("Uploaded (placeholder).")
except Exception as e:
st.error(f"Error generating audio: {e}")
# ---------------------------------------------------------------------
# 10) Load & Cache MusicGen
# ---------------------------------------------------------------------
@st.cache_resource
def load_musicgen_model():
"""
Load and cache the MusicGen model & processor.
Using 'facebook/musicgen-small' as example.
"""
mgm = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
mgp = AutoProcessor.from_pretrained("facebook/musicgen-small")
return mgm, mgp
# ---------------------------------------------------------------------
# 11) Footer
# ---------------------------------------------------------------------
st.markdown("---")
st.markdown(
"""
<div class='footer-note'>
Β© 2025 Modern Radio Generator - Built with Llama & MusicGen |
<a href='https://example.com' target='_blank'>YourCompany</a>
</div>
""",
unsafe_allow_html=True
)