Spaces:
Sleeping
Sleeping
import streamlit as st | |
import torch | |
import scipy.io.wavfile | |
from transformers import ( | |
AutoTokenizer, | |
AutoModelForCausalLM, | |
pipeline, | |
AutoProcessor, | |
MusicgenForConditionalGeneration | |
) | |
# ---------------------------------------------------------------------
# Page Configuration
# ---------------------------------------------------------------------
# Browser-tab and layout settings, collected in one mapping so they can be
# read at a glance and passed to Streamlit in a single call.
# NOTE(review): the icon string looks like a mis-decoded emoji -- confirm
# against the deployed app before changing it.
_page_settings = {
    "page_icon": "π§",
    "layout": "wide",
    "page_title": "Radio Imaging Audio Generator - Llama 3",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# ---------------------------------------------------------------------
# Custom CSS for a Catchy UI
# ---------------------------------------------------------------------
# Stylesheet injected into the page as raw HTML. It sets the base palette,
# restyles buttons, and defines two helper classes (.material-card and
# .footer-note); .footer-note is used by the footer markup at the bottom
# of this script.
CUSTOM_CSS = """
<style>
body {
background-color: #FAFCFF;
color: #1F2937;
font-family: 'Segoe UI', Tahoma, sans-serif;
}
h1, h2, h3, h4, h5, h6 {
color: #3B82F6;
margin-bottom: 0.5em;
}
.stButton>button {
background-color: #3B82F6 !important;
color: #FFFFFF !important;
border-radius: 8px !important;
font-size: 16px !important;
margin: 0.5em 0;
}
.sidebar .sidebar-content {
background: #E0F2FE;
}
.material-card {
border: 1px solid #D1D5DB;
border-radius: 8px;
padding: 1rem;
margin-bottom: 1rem;
background-color: #ffffff;
}
.footer-note {
text-align: center;
opacity: 0.6;
font-size: 14px;
margin-top: 30px;
}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as markup
# rather than shown as text.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# ---------------------------------------------------------------------
# Header Section
# ---------------------------------------------------------------------
# Title banner and tagline, followed by a horizontal rule.
# NOTE(review): the ** markers sit inside a raw <p> element and may render
# literally instead of as bold -- verify in the running app.
_header_html = """
<h1>π Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta with Llama 3)</span></h1>
<p style='font-size:18px;'>
Generate custom radio ads, station promos, and jingles in multiple languages
using the **hypothetical Llama 3.3** Instruct model & MusicGen!
</p>
"""
st.markdown(_header_html, unsafe_allow_html=True)
st.markdown("---")
# ---------------------------------------------------------------------
# Instructions Section
# ---------------------------------------------------------------------
# Collapsible usage guide; the text is written into the expander through
# the container object it returns.
_usage_expander = st.expander("π How to Use This Web App")
_usage_expander.markdown(
    """
1. **Enter a concept** in any language: Describe the style, mood, length, etc.
2. **Choose Language**: If you want a Spanish script, select Spanish below (multi-language).
3. **Refine with Llama 3**: Let the model transform your brief into a catchy script.
4. **Set Audio Options**: Choose a style (Rock, Pop, Classical...) and max tokens for MusicGen output.
5. **Generate Audio**: Listen & optionally download or upload the WAV file.
**Future Enhancements**:
- **User Authentication**: Restrict access or track usage with logins.
- **Advanced Fine-tuning**: Adjust Llama or MusicGen for specialized station branding.
- **Cloud Storage**: Upload final WAVs to a server or cloud bucket for easy sharing.
"""
)
# ---------------------------------------------------------------------
# Sidebar: Model Selection & Options
# ---------------------------------------------------------------------
# Every widget below is created directly on the sidebar container. The
# values they return (llama_model_id, device_option, language, music_style,
# audio_tokens) are read by the generation steps further down the script.
_sidebar = st.sidebar
_sidebar.header("π§ Model & Audio Config")

# Llama 3 model ID on Hugging Face (hypothetical)
llama_model_id = _sidebar.text_input(
    "Llama 3 Instruct Model ID",
    value="meta-llama/Llama-3.3-70B-Instruct",
    help="Requires license acceptance on Hugging Face, if/when available.",
)
device_option = _sidebar.selectbox(
    "Hardware Device",
    ["auto", "cpu"],
    help="If running locally with a GPU, choose 'auto'. CPU-only might be slow for large models.",
)
_sidebar.markdown("---")

# Language the refined script should be written in.
language = _sidebar.selectbox(
    "Choose Output Language",
    ["English", "Spanish", "French", "German", "Other (explain in your prompt)"],
)
_sidebar.markdown("---")

# Music style appended to the MusicGen prompt, and the generation budget.
music_style = _sidebar.selectbox(
    "Preferred Music Style",
    ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"],
)
audio_tokens = _sidebar.slider(
    "MusicGen Max Tokens (Approx. Track Length)",
    min_value=128,
    max_value=1024,
    value=512,
    step=64,
)
# ---------------------------------------------------------------------
# Prompt Input
# ---------------------------------------------------------------------
# Free-text brief from the user; `prompt` feeds the Llama refinement step.
st.markdown("## βπ» Write Your Concept Brief")
prompt = st.text_area(
    label="Describe the radio imaging or jingle you want to create.",
    placeholder="e.g. 'An energetic 15-second pop jingle in Spanish for a morning radio show...'",
)
# --------------------------------------------------------------------- | |
# Text Generation with Llama 3 | |
# --------------------------------------------------------------------- | |
@st.cache_resource
def load_llama_pipeline(model_id: str, device: str):
    """
    Load a causal language model and wrap it in a text-generation pipeline.

    The result is cached per (model_id, device) pair so that Streamlit
    reruns and repeated button clicks reuse the already-loaded weights
    instead of loading the checkpoint again.

    Args:
        model_id: Hugging Face model identifier. Gated models require the
            license to be accepted on the Hub beforehand.
        device: Value forwarded as ``device_map``; the sidebar offers
            "auto" and "cpu".

    Returns:
        A ``transformers`` text-generation pipeline.
    """
    # Half precision is only used when a CUDA device is actually present.
    # "auto" on a CPU-only host falls back to float32, matching what the
    # explicit "cpu" choice already does.
    use_half_precision = device == "auto" and torch.cuda.is_available()

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if use_half_precision else torch.float32,
        device_map=device,
    )
    gen_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map=device,
    )
    return gen_pipeline
def generate_description(user_prompt: str, pipeline_gen, language_choice: str):
    """
    Turn the user's concept brief into a refined ad script.

    Args:
        user_prompt: The concept brief typed by the user.
        pipeline_gen: A callable invoked as
            ``pipeline_gen(prompt, max_new_tokens=..., do_sample=...,
            temperature=...)`` that returns a list whose first item is a
            dict with a ``"generated_text"`` entry.
        language_choice: Language the script should be written in.

    Returns:
        The generated script with surrounding whitespace removed, followed
        by a blank line and a fixed sign-off line.
    """
    # Instruction for the model (system prompt).
    system_prompt = (
        "You are a creative ad copywriter specialized in radio imaging. "
        "Refine the user's concept into a concise script. "
        "Incorporate the language choice and creative elements for a promotional audio spot."
    )
    # Last line of the prompt; the model's answer is expected to follow it.
    answer_marker = "Your refined ad script:"
    combined_prompt = (
        f"{system_prompt}\n"
        f"Language to use: {language_choice}\n"
        f"User Concept: {user_prompt}\n"
        f"{answer_marker}"
    )
    result = pipeline_gen(
        combined_prompt,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.8,
    )
    generated_text = result[0]["generated_text"]

    # Isolate the script. When the output echoes the prompt, cut exactly that
    # prefix. Splitting on the first "script:" is unsafe because the user's
    # own brief may contain that text, which would leak prompt lines into the
    # result.
    if generated_text.startswith(combined_prompt):
        script = generated_text[len(combined_prompt):]
    else:
        # No echoed prompt: keep what follows the last marker if the marker
        # appears at all, otherwise keep the whole completion.
        _, found, tail = generated_text.rpartition(answer_marker)
        script = tail if found else generated_text
    script = script.strip()

    # Add a sign-off or brand line.
    script += "\n\n(Generated by Radio Imaging Audio Generator - Powered by Llama 3)"
    return script
# Button: Generate Description
# A click runs the Llama refinement and stores the result in session state.
if st.button("π Refine Description with Llama 3"):
    if not prompt.strip():
        st.error("Please provide a concept before generating a description.")
    else:
        with st.spinner("Generating a refined description..."):
            try:
                pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
                st.session_state['refined_prompt'] = generate_description(
                    prompt, pipeline_llama, language
                )
                st.success("Description successfully refined!")
            except Exception as e:
                # UI boundary: show the failure to the user instead of crashing.
                st.error(f"Error while generating with Llama 3: {e}")

# Render the script from session state rather than inside the button branch.
# st.button is True only for the rerun triggered by the click, so content
# drawn inside that branch disappears on the next interaction, including a
# click on the download button itself.
refined_text = st.session_state.get('refined_prompt')
if refined_text:
    st.write(refined_text)
    st.download_button(
        "π₯ Download Description",
        refined_text,
        file_name="refined_description.txt",
    )
st.markdown("---")
# --------------------------------------------------------------------- | |
# MusicGen: Generate Audio | |
# --------------------------------------------------------------------- | |
@st.cache_resource
def load_musicgen_model():
    """
    Load and cache the MusicGen model and processor.

    The cache decorator makes the "cache" part true: the
    "facebook/musicgen-small" checkpoint is loaded once and reused across
    Streamlit reruns instead of being loaded on every button click.

    Returns:
        A ``(model, processor)`` tuple.
    """
    mg_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    mg_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    return mg_model, mg_processor
# A click generates a WAV file from the refined script plus the chosen style
# and records the file name in session state.
if st.button("βΆ Generate Audio with MusicGen"):
    descriptive_text = st.session_state.get('refined_prompt')
    if not descriptive_text:
        st.error("Please generate or have a refined script before creating audio.")
    else:
        with st.spinner("Generating your audio..."):
            try:
                musicgen_model, processor = load_musicgen_model()
                # Incorporate the style preference into the final text
                final_text_for_music = f"{descriptive_text}\nStyle preference: {music_style}"
                # Use the refined prompt + style as input
                inputs = processor(
                    text=[final_text_for_music],
                    padding=True,
                    return_tensors="pt",
                )
                # max_new_tokens controls the track length
                audio_values = musicgen_model.generate(**inputs, max_new_tokens=audio_tokens)
                sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
                # Save the first generated waveform to disk
                audio_filename = f"radio_imaging_output_{music_style.lower()}.wav"
                scipy.io.wavfile.write(
                    audio_filename,
                    rate=sampling_rate,
                    # .cpu() is a no-op on CPU and keeps .numpy() valid if the
                    # model ever runs on an accelerator.
                    data=audio_values[0, 0].detach().cpu().numpy(),
                )
                st.session_state['audio_filename'] = audio_filename
                st.success("Audio successfully generated!")
            except Exception as e:
                # UI boundary: show the failure to the user instead of crashing.
                st.error(f"Error while generating audio: {e}")

# The player and the upload checkbox are drawn from session state, outside
# the button branch. A checkbox nested inside `if st.button(...)` can never
# take effect: ticking it reruns the script with the button back to False,
# so the nested branch is skipped.
generated_audio_file = st.session_state.get('audio_filename')
if generated_audio_file:
    st.audio(generated_audio_file)
    # Optionally, prompt to "Upload to Cloud" or "Save to Directory"
    if st.checkbox("Upload this WAV to cloud storage? (Demo)"):
        with st.spinner("Uploading... (This is a placeholder)"):
            # Pseudocode for your custom logic, e.g.:
            # upload_to_s3(audio_filename, bucket_name="radio-imaging-bucket")
            st.success("File uploaded to your cloud storage (placeholder).")
# ---------------------------------------------------------------------
# Footer Section
# ---------------------------------------------------------------------
st.markdown("---")
# Footer text wrapped in the .footer-note class defined in CUSTOM_CSS.
_footer_html = (
    "<div class='footer-note'>"
    "β Built with a hypothetical Llama 3.3 & MusicGen Β· "
    "Multi-language, advanced styles, and a hint of future expansions Β· "
    "Happy producing!"
    "</div>"
)
st.markdown(_footer_html, unsafe_allow_html=True)

# Hide Streamlit's default menu and footer if you wish
_hide_chrome_css = "<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>"
st.markdown(_hide_chrome_css, unsafe_allow_html=True)