# AIPromoStudio / app.py
import streamlit as st
import torch
import scipy.io.wavfile
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    AutoProcessor,
    MusicgenForConditionalGeneration
)
# ---------------------------------------------------------------------
# Page Configuration
# ---------------------------------------------------------------------
st.set_page_config(
    page_icon="🎧",
    layout="wide",
    page_title="Radio Imaging Audio Generator - Llama 3",
    initial_sidebar_state="expanded",
)
# ---------------------------------------------------------------------
# Custom CSS for a Catchy UI
# ---------------------------------------------------------------------
CUSTOM_CSS = """
<style>
body {
    background-color: #FAFCFF;
    color: #1F2937;
    font-family: 'Segoe UI', Tahoma, sans-serif;
}
h1, h2, h3, h4, h5, h6 {
    color: #3B82F6;
    margin-bottom: 0.5em;
}
.stButton>button {
    background-color: #3B82F6 !important;
    color: #FFFFFF !important;
    border-radius: 8px !important;
    font-size: 16px !important;
    margin: 0.5em 0;
}
.sidebar .sidebar-content {
    background: #E0F2FE;
}
.material-card {
    border: 1px solid #D1D5DB;
    border-radius: 8px;
    padding: 1rem;
    margin-bottom: 1rem;
    background-color: #ffffff;
}
.footer-note {
    text-align: center;
    opacity: 0.6;
    font-size: 14px;
    margin-top: 30px;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# ---------------------------------------------------------------------
# Header Section
# ---------------------------------------------------------------------
st.markdown(
    """
    <h1>🎙 Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta with Llama 3)</span></h1>
    <p style='font-size:18px;'>
        Generate custom radio ads, station promos, and jingles in multiple languages
        using the <strong>Llama 3.3 Instruct</strong> model & MusicGen!
    </p>
    """,
    unsafe_allow_html=True
)
st.markdown("---")
# ---------------------------------------------------------------------
# Instructions Section
# ---------------------------------------------------------------------
with st.expander("📘 How to Use This Web App"):
    st.markdown(
        """
        1. **Enter a concept** in any language: Describe the style, mood, length, etc.
        2. **Choose Language**: Pick the output language for the script (e.g., Spanish) in the sidebar.
        3. **Refine with Llama 3**: Let the model transform your brief into a catchy script.
        4. **Set Audio Options**: Choose a style (Rock, Pop, Classical...) and max tokens for the MusicGen output.
        5. **Generate Audio**: Listen, then optionally download or upload the WAV file.

        **Future Enhancements**:
        - **User Authentication**: Restrict access or track usage with logins.
        - **Advanced Fine-tuning**: Adjust Llama or MusicGen for specialized station branding.
        - **Cloud Storage**: Upload final WAVs to a server or cloud bucket for easy sharing.
        """
    )
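# ---------------------------------------------------------------------
# (Sketch) Future enhancement: simple password gate
# ---------------------------------------------------------------------
# A minimal, illustrative sketch of the "User Authentication" idea listed in the
# expander above. It assumes a hypothetical APP_PASSWORD entry in st.secrets and
# is intentionally left commented out -- it is not part of the current app flow.
#
# def require_password() -> None:
#     entered = st.sidebar.text_input("Access password", type="password")
#     if entered != st.secrets.get("APP_PASSWORD", ""):
#         st.warning("Enter the access password to use the generator.")
#         st.stop()
#
# require_password()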
# ---------------------------------------------------------------------
# Sidebar: Model Selection & Options
# ---------------------------------------------------------------------
with st.sidebar:
    st.header("🔧 Model & Audio Config")

    # Llama 3 model ID on Hugging Face (gated; requires accepting the license)
    llama_model_id = st.text_input(
        "Llama 3 Instruct Model ID",
        value="meta-llama/Llama-3.3-70B-Instruct",
        help="Requires accepting the model license on Hugging Face."
    )
    device_option = st.selectbox(
        "Hardware Device",
        ["auto", "cpu"],
        help="If running locally with a GPU, choose 'auto'. CPU-only might be slow for large models."
    )

    st.markdown("---")

    # Multi-language prompt
    language = st.selectbox(
        "Choose Output Language",
        ["English", "Spanish", "French", "German", "Other (explain in your prompt)"]
    )

    st.markdown("---")

    # Audio style and tokens
    music_style = st.selectbox(
        "Preferred Music Style",
        ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
    )
    audio_tokens = st.slider(
        "MusicGen Max Tokens (Approx. Track Length)",
        min_value=128, max_value=1024, value=512, step=64,
        help="MusicGen generates roughly 50 tokens per second of audio, so 512 tokens is about 10 seconds."
    )
# ---------------------------------------------------------------------
# Prompt Input
# ---------------------------------------------------------------------
st.markdown("## ✍🏻 Write Your Concept Brief")
prompt = st.text_area(
    "Describe the radio imaging or jingle you want to create.",
    placeholder="e.g. 'An energetic 15-second pop jingle in Spanish for a morning radio show...'"
)
# ---------------------------------------------------------------------
# Text Generation with Llama 3
# ---------------------------------------------------------------------
@st.cache_resource
def load_llama_pipeline(model_id: str, device: str):
"""
Load the Llama or other open-source model as a text-generation pipeline.
This is hypothetical for Llama 3.3.
Must accept license on HF if the model is restricted.
"""
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float16 if device == "auto" else torch.float32,
device_map=device
)
gen_pipeline = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
device_map=device
)
return gen_pipeline
def generate_description(user_prompt: str, pipeline_gen, language_choice: str):
    """
    Use the pipeline to create a refined description for MusicGen,
    with multi-language capabilities.
    """
    # Instruction for Llama (system prompt):
    system_prompt = (
        "You are a creative ad copywriter specialized in radio imaging. "
        "Refine the user's concept into a concise script. "
        "Incorporate the language choice and creative elements for a promotional audio spot."
    )

    # Combine user prompt + language + the system instructions
    combined_prompt = (
        f"{system_prompt}\n"
        f"Language to use: {language_choice}\n"
        f"User Concept: {user_prompt}\n"
        f"Your refined ad script:"
    )

    result = pipeline_gen(
        combined_prompt,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.8
    )
    generated_text = result[0]["generated_text"]

    # Attempt to isolate the script portion
    if "script:" in generated_text.lower():
        generated_text = generated_text.split("script:", 1)[-1].strip()

    # Add a sign-off or brand line
    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Powered by Llama 3)"
    return generated_text
# Button: Generate Description
if st.button("📄 Refine Description with Llama 3"):
    if not prompt.strip():
        st.error("Please provide a concept before generating a description.")
    else:
        with st.spinner("Generating a refined description..."):
            try:
                pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
                refined_text = generate_description(prompt, pipeline_llama, language)
                st.session_state['refined_prompt'] = refined_text
                st.success("Description successfully refined!")
                st.write(refined_text)
                st.download_button(
                    "📥 Download Description",
                    refined_text,
                    file_name="refined_description.txt"
                )
            except Exception as e:
                st.error(f"Error while generating with Llama 3: {e}")
st.markdown("---")
# ---------------------------------------------------------------------
# MusicGen: Generate Audio
# ---------------------------------------------------------------------
@st.cache_resource
def load_musicgen_model():
    """Load and cache the MusicGen model and processor."""
    mg_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    mg_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    return mg_model, mg_processor
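# (Sketch) The float32 waveform from MusicGen is written below as a 32-bit float WAV,
# which st.audio handles fine. If a downstream player needs plain 16-bit PCM instead,
# a conversion along these lines would work -- illustrative only, not called anywhere:
#
# import numpy as np
# def to_int16_pcm(waveform):
#     """Clip to [-1, 1] and scale to the int16 range expected by PCM WAV files."""
#     return (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)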
if st.button("▶ Generate Audio with MusicGen"):
    if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
        st.error("Please generate or have a refined script before creating audio.")
    else:
        descriptive_text = st.session_state['refined_prompt']
        with st.spinner("Generating your audio..."):
            try:
                musicgen_model, processor = load_musicgen_model()

                # Incorporate the style preference into the final text
                final_text_for_music = f"{descriptive_text}\nStyle preference: {music_style}"

                # Use the refined prompt + style as input
                inputs = processor(
                    text=[final_text_for_music],
                    padding=True,
                    return_tensors="pt"
                )

                # Adjust max_new_tokens for track length
                audio_values = musicgen_model.generate(**inputs, max_new_tokens=audio_tokens)
                sampling_rate = musicgen_model.config.audio_encoder.sampling_rate

                # Save & display the audio (move to CPU first in case the model sits on a GPU)
                audio_filename = f"radio_imaging_output_{music_style.lower()}.wav"
                scipy.io.wavfile.write(
                    audio_filename,
                    rate=sampling_rate,
                    data=audio_values[0, 0].cpu().numpy()
                )
                st.success("Audio successfully generated!")
                st.audio(audio_filename)

                # Optionally, prompt to "Upload to Cloud" or "Save to Directory"
                if st.checkbox("Upload this WAV to cloud storage? (Demo)"):
                    with st.spinner("Uploading... (This is a placeholder)"):
                        # Pseudocode for your custom logic, e.g.:
                        # upload_to_s3(audio_filename, bucket_name="radio-imaging-bucket")
                        st.success("File uploaded to your cloud storage (placeholder).")
            except Exception as e:
                st.error(f"Error while generating audio: {e}")
# ---------------------------------------------------------------------
# Footer Section
# ---------------------------------------------------------------------
st.markdown("---")
st.markdown(
    "<div class='footer-note'>"
    "✅ Built with Llama 3.3 & MusicGen · "
    "Multi-language, advanced styles, and a hint of future expansions · "
    "Happy producing!"
    "</div>",
    unsafe_allow_html=True
)
# Hide Streamlit's default menu and footer if you wish
st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)