Spaces:

Bils
/

AIPromoStudio

Running

App Files Community

Bils committed on Jan 9

Commit

621eae6

verified ·

1 Parent(s): 5102a64

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -59

app.py CHANGED Viewed

@@ -15,28 +15,30 @@ from transformers import (
 st.set_page_config(
     page_icon="🎧",
     layout="wide",
-    page_title="Radio Imaging Audio Generator - Llama & MusicGen",
     initial_sidebar_state="expanded",
 )
 # ---------------------------------------------------------------------
-# Custom CSS for a Vibrant UI
 # ---------------------------------------------------------------------
 CUSTOM_CSS = """
 <style>
 body {
-    background-color: #F8FBFE;
     color: #1F2937;
     font-family: 'Segoe UI', Tahoma, sans-serif;
 }
 h1, h2, h3, h4, h5, h6 {
     color: #3B82F6;
 }
 .stButton>button {
     background-color: #3B82F6 !important;
     color: #FFFFFF !important;
     border-radius: 8px !important;
     font-size: 16px !important;
 }
 .sidebar .sidebar-content {
     background: #E0F2FE;
@@ -63,9 +65,10 @@ st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
 # ---------------------------------------------------------------------
 st.markdown(
     """
-    <h1>Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta)</span></h1>
     <p style='font-size:18px;'>
-        Generate custom radio imaging audio, ads, and promo tracks with Llama & MusicGen!
     </p>
     """,
     unsafe_allow_html=True
@@ -73,20 +76,21 @@ st.markdown(
 st.markdown("---")
 # ---------------------------------------------------------------------
-# Instructions Section in an Expander
 # ---------------------------------------------------------------------
 with st.expander("📘 How to Use This Web App"):
     st.markdown(
         """
-        1. **Enter your prompt**: Describe the type of audio you need (e.g., an energetic 15-second jingle for a pop radio promo).
-        2. **Generate Description**: Let Llama 2 (or another open-source model) refine your prompt into a creative script.
-        3. **Generate Audio**: Pass that script to MusicGen to get a custom audio file.
-        4. **Playback & Download**: Listen to your new track and download it for further editing.
-        **Tips**:
-        - Keep descriptions short & specific for best results.
-        - If the Llama model is too large, switch to a smaller open-source model or try a GPU-based environment.
-        - If you see errors about model permissions, ensure you’ve accepted the license on Hugging Face.
         """
     )
@@ -94,36 +98,59 @@ with st.expander("📘 How to Use This Web App"):
 # Sidebar: Model Selection & Options
 # ---------------------------------------------------------------------
 with st.sidebar:
-    st.header("🔧 Model Config")
-    # Llama 2 chat model from Hugging Face
     llama_model_id = st.text_input(
-        "Llama 2 Model ID on Hugging Face",
-        value="meta-llama/Llama-2-7b-chat-hf",
-        help="For example: meta-llama/Llama-2-7b-chat-hf (requires license acceptance)."
     )
     device_option = st.selectbox(
         "Hardware Device",
         ["auto", "cpu"],
-        help="If running locally with a GPU, choose 'auto'. If you only have a CPU, pick 'cpu'."
     )
 # ---------------------------------------------------------------------
 # Prompt Input
 # ---------------------------------------------------------------------
-st.markdown("## ✍🏻 Write Your Brief / Concept")
 prompt = st.text_area(
-    "Describe the radio imaging or jingle you want to create. Include style, mood, duration, etc.",
-    placeholder="e.g. 'An energetic 15-second pop jingle for a morning radio show, upbeat and fun...'"
 )
 # ---------------------------------------------------------------------
-# Text Generation with Llama
 # ---------------------------------------------------------------------
 @st.cache_resource
 def load_llama_pipeline(model_id: str, device: str):
     """
     Load the Llama or other open-source model as a text-generation pipeline.
-    The user must have accepted the license for certain models like Llama 2.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
@@ -139,49 +166,51 @@ def load_llama_pipeline(model_id: str, device: str):
     )
     return gen_pipeline
-def generate_description(user_prompt: str, pipeline_gen):
     """
-    Use the pipeline to create a refined description for MusicGen.
     """
-    # Instruction format for Llama 2 chat
-    # or simpler prompt if it's not a chat model
     system_prompt = (
-        "You are a helpful assistant specialized in creative advertising scripts and radio imaging. "
-        "Refine the user's short concept into a more detailed, creative script. "
-        "Keep it concise, but highlight any relevant tone, instruments, or style to guide music generation."
     )
-    # We'll feed a combined prompt
-    combined_prompt = f"{system_prompt}\nUser request: {user_prompt}\nYour refined script:"
-    # Generate text
     result = pipeline_gen(
         combined_prompt,
-        max_new_tokens=200,
         do_sample=True,
-        temperature=0.7
     )
-    # Extract generated text (some models output extra tokens or the entire prompt again)
     generated_text = result[0]["generated_text"]
-    # Attempt to cut out the system prompt if it reappears
-    # Just a heuristic: find the last occurrence of "script:" or any relevant marker
     if "script:" in generated_text.lower():
-        generated_text = generated_text.split("script:")[-1].strip()
-    # Optional: add a sign-off or credit line
-    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Llama Edition)"
     return generated_text
 # Button: Generate Description
-if st.button("📄 Refine Description with Llama"):
     if not prompt.strip():
-        st.error("Please provide a brief concept before generating a description.")
     else:
         with st.spinner("Generating a refined description..."):
             try:
                 pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
-                refined_text = generate_description(prompt, pipeline_llama)
                 st.session_state['refined_prompt'] = refined_text
                 st.success("Description successfully refined!")
                 st.write(refined_text)
@@ -191,7 +220,7 @@ if st.button("📄 Refine Description with Llama"):
                     file_name="refined_description.txt"
                 )
             except Exception as e:
-                st.error(f"Error while generating with Llama: {e}")
 st.markdown("---")
@@ -207,30 +236,43 @@ def load_musicgen_model():
 if st.button("▶ Generate Audio with MusicGen"):
     if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
-        st.error("Please generate or have a refined description first.")
     else:
         descriptive_text = st.session_state['refined_prompt']
-        with st.spinner("Generating your audio... This can take a moment."):
             try:
                 musicgen_model, processor = load_musicgen_model()
-                # Use the refined prompt as input
                 inputs = processor(
-                    text=[descriptive_text],
                     padding=True,
                     return_tensors="pt"
                 )
-                audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
                 sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
                 # Save & display the audio
-                audio_filename = "radio_imaging_output.wav"
                 scipy.io.wavfile.write(
                     audio_filename,
                     rate=sampling_rate,
                     data=audio_values[0, 0].numpy()
                 )
                 st.success("Audio successfully generated!")
                 st.audio(audio_filename)
             except Exception as e:
                 st.error(f"Error while generating audio: {e}")
@@ -240,9 +282,9 @@ if st.button("▶ Generate Audio with MusicGen"):
 st.markdown("---")
 st.markdown(
     "<div class='footer-note'>"
-    "✅ Built with Llama 2 & MusicGen · "
-    "Created for radio imaging producers · "
-    "Feedback welcome at <a href='https://bilsimaging.com' target='_blank'>Bilsimaging</a>!"
     "</div>",
     unsafe_allow_html=True
 )

 st.set_page_config(
     page_icon="🎧",
     layout="wide",
+    page_title="Radio Imaging Audio Generator - Llama 3",
     initial_sidebar_state="expanded",
 )
 # ---------------------------------------------------------------------
+# Custom CSS for a Catchy UI
 # ---------------------------------------------------------------------
 CUSTOM_CSS = """
 <style>
 body {
+    background-color: #FAFCFF;
     color: #1F2937;
     font-family: 'Segoe UI', Tahoma, sans-serif;
 }
 h1, h2, h3, h4, h5, h6 {
     color: #3B82F6;
+    margin-bottom: 0.5em;
 }
 .stButton>button {
     background-color: #3B82F6 !important;
     color: #FFFFFF !important;
     border-radius: 8px !important;
     font-size: 16px !important;
+    margin: 0.5em 0;
 }
 .sidebar .sidebar-content {
     background: #E0F2FE;
 # ---------------------------------------------------------------------
 st.markdown(
     """
+    <h1>🎙 Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta with Llama 3)</span></h1>
     <p style='font-size:18px;'>
+        Generate custom radio ads, station promos, and jingles in multiple languages
+        using the **hypothetical Llama 3.3** Instruct model & MusicGen!
     </p>
     """,
     unsafe_allow_html=True
 st.markdown("---")
 # ---------------------------------------------------------------------
+# Instructions Section
 # ---------------------------------------------------------------------
 with st.expander("📘 How to Use This Web App"):
     st.markdown(
         """
+        1. **Enter a concept** in any language: Describe the style, mood, length, etc.
+        2. **Choose Language**: If you want a Spanish script, select Spanish below (multi-language).
+        3. **Refine with Llama 3**: Let the model transform your brief into a catchy script.
+        4. **Set Audio Options**: Choose a style (Rock, Pop, Classical...) and max tokens for MusicGen output.
+        5. **Generate Audio**: Listen & optionally download or upload the WAV file.
+        **Future Enhancements**:
+        - **User Authentication**: Restrict access or track usage with logins.
+        - **Advanced Fine-tuning**: Adjust Llama or MusicGen for specialized station branding.
+        - **Cloud Storage**: Upload final WAVs to a server or cloud bucket for easy sharing.
         """
     )
 # Sidebar: Model Selection & Options
 # ---------------------------------------------------------------------
 with st.sidebar:
+    st.header("🔧 Model & Audio Config")
+    # Llama 3 model ID on Hugging Face (hypothetical)
     llama_model_id = st.text_input(
+        "Llama 3 Instruct Model ID",
+        value="meta-llama/Llama-3.3-70B-Instruct",
+        help="Requires license acceptance on Hugging Face, if/when available."
     )
     device_option = st.selectbox(
         "Hardware Device",
         ["auto", "cpu"],
+        help="If running locally with a GPU, choose 'auto'. CPU-only might be slow for large models."
+    )
+    st.markdown("---")
+    # Multi-language prompt
+    language = st.selectbox(
+        "Choose Output Language",
+        ["English", "Spanish", "French", "German", "Other (explain in your prompt)"]
+    )
+    st.markdown("---")
+    # Audio style and tokens
+    music_style = st.selectbox(
+        "Preferred Music Style",
+        ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
+    )
+    audio_tokens = st.slider(
+        "MusicGen Max Tokens (Approx. Track Length)",
+        min_value=128, max_value=1024, value=512, step=64
     )
 # ---------------------------------------------------------------------
 # Prompt Input
 # ---------------------------------------------------------------------
+st.markdown("## ✍🏻 Write Your Concept Brief")
 prompt = st.text_area(
+    "Describe the radio imaging or jingle you want to create.",
+    placeholder="e.g. 'An energetic 15-second pop jingle in Spanish for a morning radio show...'"
 )
 # ---------------------------------------------------------------------
+# Text Generation with Llama 3
 # ---------------------------------------------------------------------
 @st.cache_resource
 def load_llama_pipeline(model_id: str, device: str):
     """
     Load the Llama or other open-source model as a text-generation pipeline.
+    This is hypothetical for Llama 3.3.
+    Must accept license on HF if the model is restricted.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
     )
     return gen_pipeline
+def generate_description(user_prompt: str, pipeline_gen, language_choice: str):
     """
+    Use the pipeline to create a refined description for MusicGen,
+    with multi-language capabilities.
     """
+    # Instruction for Llama (system prompt):
     system_prompt = (
+        "You are a creative ad copywriter specialized in radio imaging. "
+        "Refine the user's concept into a concise script. "
+        "Incorporate the language choice and creative elements for a promotional audio spot."
     )
+    # Combine user prompt + language + the system instructions
+    combined_prompt = (
+        f"{system_prompt}\n"
+        f"Language to use: {language_choice}\n"
+        f"User Concept: {user_prompt}\n"
+        f"Your refined ad script:"
+    )
     result = pipeline_gen(
         combined_prompt,
+        max_new_tokens=300,
         do_sample=True,
+        temperature=0.8
     )
     generated_text = result[0]["generated_text"]
+    # Attempt to isolate the script portion
     if "script:" in generated_text.lower():
+        generated_text = generated_text.split("script:", 1)[-1].strip()
+    # Add a sign-off or brand line
+    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Powered by Llama 3)"
     return generated_text
 # Button: Generate Description
+if st.button("📄 Refine Description with Llama 3"):
     if not prompt.strip():
+        st.error("Please provide a concept before generating a description.")
     else:
         with st.spinner("Generating a refined description..."):
             try:
                 pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
+                refined_text = generate_description(prompt, pipeline_llama, language)
                 st.session_state['refined_prompt'] = refined_text
                 st.success("Description successfully refined!")
                 st.write(refined_text)
                     file_name="refined_description.txt"
                 )
             except Exception as e:
+                st.error(f"Error while generating with Llama 3: {e}")
 st.markdown("---")
 if st.button("▶ Generate Audio with MusicGen"):
     if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
+        st.error("Please generate or have a refined script before creating audio.")
     else:
         descriptive_text = st.session_state['refined_prompt']
+        with st.spinner("Generating your audio..."):
             try:
                 musicgen_model, processor = load_musicgen_model()
+                # Incorporate the style preference into the final text
+                final_text_for_music = f"{descriptive_text}\nStyle preference: {music_style}"
+                # Use the refined prompt + style as input
                 inputs = processor(
+                    text=[final_text_for_music],
                     padding=True,
                     return_tensors="pt"
                 )
+                # Adjust max_new_tokens for track length
+                audio_values = musicgen_model.generate(**inputs, max_new_tokens=audio_tokens)
                 sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
                 # Save & display the audio
+                audio_filename = f"radio_imaging_output_{music_style.lower()}.wav"
                 scipy.io.wavfile.write(
                     audio_filename,
                     rate=sampling_rate,
                     data=audio_values[0, 0].numpy()
                 )
                 st.success("Audio successfully generated!")
                 st.audio(audio_filename)
+                # Optionally, prompt to "Upload to Cloud" or "Save to Directory"
+                if st.checkbox("Upload this WAV to cloud storage? (Demo)"):
+                    with st.spinner("Uploading... (This is a placeholder)"):
+                        # Pseudocode for your custom logic, e.g.:
+                        # upload_to_s3(audio_filename, bucket_name="radio-imaging-bucket")
+                        st.success("File uploaded to your cloud storage (placeholder).")
             except Exception as e:
                 st.error(f"Error while generating audio: {e}")
 st.markdown("---")
 st.markdown(
     "<div class='footer-note'>"
+    "✅ Built with a hypothetical Llama 3.3 & MusicGen · "
+    "Multi-language, advanced styles, and a hint of future expansions · "
+    "Happy producing!"
     "</div>",
     unsafe_allow_html=True
 )