Bils committed on
Commit 621eae6 · verified · 1 Parent(s): 5102a64

Update app.py

Files changed (1)
  1. app.py +101 -59
app.py CHANGED
@@ -15,28 +15,30 @@ from transformers import (
 st.set_page_config(
     page_icon="🎧",
     layout="wide",
-    page_title="Radio Imaging Audio Generator - Llama & MusicGen",
+    page_title="Radio Imaging Audio Generator - Llama 3",
     initial_sidebar_state="expanded",
 )

 # ---------------------------------------------------------------------
-# Custom CSS for a Vibrant UI
+# Custom CSS for a Catchy UI
 # ---------------------------------------------------------------------
 CUSTOM_CSS = """
 <style>
 body {
-    background-color: #F8FBFE;
+    background-color: #FAFCFF;
     color: #1F2937;
     font-family: 'Segoe UI', Tahoma, sans-serif;
 }
 h1, h2, h3, h4, h5, h6 {
     color: #3B82F6;
+    margin-bottom: 0.5em;
 }
 .stButton>button {
     background-color: #3B82F6 !important;
     color: #FFFFFF !important;
     border-radius: 8px !important;
     font-size: 16px !important;
+    margin: 0.5em 0;
 }
 .sidebar .sidebar-content {
     background: #E0F2FE
@@ -63,9 +65,10 @@ st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
 # ---------------------------------------------------------------------
 st.markdown(
     """
-    <h1>Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta)</span></h1>
+    <h1>🎙 Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta with Llama 3)</span></h1>
     <p style='font-size:18px;'>
-    Generate custom radio imaging audio, ads, and promo tracks with Llama & MusicGen!
+    Generate custom radio ads, station promos, and jingles in multiple languages
+    using the **hypothetical Llama 3.3** Instruct model & MusicGen!
     </p>
     """,
     unsafe_allow_html=True
@@ -73,20 +76,21 @@ st.markdown(
 st.markdown("---")

 # ---------------------------------------------------------------------
-# Instructions Section in an Expander
+# Instructions Section
 # ---------------------------------------------------------------------
 with st.expander("📘 How to Use This Web App"):
     st.markdown(
         """
-        1. **Enter your prompt**: Describe the type of audio you need (e.g., an energetic 15-second jingle for a pop radio promo).
-        2. **Generate Description**: Let Llama 2 (or another open-source model) refine your prompt into a creative script.
-        3. **Generate Audio**: Pass that script to MusicGen to get a custom audio file.
-        4. **Playback & Download**: Listen to your new track and download it for further editing.
+        1. **Enter a concept** in any language: Describe the style, mood, length, etc.
+        2. **Choose Language**: If you want a Spanish script, select Spanish below (multi-language).
+        3. **Refine with Llama 3**: Let the model transform your brief into a catchy script.
+        4. **Set Audio Options**: Choose a style (Rock, Pop, Classical...) and max tokens for MusicGen output.
+        5. **Generate Audio**: Listen & optionally download or upload the WAV file.

-        **Tips**:
-        - Keep descriptions short & specific for best results.
-        - If the Llama model is too large, switch to a smaller open-source model or try a GPU-based environment.
-        - If you see errors about model permissions, ensure you've accepted the license on Hugging Face.
+        **Future Enhancements**:
+        - **User Authentication**: Restrict access or track usage with logins.
+        - **Advanced Fine-tuning**: Adjust Llama or MusicGen for specialized station branding.
+        - **Cloud Storage**: Upload final WAVs to a server or cloud bucket for easy sharing.
         """
     )

@@ -94,36 +98,59 @@ with st.expander("📘 How to Use This Web App"):
 # Sidebar: Model Selection & Options
 # ---------------------------------------------------------------------
 with st.sidebar:
-    st.header("🔧 Model Config")
-    # Llama 2 chat model from Hugging Face
+    st.header("🔧 Model & Audio Config")
+
+    # Llama 3 model ID on Hugging Face (hypothetical)
     llama_model_id = st.text_input(
-        "Llama 2 Model ID on Hugging Face",
-        value="meta-llama/Llama-2-7b-chat-hf",
-        help="For example: meta-llama/Llama-2-7b-chat-hf (requires license acceptance)."
+        "Llama 3 Instruct Model ID",
+        value="meta-llama/Llama-3.3-70B-Instruct",
+        help="Requires license acceptance on Hugging Face, if/when available."
     )
+
     device_option = st.selectbox(
         "Hardware Device",
         ["auto", "cpu"],
-        help="If running locally with a GPU, choose 'auto'. If you only have a CPU, pick 'cpu'."
+        help="If running locally with a GPU, choose 'auto'. CPU-only might be slow for large models."
+    )
+
+    st.markdown("---")
+
+    # Multi-language prompt
+    language = st.selectbox(
+        "Choose Output Language",
+        ["English", "Spanish", "French", "German", "Other (explain in your prompt)"]
+    )
+
+    st.markdown("---")
+
+    # Audio style and tokens
+    music_style = st.selectbox(
+        "Preferred Music Style",
+        ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
+    )
+    audio_tokens = st.slider(
+        "MusicGen Max Tokens (Approx. Track Length)",
+        min_value=128, max_value=1024, value=512, step=64
     )

 # ---------------------------------------------------------------------
 # Prompt Input
 # ---------------------------------------------------------------------
-st.markdown("## ✍🏻 Write Your Brief / Concept")
+st.markdown("## ✍🏻 Write Your Concept Brief")
 prompt = st.text_area(
-    "Describe the radio imaging or jingle you want to create. Include style, mood, duration, etc.",
-    placeholder="e.g. 'An energetic 15-second pop jingle for a morning radio show, upbeat and fun...'"
+    "Describe the radio imaging or jingle you want to create.",
+    placeholder="e.g. 'An energetic 15-second pop jingle in Spanish for a morning radio show...'"
 )

 # ---------------------------------------------------------------------
-# Text Generation with Llama
+# Text Generation with Llama 3
 # ---------------------------------------------------------------------
 @st.cache_resource
 def load_llama_pipeline(model_id: str, device: str):
     """
     Load the Llama or other open-source model as a text-generation pipeline.
-    The user must have accepted the license for certain models like Llama 2.
+    This is hypothetical for Llama 3.3.
+    Must accept license on HF if the model is restricted.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
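Note: the hunk cuts off inside `load_llama_pipeline`; the remainder of the function is unchanged by this commit and therefore not shown. For orientation only, here is a minimal sketch of how such a loader is typically completed with `transformers` (the `torch_dtype` / `device_map` choices below are assumptions, not taken from this diff):

```python
# Hypothetical completion of load_llama_pipeline -- not part of this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def load_llama_pipeline_sketch(model_id: str, device: str):
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if device == "auto" else torch.float32,  # assumed dtype policy
        device_map="auto" if device == "auto" else None,                   # assumed placement
    )
    # Wrap tokenizer + model in a text-generation pipeline, as the
    # `return gen_pipeline` in the next hunk suggests.
    gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return gen_pipeline
```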
@@ -139,49 +166,51 @@ def load_llama_pipeline(model_id: str, device: str):
     )
     return gen_pipeline

-def generate_description(user_prompt: str, pipeline_gen):
+def generate_description(user_prompt: str, pipeline_gen, language_choice: str):
     """
-    Use the pipeline to create a refined description for MusicGen.
+    Use the pipeline to create a refined description for MusicGen,
+    with multi-language capabilities.
     """
-    # Instruction format for Llama 2 chat
-    # or simpler prompt if it's not a chat model
+    # Instruction for Llama (system prompt):
     system_prompt = (
-        "You are a helpful assistant specialized in creative advertising scripts and radio imaging. "
-        "Refine the user's short concept into a more detailed, creative script. "
-        "Keep it concise, but highlight any relevant tone, instruments, or style to guide music generation."
+        "You are a creative ad copywriter specialized in radio imaging. "
+        "Refine the user's concept into a concise script. "
+        "Incorporate the language choice and creative elements for a promotional audio spot."
     )
-
-    # We'll feed a combined prompt
-    combined_prompt = f"{system_prompt}\nUser request: {user_prompt}\nYour refined script:"
-
-    # Generate text
+
+    # Combine user prompt + language + the system instructions
+    combined_prompt = (
+        f"{system_prompt}\n"
+        f"Language to use: {language_choice}\n"
+        f"User Concept: {user_prompt}\n"
+        f"Your refined ad script:"
+    )
+
     result = pipeline_gen(
         combined_prompt,
-        max_new_tokens=200,
+        max_new_tokens=300,
         do_sample=True,
-        temperature=0.7
+        temperature=0.8
     )
-    # Extract generated text (some models output extra tokens or the entire prompt again)
     generated_text = result[0]["generated_text"]
-
-    # Attempt to cut out the system prompt if it reappears
-    # Just a heuristic: find the last occurrence of "script:" or any relevant marker
+
+    # Attempt to isolate the script portion
     if "script:" in generated_text.lower():
-        generated_text = generated_text.split("script:")[-1].strip()
+        generated_text = generated_text.split("script:", 1)[-1].strip()

-    # Optional: add a sign-off or credit line
-    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Llama Edition)"
+    # Add a sign-off or brand line
+    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Powered by Llama 3)"
     return generated_text

 # Button: Generate Description
-if st.button("📄 Refine Description with Llama"):
+if st.button("📄 Refine Description with Llama 3"):
     if not prompt.strip():
-        st.error("Please provide a brief concept before generating a description.")
+        st.error("Please provide a concept before generating a description.")
     else:
         with st.spinner("Generating a refined description..."):
             try:
                 pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
-                refined_text = generate_description(prompt, pipeline_llama)
+                refined_text = generate_description(prompt, pipeline_llama, language)
                 st.session_state['refined_prompt'] = refined_text
                 st.success("Description successfully refined!")
                 st.write(refined_text)
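Review note on `generate_description`: the commit keeps plain string concatenation for the prompt and then trims the echoed prompt by splitting on `"script:"`. With an instruct/chat checkpoint, applying the tokenizer's chat template together with `return_full_text=False` usually avoids that post-processing. A hedged alternative sketch, not what this commit does, and it assumes the loaded tokenizer ships a chat template:

```python
# Alternative prompting sketch using the chat template -- an assumption, not part of this commit.
def generate_description_chat(user_prompt: str, pipeline_gen, language_choice: str) -> str:
    messages = [
        {"role": "system", "content": "You are a creative ad copywriter specialized in radio imaging."},
        {"role": "user", "content": f"Language: {language_choice}\nConcept: {user_prompt}\nWrite a concise ad script."},
    ]
    # Render the conversation with the model's own chat template.
    prompt_text = pipeline_gen.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    result = pipeline_gen(
        prompt_text,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.8,
        return_full_text=False,  # return only the newly generated text
    )
    return result[0]["generated_text"].strip()
```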
@@ -191,7 +220,7 @@ if st.button("📄 Refine Description with Llama"):
                     file_name="refined_description.txt"
                 )
             except Exception as e:
-                st.error(f"Error while generating with Llama: {e}")
+                st.error(f"Error while generating with Llama 3: {e}")

 st.markdown("---")
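The next hunk sits inside `load_musicgen_model()`, whose body is unchanged by this commit and therefore omitted from the diff. For readers without the full file, a minimal sketch of what such a loader usually looks like with the `transformers` MusicGen classes; the `facebook/musicgen-small` checkpoint is an assumption, not something this commit specifies:

```python
# Hypothetical MusicGen loader -- the real load_musicgen_model() is not shown in this diff.
import streamlit as st
from transformers import AutoProcessor, MusicgenForConditionalGeneration

@st.cache_resource
def load_musicgen_model_sketch():
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
    return model, processor  # matches the `musicgen_model, processor = ...` unpacking below
```

MusicGen emits roughly 50 audio tokens per second, so the sidebar's `max_new_tokens` range of 128-1024 corresponds to clips of very roughly 2.5 to 20 seconds.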
 
@@ -207,30 +236,43 @@ def load_musicgen_model():

 if st.button("▶ Generate Audio with MusicGen"):
     if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
-        st.error("Please generate or have a refined description first.")
+        st.error("Please generate or have a refined script before creating audio.")
     else:
         descriptive_text = st.session_state['refined_prompt']
-        with st.spinner("Generating your audio... This can take a moment."):
+        with st.spinner("Generating your audio..."):
             try:
                 musicgen_model, processor = load_musicgen_model()
-                # Use the refined prompt as input
+
+                # Incorporate the style preference into the final text
+                final_text_for_music = f"{descriptive_text}\nStyle preference: {music_style}"
+
+                # Use the refined prompt + style as input
                 inputs = processor(
-                    text=[descriptive_text],
+                    text=[final_text_for_music],
                     padding=True,
                     return_tensors="pt"
                 )
-                audio_values = musicgen_model.generate(**inputs, max_new_tokens=512)
+                # Adjust max_new_tokens for track length
+                audio_values = musicgen_model.generate(**inputs, max_new_tokens=audio_tokens)
                 sampling_rate = musicgen_model.config.audio_encoder.sampling_rate

                 # Save & display the audio
-                audio_filename = "radio_imaging_output.wav"
+                audio_filename = f"radio_imaging_output_{music_style.lower()}.wav"
                 scipy.io.wavfile.write(
                     audio_filename,
                     rate=sampling_rate,
                     data=audio_values[0, 0].numpy()
                 )
+
                 st.success("Audio successfully generated!")
                 st.audio(audio_filename)
+
+                # Optionally, prompt to "Upload to Cloud" or "Save to Directory"
+                if st.checkbox("Upload this WAV to cloud storage? (Demo)"):
+                    with st.spinner("Uploading... (This is a placeholder)"):
+                        # Pseudocode for your custom logic, e.g.:
+                        # upload_to_s3(audio_filename, bucket_name="radio-imaging-bucket")
+                        st.success("File uploaded to your cloud storage (placeholder).")
             except Exception as e:
                 st.error(f"Error while generating audio: {e}")
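The upload checkbox added above is explicitly a placeholder: `upload_to_s3` is named only in a comment and does not exist in the app. If it were wired up with boto3, a minimal helper could look like the sketch below; the bucket name comes from the commented pseudocode, and credential handling and error reporting are left out:

```python
# Hypothetical implementation of the commented-out upload_to_s3 placeholder -- not part of this commit.
import boto3

def upload_to_s3(local_path: str, bucket_name: str = "radio-imaging-bucket", key: str | None = None) -> str:
    """Upload a local file to S3 and return the object key used."""
    key = key or local_path.rsplit("/", 1)[-1]
    s3 = boto3.client("s3")  # relies on the standard AWS credential chain
    s3.upload_file(local_path, bucket_name, key)
    return key
```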
 
@@ -240,9 +282,9 @@ if st.button("▶ Generate Audio with MusicGen"):
 st.markdown("---")
 st.markdown(
     "<div class='footer-note'>"
-    "✅ Built with Llama 2 & MusicGen · "
-    "Created for radio imaging producers · "
-    "Feedback welcome at <a href='https://bilsimaging.com' target='_blank'>Bilsimaging</a>!"
+    "✅ Built with a hypothetical Llama 3.3 & MusicGen · "
+    "Multi-language, advanced styles, and a hint of future expansions · "
+    "Happy producing!"
     "</div>",
     unsafe_allow_html=True
 )