Nick021402 committed on
Commit 8a0bb11 · verified · 1 Parent(s): 7423004

Update app.py

Files changed (1): app.py (+71, -58)
app.py CHANGED
@@ -1,58 +1,71 @@
- import gradio as gr
- from transformers import pipeline
- import torch
- from transformers import AutoProcessor, MusicgenForConditionalGeneration
-
- # Load emotion classifier
- emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
-
- # Load music generator (small for CPU)
- music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
- processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-
- # Map emotion to style/genre prompts
- EMOTION_TO_MUSIC = {
-     "joy": "happy upbeat piano melody",
-     "anger": "intense aggressive drums",
-     "sadness": "slow emotional violin",
-     "fear": "dark ambient synth",
-     "love": "soft romantic acoustic guitar",
-     "surprise": "quirky playful tune",
-     "neutral": "chill background lofi beat"
- }
-
- # Main generation function
- def generate_music(user_input):
-     # Step 1: Detect emotion
-     emotion_scores = emotion_classifier(user_input)[0]
-     top_emotion = max(emotion_scores, key=lambda x: x["score"])["label"]
-
-     # Step 2: Generate prompt
-     music_prompt = EMOTION_TO_MUSIC.get(top_emotion.lower(), "ambient melody")
-
-     # Step 3: Generate music
-     inputs = processor(text=[music_prompt], return_tensors="pt")
-     audio_values = music_model.generate(**inputs, max_new_tokens=1024)
-
-     # Convert audio tensor to array
-     audio_array = audio_values[0].cpu().numpy()
-
-     # Return result
-     return f"Top Emotion: {top_emotion}", (16000, audio_array)
-
- # Gradio UI
- with gr.Blocks() as demo:
-     gr.Markdown("# Emotion-to-Music AI")
-     gr.Markdown("Describe how you feel and get a unique music track matching your mood!")
-
-     with gr.Row():
-         text_input = gr.Textbox(label="How are you feeling?")
-         generate_btn = gr.Button("Generate Music")
-
-     with gr.Row():
-         emotion_output = gr.Textbox(label="Detected Emotion")
-         audio_output = gr.Audio(label="Generated Music", type="numpy")
-
-     generate_btn.click(fn=generate_music, inputs=text_input, outputs=[emotion_output, audio_output])
-
- demo.launch()
+ import gradio as gr
+ from transformers import pipeline
+ import torch
+ from transformers import AutoProcessor, MusicgenForConditionalGeneration
+ import numpy as np  # used to normalize the generated waveform
+
+ # Load emotion classifier
+ emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+
+ # Load music generator (small for CPU)
+ music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+ processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+
+ # Map emotion to style/genre prompts
+ EMOTION_TO_MUSIC = {
+     "joy": "happy upbeat piano melody",
+     "anger": "intense aggressive drums",
+     "sadness": "slow emotional violin",
+     "fear": "dark ambient synth",
+     "love": "soft romantic acoustic guitar",
+     "surprise": "quirky playful tune",
+     "neutral": "chill background lofi beat"
+ }
+
+ # Main generation function
+ def generate_music(user_input):
+     # Step 1: Detect emotion
+     emotion_scores = emotion_classifier(user_input)[0]
+     top_emotion = max(emotion_scores, key=lambda x: x["score"])["label"]
+
+     # Step 2: Generate prompt
+     music_prompt = EMOTION_TO_MUSIC.get(top_emotion.lower(), "ambient melody")
+
+     # Step 3: Generate music
+     inputs = processor(text=[music_prompt], return_tensors="pt")
+     audio_values = music_model.generate(**inputs, max_new_tokens=1024)
+
+     # Convert audio tensor (batch, channels, samples) to a mono numpy array
+     audio_array = audio_values[0, 0].cpu().numpy()
+
+     # --- FIX START ---
+     # Peak-normalize to [-1, 1]: Gradio treats float arrays as full-scale
+     # audio, so an unscaled waveform can clip or play back distorted.
+     # Guard against division by zero in case the model emits silence.
+     peak = np.max(np.abs(audio_array))
+     if peak > 0:
+         audio_array = audio_array / peak
+     # --- FIX END ---
+
+
+     # Return result
+     # MusicGen's EnCodec audio encoder runs at 32 kHz; read the rate from the config
+     sampling_rate = music_model.config.audio_encoder.sampling_rate
+     return f"Top Emotion: {top_emotion}", (sampling_rate, audio_array)
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# Emotion-to-Music AI")
+     gr.Markdown("Describe how you feel and get a unique music track matching your mood!")
+
+     with gr.Row():
+         text_input = gr.Textbox(label="How are you feeling?")
+         generate_btn = gr.Button("Generate Music")
+
+     with gr.Row():
+         emotion_output = gr.Textbox(label="Detected Emotion")
+         audio_output = gr.Audio(label="Generated Music", type="numpy")  # matches the (rate, array) return value
+
+     generate_btn.click(fn=generate_music, inputs=text_input, outputs=[emotion_output, audio_output])
+
+ demo.launch()
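
For reference, the peak-normalization step this commit introduces can be exercised on its own, without loading either model. Below is a minimal sketch assuming only numpy; the helper name peak_normalize is illustrative and not part of app.py:

import numpy as np

def peak_normalize(audio: np.ndarray) -> np.ndarray:
    # Scale so the loudest sample sits at +/-1.0, the full-scale
    # convention Gradio assumes for floating-point audio arrays.
    peak = np.max(np.abs(audio))
    return audio / peak if peak > 0 else audio

print(peak_normalize(np.array([0.5, -2.0])))  # [ 0.25 -1.  ]
print(peak_normalize(np.zeros(3)))  # silence passes through unchanged: [0. 0. 0.]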