Update app.py
app.py CHANGED
@@ -10,6 +10,8 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 # Initialize the ASR pipeline
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
+INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
+
 def speech_to_text(speech):
     """Converts speech to text using the ASR pipeline."""
     return asr(speech)["text"]
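For context, the wav2vec2 pipeline used here returns a dict with a "text" field, and the voice input component passes a file path (type="filepath"), so speech_to_text can be exercised on its own. A minimal standalone sketch, assuming a hypothetical local recording:

    from transformers import pipeline

    # Same checkpoint as in app.py; "sample.wav" is a hypothetical clip path.
    asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
    print(asr("sample.wav")["text"])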
@@ -51,19 +53,18 @@ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, r
         return playlist_message
     return output
 
-
 def format_prompt(message, history):
     """Formats the prompt including fixed instructions and conversation history."""
-    fixed_prompt
-
+    fixed_prompt= """
+    You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
 
-
+    Note: Do not write anything else other than the classified mood if classified.
 
-
+    Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
 
-
+    Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
 
-
+    Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
 
     Examples
     User: What is C programming?

@@ -223,14 +224,13 @@ def format_prompt(message, history):
 
     User: Lets turn up the music and have some fun!
     LLM Response: Party
-"""
-
+    """
     prompt = f"{fixed_prompt}\n"
     for user_prompt, bot_response in history:
         prompt += f"User: {user_prompt}\nLLM Response: {bot_response}\n"
     prompt += f"User: {message}\nLLM Response:"
     return prompt
-
+
 async def text_to_speech(text):
     communicate = edge_tts.Communicate(text)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
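To make the prompt structure concrete, here is a rough illustration of the string format_prompt assembles, using an invented one-turn history; the fixed_prompt text is abbreviated, and the full wording is in the hunks above:

    # Hypothetical walk-through of format_prompt's output shape (not part of app.py).
    fixed_prompt = "You are a smart mood analyser ... {Happy, Sad, Instrumental, Party}"
    history = [("I need a coffee", "Sounds like a long day! What kind of music usually lifts you up?")]
    message = "Lets turn up the music and have some fun!"

    prompt = f"{fixed_prompt}\n"
    for user_prompt, bot_response in history:
        prompt += f"User: {user_prompt}\nLLM Response: {bot_response}\n"
    prompt += f"User: {message}\nLLM Response:"
    # The model is expected to complete this with a single word, e.g. "Party".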
@@ -252,6 +252,9 @@ async def generate_audio(history):
         return audio_path
     return None
 
+def init_chat():
+    return [("", INITIAL_MESSAGE)], [("", INITIAL_MESSAGE)], None
+
 # Gradio interface setup
 with gr.Blocks() as demo:
     gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
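The three values returned by init_chat line up positionally with the outputs it is wired to in the next hunk: the conversation state, the visible chatbot history, and the audio player. A small sketch of that mapping, with names mirroring app.py:

    # ("", INITIAL_MESSAGE) is a (user, bot) tuple with an empty user side,
    # so the chatbot shows only the greeting when the page loads.
    state_value, chatbot_value, audio_value = init_chat()
    # state_value   -> [("", INITIAL_MESSAGE)]
    # chatbot_value -> [("", INITIAL_MESSAGE)]
    # audio_value   -> None (no audio is generated for the greeting)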
@@ -266,6 +269,9 @@
     submit = gr.Button("Send")
     voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
 
+    # Initialize chat with greeting
+    demo.load(init_chat, outputs=[state, chatbot, audio_output])
+
     # Handle text input
     msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
         generate_audio, inputs=[state], outputs=[audio_output]