syedmudassir16 committed on
Commit
8413e00
·
verified ·
1 Parent(s): c163697

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -10,6 +10,8 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
10
  # Initialize the ASR pipeline
11
  asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
12
 
 
 
13
  def speech_to_text(speech):
14
  """Converts speech to text using the ASR pipeline."""
15
  return asr(speech)["text"]
@@ -51,19 +53,18 @@ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, r
51
  return playlist_message
52
  return output
53
 
54
-
55
  def format_prompt(message, history):
56
  """Formats the prompt including fixed instructions and conversation history."""
57
- fixed_prompt = """
58
- You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
59
 
60
- Note: Do not write anything else other than the classified mood if classified.
61
 
62
- Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
63
 
64
- Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
65
 
66
- Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
67
 
68
  Examples
69
  User: What is C programming?
@@ -223,14 +224,13 @@ def format_prompt(message, history):
223
 
224
  User: Lets turn up the music and have some fun!
225
  LLM Response: Party
226
- """
227
-
228
  prompt = f"{fixed_prompt}\n"
229
  for user_prompt, bot_response in history:
230
  prompt += f"User: {user_prompt}\nLLM Response: {bot_response}\n"
231
  prompt += f"User: {message}\nLLM Response:"
232
  return prompt
233
-
234
  async def text_to_speech(text):
235
  communicate = edge_tts.Communicate(text)
236
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
@@ -252,6 +252,9 @@ async def generate_audio(history):
252
  return audio_path
253
  return None
254
 
 
 
 
255
  # Gradio interface setup
256
  with gr.Blocks() as demo:
257
  gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
@@ -266,6 +269,9 @@ with gr.Blocks() as demo:
266
  submit = gr.Button("Send")
267
  voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
268
 
 
 
 
269
  # Handle text input
270
  msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
271
  generate_audio, inputs=[state], outputs=[audio_output]
 
10
  # Initialize the ASR pipeline
11
  asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
12
 
13
+ INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
14
+
15
  def speech_to_text(speech):
16
  """Converts speech to text using the ASR pipeline."""
17
  return asr(speech)["text"]
 
53
  return playlist_message
54
  return output
55
 
 
56
  def format_prompt(message, history):
57
  """Formats the prompt including fixed instructions and conversation history."""
58
+ fixed_prompt= """
59
+ You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the users mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
60
 
61
+ Note: Do not write anything else other than the classified mood if classified.
62
 
63
+ Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
64
 
65
+ Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
66
 
67
+ Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
68
 
69
  Examples
70
  User: What is C programming?
 
224
 
225
  User: Lets turn up the music and have some fun!
226
  LLM Response: Party
227
+ """
 
228
  prompt = f"{fixed_prompt}\n"
229
  for user_prompt, bot_response in history:
230
  prompt += f"User: {user_prompt}\nLLM Response: {bot_response}\n"
231
  prompt += f"User: {message}\nLLM Response:"
232
  return prompt
233
+
234
  async def text_to_speech(text):
235
  communicate = edge_tts.Communicate(text)
236
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
 
252
  return audio_path
253
  return None
254
 
255
+ def init_chat():
256
+ return [("", INITIAL_MESSAGE)], [("", INITIAL_MESSAGE)], None
257
+
258
  # Gradio interface setup
259
  with gr.Blocks() as demo:
260
  gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
 
269
  submit = gr.Button("Send")
270
  voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
271
 
272
+ # Initialize chat with greeting
273
+ demo.load(init_chat, outputs=[state, chatbot, audio_output])
274
+
275
  # Handle text input
276
  msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
277
  generate_audio, inputs=[state], outputs=[audio_output]