VanYsa committed
Commit c9b4e47 · 1 Parent(s): 7143888

Update app.py

Edited texts and added a no-function repeat button

Files changed (1):
  1. app.py (+52 -25)
app.py CHANGED
@@ -21,10 +21,10 @@ MAX_AUDIO_SECONDS = 40 # wont try to transcribe if longer than this
 DESCRIPTION = '''
 <div>
 <h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
-<p style='text-align: center'>This is a demo of a voice chat that accepts an audio input up to 40 seconds long. Transcription and responses are limited to the English language.</p>
-<p>This Space uses nvidia's canaray 1b model to transcribe input audio, LLama3 for LLM and VITS for TTS <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama3 8b Chat</b></a>. Meta Llama3 is the new open LLM and comes in two sizes: 8b and 70b. Feel free to play with it, or duplicate to run privately!</p>
-<p>May not work with audio files longer than 40 seconds if cuda is not available <a href="https://huggingface.co/blog/llama3">at our blog post</a>.</p>
-<p>🦕 Looking for an even more powerful model? Check out the <a href="https://huggingface.co/chat/"><b>Hugging Chat</b></a> integration for Meta Llama 3 70b</p>
+<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant that accepts audio input and outputs a voice response with chat logs.</p>
+<p>This Space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for automatic speech recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Instruct</b></a> as the large language model (LLM) and <a href="https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text-to-speech (TTS).</p>
+<p>This demo accepts inputs no more than 40 seconds long.</p>
+<p>Transcription and responses are limited to the English language.</p>
 </div>
 '''
 PLACEHOLDER = """
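
Note: the updated description spells out a three-stage pipeline (Canary 1B for ASR, Llama 3 8B Instruct for the LLM, VITS for TTS). As context only, here is a minimal sketch of the VITS text-to-speech stage the description links to, via the transformers API; the checkpoint name and helper function are assumptions, not code from this Space.

# Sketch only -- not part of this commit. Checkpoint name is an assumption.
import torch
from transformers import VitsModel, AutoTokenizer

tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")

def synthesize_speech(text: str):
    """Return (sampling_rate, waveform) for a short English text."""
    inputs = tts_tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform[0]
    return tts_model.config.sampling_rate, waveform.numpy()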
@@ -63,7 +63,7 @@ amp_dtype = torch.float16
 def convert_audio(audio_filepath, tmpdir, utt_id):
     """
     Convert all files to monochannel 16 kHz wav files.
-    Do not convert and raise error if audio too long.
+    Do not convert, and raise an error, if the audio is too long.
     Returns output filename and duration.
     """
 
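Note: the convert_audio docstring describes resampling to mono 16 kHz WAV and rejecting over-long clips. A hedged sketch of that behaviour, assuming a pydub/ffmpeg-style conversion; the Space's actual implementation is not shown in this diff and the helper name is hypothetical.

# Illustrative sketch only; pydub dependency and helper name are assumptions.
import os
from pydub import AudioSegment

MAX_AUDIO_SECONDS = 40  # mirrors the constant at the top of app.py

def convert_audio_sketch(audio_filepath, tmpdir, utt_id):
    audio = AudioSegment.from_file(audio_filepath)
    duration = audio.duration_seconds
    if duration > MAX_AUDIO_SECONDS:
        raise ValueError(f"Audio is {duration:.1f}s; the limit is {MAX_AUDIO_SECONDS}s")
    out_filename = os.path.join(tmpdir, f"{utt_id}.wav")
    # mono, 16 kHz WAV
    audio.set_channels(1).set_frame_rate(16000).export(out_filename, format="wav")
    return out_filename, duration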
 
@@ -90,6 +90,11 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
 
 
 def transcribe(audio_filepath):
+    """
+    Transcribes a converted audio file.
+    Set to the English language with punctuation.
+    Returns the output text.
+    """
 
     if audio_filepath is None:
         raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
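
Note: the new docstring says transcription is fixed to English with punctuation. As a rough illustration, this is approximately how that setup looks with NeMo's Canary 1B, based on the public nvidia/canary-1b model card; the manifest fields and helper name are assumptions and may not match this Space's actual transcribe() body.

# Hedged sketch based on the nvidia/canary-1b model card; not this Space's code.
import json
from nemo.collections.asr.models import EncDecMultiTaskModel

canary_model = EncDecMultiTaskModel.from_pretrained("nvidia/canary-1b")

def transcribe_sketch(converted_wav, manifest_path="/tmp/manifest.json"):
    # One-line JSON manifest: English in and out, punctuation/capitalization on.
    entry = {
        "audio_filepath": converted_wav,
        "taskname": "asr",
        "source_lang": "en",
        "target_lang": "en",
        "pnc": "yes",
        "answer": "na",
    }
    with open(manifest_path, "w") as f:
        f.write(json.dumps(entry) + "\n")
    return canary_model.transcribe(manifest_path, batch_size=1)[0]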
@@ -121,25 +126,28 @@ def transcribe(audio_filepath):
     return output_text
 
 def add_message(history, message):
-    history.append((message, None))
-    return history, gr.Textbox(value="", interactive=False)
+    """
+    Adds the input message to the chatbot.
+    Returns the updated chatbot with an empty input textbox.
+    """
+    history.append((message, None))
+    return history, gr.Textbox(value="", interactive=False)
 
 def bot(history):
-    response = "**That's cool!**" #TODO Llama3 response
-    history[-1][1] = ""
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        yield history
+    """
+    Prints the LLM's response in the chatbot.
+    """
+    response = "**That's cool!**" #TODO Llama3 response
+    history[-1][1] = ""
+    for character in response:
+        history[-1][1] += character
+        time.sleep(0.05)
+        yield history
 
 with gr.Blocks(
-    title="NeMo Canary Model",
+    title="MyAlexa",
     css="""
     textarea { font-size: 18px;}
-    #chat_input span {
-        font-size: 18px;
-        font-weight: bold;
-    }
     """,
     theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg) # make text slightly bigger (default is text_md )
 ) as demo:
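
Note: bot() still streams a hard-coded "**That's cool!**" placeholder character by character; the "#TODO Llama3 response" marks where the real reply will go. A hedged sketch of how that TODO might later be filled with a streamed Meta-Llama-3-8B-Instruct reply using transformers' TextIteratorStreamer; the loading options and helper name are assumptions, not part of this commit.

# Hypothetical sketch for the "#TODO Llama3 response" placeholder; not this commit's code.
from threading import Thread
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

llm_id = "meta-llama/Meta-Llama-3-8B-Instruct"
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(llm_id, torch_dtype=torch.float16, device_map="auto")

def bot_sketch(history):
    # The last user turn is the transcribed text added by add_message().
    input_ids = llm_tokenizer.apply_chat_template(
        [{"role": "user", "content": history[-1][0]}],
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(llm_model.device)
    streamer = TextIteratorStreamer(llm_tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=llm_model.generate,
           kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=256)).start()
    history[-1][1] = ""
    for new_text in streamer:  # yield partial history so the chatbot updates as tokens arrive
        history[-1][1] += new_text
        yield history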
@@ -163,30 +171,49 @@ with gr.Blocks(
 
     with gr.Column():
 
-        gr.HTML("<p><b>Step 3:</b> Transcribe audio</p>")
+        gr.HTML("<p><b>Step 2:</b> Enter audio as input and wait for MyAlexa's response.</p>")
 
-        go_button = gr.Button(
-            value="Transcribe audio",
-            variant="primary", # make "primary" so it stands out (default is "secondary")
+        submit_button = gr.Button(
+            value="Submit audio",
+            variant="primary"
         )
 
         chat_input = gr.Textbox(
             label="Transcribed text:",
-            interactive=True,
+            interactive=False,
             placeholder="Enter message",
             elem_id="chat_input",
             visible=False
         )
-        clear = gr.ClearButton([chatbot])
+
+        gr.HTML("<p><b>Optional:</b> Repeat MyAlexa's voice response.</p>")
+
+        repeat_button = gr.Button(
+            value="Repeat audio",
+            variant="secondary"
+        )
+
+        gr.HTML("<p><b>Optional:</b> Clear the chatbox.</p>")
+
+        clear = gr.ClearButton(
+            components=[chatbot],
+            value="Clear chat",
+        )
 
     chat_msg = chat_input.change(add_message, [chatbot, chat_input], [chatbot, chat_input])
     bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
     bot_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input])
 
-    go_button.click(
+    submit_button.click(
         fn=transcribe,
         inputs = [audio_file],
         outputs = [chat_input]
+    )
+
+    repeat_button.click(
+        fn=None,
+        inputs = None,
+        outputs = None
     )
 
 demo.queue()
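
Note: per the commit message, the repeat button is wired with fn=None for now, so clicking it does nothing yet (a no-op handler is valid in Gradio). As a hypothetical follow-up, a self-contained sketch of how such a button could replay a stored voice response; the gr.State/gr.Audio components and names are assumptions, not from this Space.

# Hypothetical follow-up (not in this commit): replaying stored audio on click.
import gradio as gr

with gr.Blocks() as repeat_demo:
    last_reply_audio = gr.State(value=None)           # assumed to hold (sample_rate, waveform)
    voice_output = gr.Audio(label="Voice response", autoplay=True)
    repeat_button = gr.Button("Repeat audio")
    repeat_button.click(
        fn=lambda audio: audio,                       # re-emit the stored audio so it plays again
        inputs=[last_reply_audio],
        outputs=[voice_output],
    )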
 