Update app.py
Browse files
app.py
CHANGED
|
@@ -21,9 +21,9 @@ MAX_AUDIO_SECONDS = 40 # won't try to transcribe if longer than this
|
|
| 21 |
DESCRIPTION = '''
|
| 22 |
<div>
|
| 23 |
<h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
|
| 24 |
-
<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant that accepts audio input and outputs
|
| 25 |
<p>This space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for Automatic Speech-to-text Recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Instruct</b></a> for the large language model (LLM) and <a href="https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text to speech (TTS).</p>
|
| 26 |
-
<p>This demo accepts inputs not more than 40 seconds long.</p>
|
| 27 |
<p>Transcription and responses are limited to the English language.</p>
|
| 28 |
</div>
|
| 29 |
'''
|
|
@@ -63,8 +63,8 @@ frame_asr = FrameBatchMultiTaskAED(
|
|
| 63 |
|
| 64 |
amp_dtype = torch.float16
|
| 65 |
|
| 66 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
| 67 |
-
llama3_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct"
|
| 68 |
terminators = [
|
| 69 |
tokenizer.eos_token_id,
|
| 70 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
@@ -173,7 +173,7 @@ def chat_llama3_8b(message: str,
|
|
| 173 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
| 174 |
conversation.append({"role": "user", "content": message})
|
| 175 |
|
| 176 |
-
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(
|
| 177 |
|
| 178 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
| 179 |
|
|
|
|
| 21 |
DESCRIPTION = '''
|
| 22 |
<div>
|
| 23 |
<h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
|
| 24 |
+
<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant with chat logs that accepts audio input and outputs an AI response. </p>
|
| 25 |
<p>This space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for Automatic Speech-to-text Recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Instruct</b></a> for the large language model (LLM) and <a href="https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text to speech (TTS).</p>
|
| 26 |
+
<p>This demo accepts audio inputs not more than 40 seconds long.</p>
|
| 27 |
<p>Transcription and responses are limited to the English language.</p>
|
| 28 |
</div>
|
| 29 |
'''
|
|
|
|
| 63 |
|
| 64 |
amp_dtype = torch.float16
|
| 65 |
|
| 66 |
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
| 67 |
+
llama3_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct").to(device)
|
| 68 |
terminators = [
|
| 69 |
tokenizer.eos_token_id,
|
| 70 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
|
|
| 173 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
| 174 |
conversation.append({"role": "user", "content": message})
|
| 175 |
|
| 176 |
+
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(device)
|
| 177 |
|
| 178 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
| 179 |
|