Update app.py
Browse files
app.py
CHANGED
|
@@ -21,9 +21,9 @@ MAX_AUDIO_SECONDS = 40 # won't try to transcribe if longer than this
|
|
| 21 |
DESCRIPTION = '''
|
| 22 |
<div>
|
| 23 |
<h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
|
| 24 |
-
<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant that accepts audio input and outputs
|
| 25 |
<p>This space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for Automatic Speech-to-text Recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Instruct</b></a> for the large language model (LLM) and <a href="https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text to speech (TTS).</p>
|
| 26 |
-
<p>This demo accepts inputs not more than 40 seconds long.</p>
|
| 27 |
<p>Transcription and responses are limited to the English language.</p>
|
| 28 |
</div>
|
| 29 |
'''
|
|
@@ -63,8 +63,8 @@ frame_asr = FrameBatchMultiTaskAED(
|
|
| 63 |
|
| 64 |
amp_dtype = torch.float16
|
| 65 |
|
| 66 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
| 67 |
-
llama3_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct"
|
| 68 |
terminators = [
|
| 69 |
tokenizer.eos_token_id,
|
| 70 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
@@ -173,7 +173,7 @@ def chat_llama3_8b(message: str,
|
|
| 173 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
| 174 |
conversation.append({"role": "user", "content": message})
|
| 175 |
|
| 176 |
-
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(
|
| 177 |
|
| 178 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
| 179 |
|
|
|
|
| 21 |
DESCRIPTION = '''
|
| 22 |
<div>
|
| 23 |
<h1 style='text-align: center'>MyAlexa: Voice Chat Assistant</h1>
|
| 24 |
+
<p style='text-align: center'>MyAlexa is a demo of a voice chat assistant with chat logs that accepts audio input and outputs an AI response. </p>
|
| 25 |
<p>This space uses <a href="https://huggingface.co/nvidia/canary-1b"><b>NVIDIA Canary 1B</b></a> for Automatic Speech-to-text Recognition (ASR), <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama 3 8B Instruct</b></a> for the large language model (LLM) and <a href="https://huggingface.co/docs/transformers/en/model_doc/vits"><b>VITS</b></a> for text to speech (TTS).</p>
|
| 26 |
+
<p>This demo accepts audio inputs not more than 40 seconds long.</p>
|
| 27 |
<p>Transcription and responses are limited to the English language.</p>
|
| 28 |
</div>
|
| 29 |
'''
|
|
|
|
| 63 |
|
| 64 |
amp_dtype = torch.float16
|
| 65 |
|
| 66 |
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
| 67 |
+
llama3_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct").to(device)
|
| 68 |
terminators = [
|
| 69 |
tokenizer.eos_token_id,
|
| 70 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
|
|
| 173 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
| 174 |
conversation.append({"role": "user", "content": message})
|
| 175 |
|
| 176 |
+
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(device)
|
| 177 |
|
| 178 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
| 179 |
|