Update app.py
app.py CHANGED
```diff
@@ -23,7 +23,10 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model_id = "openai/whisper-large-v3-turbo"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
+    model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
 )
 model.to(device)
 
@@ -57,7 +60,8 @@ def response(
     llm_client = InferenceClient(provider="auto", token=hf_token)
 
     result = pipe(
-        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]}
+        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]},
+        generate_kwargs={"language": "en"},
     )
     transcription = result["text"]
 
@@ -69,6 +73,7 @@ def response(
             "content": (
                 "You are a helpful assistant that can have engaging conversations."
                 "Your responses must be very short and concise. No more than two sentences. "
+                "Reasoning: low"
             ),
         }
     ]
```
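Taken together, the three hunks (a) pass explicit dtype and loading options to `from_pretrained`, (b) pin the Whisper pipeline to English decoding, and (c) append a `Reasoning: low` hint to the system prompt. For orientation, below is a minimal sketch of the surrounding `app.py` after this change. The `pipeline` wiring, the body of `audio_to_float32`, the simplified `response` signature, the user turn, and the LLM model id (`openai/gpt-oss-20b`, suggested by the gpt-oss-style `Reasoning: low` line) are assumptions; only the lines shown in the diff above are confirmed by the commit.

```python
import numpy as np
import torch
from huggingface_hub import InferenceClient
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,   # fp16 on GPU, fp32 on CPU (line 23 above)
    low_cpu_mem_usage=True,    # avoid materializing a second full copy of the weights
    use_safetensors=True,      # prefer the safetensors checkpoint
)
model.to(device)

# Assumed pipeline wiring (standard for Whisper checkpoints; not in the diff).
processor = AutoProcessor.from_pretrained(model_id)
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)


def audio_to_float32(samples: np.ndarray) -> np.ndarray:
    # Hypothetical body for the helper named in the diff: int16 PCM -> float32.
    if samples.dtype == np.int16:
        return samples.astype(np.float32) / 32768.0
    return samples.astype(np.float32)


def response(audio: tuple[int, np.ndarray], hf_token: str) -> str:
    llm_client = InferenceClient(provider="auto", token=hf_token)

    # audio is a Gradio-style (sampling_rate, samples) tuple.
    result = pipe(
        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]},
        generate_kwargs={"language": "en"},  # pin decoding to English
    )
    transcription = result["text"]

    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that can have engaging conversations."
                "Your responses must be very short and concise. No more than two sentences. "
                "Reasoning: low"
            ),
        },
        # The user turn is assumed; the diff only touches the system prompt.
        {"role": "user", "content": transcription},
    ]
    # Model id is an assumption inferred from the "Reasoning: low" convention.
    completion = llm_client.chat_completion(
        messages=messages, model="openai/gpt-oss-20b", max_tokens=128
    )
    return completion.choices[0].message.content
```

In practice, `low_cpu_mem_usage=True` and `use_safetensors=True` mainly speed up and slim down model loading at Space startup, while `generate_kwargs={"language": "en"}` stops Whisper from auto-detecting (and occasionally mis-detecting) the spoken language.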