Update app.py
app.py CHANGED
```diff
@@ -23,7 +23,10 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model_id = "openai/whisper-large-v3-turbo"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
+    model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
 )
 model.to(device)
 
@@ -57,7 +60,8 @@ def response(
     llm_client = InferenceClient(provider="auto", token=hf_token)
 
     result = pipe(
-        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]}
+        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]},
+        generate_kwargs={"language": "en"},
     )
     transcription = result["text"]
 
@@ -69,6 +73,7 @@ def response(
             "content": (
                 "You are a helpful assistant that can have engaging conversations."
                 "Your responses must be very short and concise. No more than two sentences. "
+                "Reasoning: low"
             ),
         }
     ]
```
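Taken together, the three hunks (a) pass explicit dtype and loading options to `from_pretrained`, (b) pin the Whisper pipeline to English decoding, and (c) append a `Reasoning: low` hint to the system prompt. For orientation, below is a minimal sketch of the surrounding `app.py` after this change. The `pipeline` wiring, the body of `audio_to_float32`, the simplified `response` signature, the user turn, and the LLM model id (`openai/gpt-oss-20b`, suggested by the gpt-oss-style `Reasoning: low` line) are assumptions; only the lines shown in the diff above are confirmed by the commit.

```python
import numpy as np
import torch
from huggingface_hub import InferenceClient
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,   # fp16 on GPU, fp32 on CPU (line 23 above)
    low_cpu_mem_usage=True,    # avoid materializing a second full copy of the weights
    use_safetensors=True,      # prefer the safetensors checkpoint
)
model.to(device)

# Assumed pipeline wiring (standard for Whisper checkpoints; not in the diff).
processor = AutoProcessor.from_pretrained(model_id)
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)


def audio_to_float32(samples: np.ndarray) -> np.ndarray:
    # Hypothetical body for the helper named in the diff: int16 PCM -> float32.
    if samples.dtype == np.int16:
        return samples.astype(np.float32) / 32768.0
    return samples.astype(np.float32)


def response(audio: tuple[int, np.ndarray], hf_token: str) -> str:
    llm_client = InferenceClient(provider="auto", token=hf_token)

    # audio is a Gradio-style (sampling_rate, samples) tuple.
    result = pipe(
        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]},
        generate_kwargs={"language": "en"},  # pin decoding to English
    )
    transcription = result["text"]

    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that can have engaging conversations."
                "Your responses must be very short and concise. No more than two sentences. "
                "Reasoning: low"
            ),
        },
        # The user turn is assumed; the diff only touches the system prompt.
        {"role": "user", "content": transcription},
    ]
    # Model id is an assumption inferred from the "Reasoning: low" convention.
    completion = llm_client.chat_completion(
        messages=messages, model="openai/gpt-oss-20b", max_tokens=128
    )
    return completion.choices[0].message.content
```

In practice, `low_cpu_mem_usage=True` and `use_safetensors=True` mainly speed up and slim down model loading at Space startup, while `generate_kwargs={"language": "en"}` stops Whisper from auto-detecting (and occasionally mis-detecting) the spoken language.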