Spaces:
Sleeping
Sleeping
hashhac
committed on
Commit
·
a70a34d
1
Parent(s):
6218f6a
changed input values
Browse files
app.py
CHANGED
@@ -2,8 +2,9 @@ import gradio as gr
|
|
2 |
import numpy as np
|
3 |
import torch
|
4 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5ForSpeechToText
|
5 |
-
from datasets import load_dataset
|
6 |
import soundfile as sf
|
|
|
|
|
7 |
|
8 |
# Check if CUDA is available, otherwise use CPU
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -48,7 +49,7 @@ def demo():
|
|
48 |
audio_data = audio_data.flatten().astype(np.float32) / 32768.0 # Normalize to [-1.0, 1.0]
|
49 |
|
50 |
# Speech-to-text
|
51 |
-
transcript = speech_to_text(audio_data)
|
52 |
print(f"Transcribed: {transcript}")
|
53 |
|
54 |
# Generate response (for simplicity, echo the transcript)
|
|
|
2 |
import numpy as np
|
3 |
import torch
|
4 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5ForSpeechToText
|
|
|
5 |
import soundfile as sf
|
6 |
+
import tempfile
|
7 |
+
import os
|
8 |
|
9 |
# Check if CUDA is available, otherwise use CPU
|
10 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
49 |
audio_data = audio_data.flatten().astype(np.float32) / 32768.0 # Normalize to [-1.0, 1.0]
|
50 |
|
51 |
# Speech-to-text
|
52 |
+
transcript = speech_to_text({"array": audio_data, "sampling_rate": sample_rate})
|
53 |
print(f"Transcribed: {transcript}")
|
54 |
|
55 |
# Generate response (for simplicity, echo the transcript)
|