Spaces:
Sleeping
Sleeping
File size: 1,354 Bytes
88dc3ba d16271b 88dc3ba 335cb84 eb4d23a 88dc3ba d16271b 527e644 ce7e2d6 527e644 88dc3ba 527e644 88dc3ba 527e644 ce7e2d6 527e644 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import subprocess
import sys

# Install/upgrade the runtime dependencies BEFORE importing them. pip is
# invoked through the interpreter running this script so the packages land in
# the environment that will actually import them. The calls are best effort:
# a failed install is not checked here and only surfaces if an import below
# fails.
_PIP = [sys.executable, "-m", "pip"]
subprocess.run([*_PIP, "install", "--upgrade", "pip"])
subprocess.run([*_PIP, "install", "gradio", "--upgrade"])
subprocess.run([*_PIP, "install", "transformers"])
subprocess.run([*_PIP, "install", "torch", "torchvision", "torchaudio", "-f", "https://download.pytorch.org/whl/torch_stable.html"])

# These imports must stay below the installs above (hence the E402 waivers).
import gradio as gr  # noqa: E402
import torchaudio  # noqa: E402
from transformers import pipeline  # noqa: E402

# Hub id of the Italian Whisper checkpoint.
# NOTE(review): inferred from the "Whispy/Whisper Italian" description --
# confirm the exact repository id on the Hugging Face Hub.
WHISPER_ITALIAN_MODEL_ID = "whispy/whisper_italian"

# Load the Whispy/Whisper Italian ASR model. The first argument of pipeline()
# is the task name; the checkpoint is selected with `model=`.
whisper_italian_asr = pipeline(
    "automatic-speech-recognition",
    model=WHISPER_ITALIAN_MODEL_ID,
)
# Define the ASR function
def transcribe_audio(audio):
    """Transcribe one audio clip to text with the Italian Whisper pipeline.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by a Gradio audio
            input of type ``"numpy"``, or ``None`` when nothing has been
            recorded/uploaded yet (a live interface can fire with no input).

    Returns:
        The recognized text, or ``""`` when there is no audio to transcribe.
    """
    # Local import: the file has no top-level numpy import.
    import numpy as np

    if audio is None:
        return ""

    sample_rate, samples = audio
    samples = np.asarray(samples)
    if samples.size == 0:
        return ""

    # Integer PCM is scaled into [-1, 1]; the pipeline expects float32 input.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = np.iinfo(samples.dtype).max
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # Multi-channel clips are averaged down to mono.
    # NOTE(review): assumes the layout is (num_samples, channels) -- confirm
    # against the Gradio Audio documentation.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Passing the sampling rate alongside the raw samples lets the pipeline
    # resample to the rate the model expects; no temporary WAV file is needed.
    result = whisper_italian_asr(
        {"sampling_rate": int(sample_rate), "raw": samples}
    )
    # The ASR pipeline returns a dict whose transcription is under "text".
    return result["text"]
# Create the Gradio interface.
# gr.Audio accepts no `preprocess` constructor argument, so no resampling
# transform can be attached here. With type="numpy" the callback receives a
# (sample_rate, samples) tuple and can deal with the sampling rate itself.
audio_input = gr.Audio(type="numpy")

# `interpretation` is not passed: current Gradio releases removed that
# argument from gr.Interface and raise a TypeError when it is supplied.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="text",
    live=True,  # re-run the transcription whenever the audio input changes
)

# Launch the Gradio app (share=True requests a public share link).
iface.launch(share=True)
|