File size: 1,354 Bytes
88dc3ba
d16271b
88dc3ba
335cb84
eb4d23a
88dc3ba
 
d16271b
527e644
 
 
ce7e2d6
527e644
 
88dc3ba
527e644
 
88dc3ba
527e644
ce7e2d6
527e644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import subprocess
import gradio as gr  # Add this import statement

import sys


def _pip_install(*pip_args):
    """Run ``pip install`` with *pip_args* using the current interpreter.

    Invoking pip as ``sys.executable -m pip`` guarantees the packages land in
    the environment of the interpreter that is executing this script, which a
    bare ``pip``/``python`` looked up on PATH does not.

    Args:
        *pip_args: Arguments appended after ``pip install``.

    Returns:
        The exit status of the pip process (0 on success).
    """
    command = [sys.executable, "-m", "pip", "install", *pip_args]
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        # Best effort: report the failure but let the app try to start with
        # whatever versions are already installed.
        print(
            f"warning: {' '.join(command)} exited with status "
            f"{completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode


# Install necessary libraries at start-up.
# NOTE: `gradio` is already imported above, so an upgrade performed here only
# takes effect the next time the process starts.
_pip_install("--upgrade", "pip")
_pip_install("--upgrade", "gradio")
_pip_install("transformers")
_pip_install(
    "torch",
    "torchvision",
    "torchaudio",
    "-f",
    "https://download.pytorch.org/whl/torch_stable.html",
)

import gradio as gr
import torchaudio
from transformers import pipeline

# Load the Whisper Italian ASR model as a speech-recognition pipeline.
# The first positional argument of `pipeline` is the *task* name, so the
# checkpoint must be passed separately through `model=`.
# NOTE(review): the repo id is inferred from the "Whispy/Whisper Italian"
# wording of the original comment -- confirm it matches the intended checkpoint.
whisper_italian_asr = pipeline(
    "automatic-speech-recognition",
    model="whispy/whisper_italian",
)

# Define the ASR function
# Sample rate assumed for inputs that do not carry their own (bare tensors).
_DEFAULT_SAMPLE_RATE = 16000


def _to_mono_float32(audio):
    """Convert raw audio into ``(sample_rate, samples)`` for the ASR pipeline.

    Args:
        audio: Either a ``(sample_rate, data)`` tuple, or an array/tensor-like
            object of samples assumed to be at ``_DEFAULT_SAMPLE_RATE``.

    Returns:
        A tuple ``(sample_rate, samples)`` where ``samples`` is a 1-D
        ``float32`` NumPy array.
    """
    import numpy as np  # local import keeps this block self-contained

    if isinstance(audio, tuple):
        sample_rate, data = audio
        # Gradio lays out multi-channel audio as (samples, channels).
        channel_axis = 1
    else:
        sample_rate = _DEFAULT_SAMPLE_RATE
        data = audio.numpy() if hasattr(audio, "numpy") else audio
        # torchaudio tensors are laid out as (channels, samples).
        channel_axis = 0

    samples = np.asarray(data)

    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM -> floats in roughly [-1, 1], which the model expects.
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # Down-mix to mono; the pipeline only accepts single-channel input.
        samples = samples.mean(axis=channel_axis)

    return int(sample_rate), np.atleast_1d(np.squeeze(samples))


def transcribe_audio(audio):
    """Transcribe one recording with the module-level ``whisper_italian_asr``.

    The audio is handed to the pipeline in memory instead of being written to
    a fixed file name, so concurrent requests cannot overwrite each other.

    Args:
        audio: The value delivered by the audio input. Accepted forms are
            ``None`` (nothing recorded), a file path string, a
            ``(sample_rate, samples)`` tuple, or an array/tensor-like object
            of samples assumed to be sampled at 16 kHz.

    Returns:
        The recognised text, or an empty string when there is no audio.
    """
    if audio is None:
        # Live interfaces call the function when the input is cleared.
        return ""

    if isinstance(audio, str):
        # Given a path, the pipeline decodes the file itself.
        result = whisper_italian_asr(audio)
    else:
        sample_rate, samples = _to_mono_float32(audio)
        if samples.size == 0:
            return ""
        # The dict form lets the pipeline resample to the model's rate.
        result = whisper_italian_asr(
            {"sampling_rate": sample_rate, "raw": samples}
        )

    # A single input yields a dict; tolerate a list for batched calls.
    if isinstance(result, list):
        result = result[0] if result else {}
    return result.get("text", "").strip()

# Create the Gradio interface
# `gr.Audio` takes no `preprocess` constructor argument; resampling is left to
# the transcription function / ASR pipeline. With type="numpy" the callback
# receives a `(sample_rate, samples)` tuple, or None when the input is empty.
audio_input = gr.Audio(type="numpy")

# The `interpretation` argument was removed from `gr.Interface` in Gradio 4,
# and this script upgrades Gradio at start-up, so it is no longer passed.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs="text",
    live=True,  # re-run the transcription whenever the audio input changes
)

# Launch the Gradio app and request a public share link.
iface.launch(share=True)