Spaces:
Sleeping
Sleeping
File size: 2,578 Bytes
12baae1 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 0e268b1 12b6ee7 0e268b1 12b6ee7 0e268b1 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 8c5a4c5 12b6ee7 0e268b1 12b6ee7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import html
import tempfile
from urllib.parse import parse_qs, urlparse

import gradio as gr
import pytube as pt
import torch
from transformers import pipeline
# Hugging Face Hub checkpoint loaded by the speech-recognition pipeline.
MODEL_NAME = "openai/whisper-small"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single pipeline instance shared by every transcription request.
# chunk_length_s=30 is forwarded to the pipeline so audio is handled in
# 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
def transcribe(microphone, file_upload):
    """Transcribe a microphone recording or an uploaded audio file.

    Args:
        microphone: Audio recorded from the microphone, or ``None`` when
            nothing was recorded.
        file_upload: Uploaded audio file, or ``None`` when nothing was
            uploaded.

    Returns:
        The transcribed text. When both inputs are provided the text is
        prefixed with a warning that the upload was ignored; when neither
        is provided an error message is returned instead.
    """
    # Guard clause: nothing to transcribe.
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone recording always takes precedence over the upload.
    audio_input = microphone if microphone is not None else file_upload
    text = pipe(audio_input)["text"]
    return warn_output + text
def _return_yt_html_embed(yt_url):
video_id = yt_url.split("?v=")[-1]
HTML_str = (
'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/'
+ video_id
+ '"></iframe></center>'
)
return HTML_str
def yt_transcribe(yt_url):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        A ``(html_embed_str, text)`` tuple: the HTML snippet embedding the
        video and the transcribed text.
    """
    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)
    stream = yt.streams.filter(only_audio=True)[0]

    # Download into a per-call temporary directory so concurrent requests
    # never overwrite each other's audio and no file is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = stream.download(output_path=tmp_dir, filename="audio.mp3")
        text = pipe(audio_path)["text"]

    return html_embed_str, text
# Container that hosts the tabbed demo.
demo = gr.Blocks()

# Keyword arguments common to both interfaces.
_SHARED_INTERFACE_OPTIONS = {
    "layout": "horizontal",
    "theme": "huggingface",
    "allow_flagging": "never",
}

# First tab: transcribe a microphone recording or an uploaded file.
mf_transcribe = gr.Interface(
    title="Whisper Audio Transcribe",
    description="Transcribe long audio/ microphone input (powered by 🤗transformers) with a click of a button!",
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
    **_SHARED_INTERFACE_OPTIONS,
)

# Second tab: transcribe the audio track of a YouTube video.
# NOTE: `fn=yt_transcribe` is evaluated before the assignment, so the
# interface wraps the function; afterwards the name `yt_transcribe` refers
# to the Interface object rather than the function.
yt_transcribe = gr.Interface(
    title="Whisper YT Transcribe",
    description="Transcribe long YouTube videos (powered by 🤗transformers) with a click of a button!",
    fn=yt_transcribe,
    inputs=[
        gr.inputs.Textbox(
            lines=1,
            placeholder="Paste a URL to YT video here",
            label="yt_url",
        )
    ],
    outputs=["html", "text"],
    **_SHARED_INTERFACE_OPTIONS,
)

# Place both interfaces in tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mf_transcribe, yt_transcribe],
        ["Audio Transcribe", "YouTube Transcribe"],
    )

demo.launch(enable_queue=True)
|