import os

import gradio as gr
from transformers import pipeline

demo = gr.Blocks()

# Speech-recognition and summarization pipelines
pipe = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)
pipe2 = pipeline("summarization", model="facebook/bart-large-cnn")


def launch(filepath):
    # Transcribe the audio, then summarize the transcript.
    transcript = pipe(filepath)["text"]
    summary = pipe2(transcript)
    return summary[0]["summary_text"]


def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Split long audio into 30-second chunks and transcribe them in batches.
    output = pipe(
        filepath,
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(share=True, server_port=int(os.environ["PORT1"]))
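
# Note: the `launch` helper above is defined but never wired into the UI. A minimal
# sketch (an assumption, not part of the original app) for exposing it as a third
# tab would be to build one more gr.Interface and include it in the
# gr.TabbedInterface call inside the `with demo:` block, e.g.:
#
#     summarize_tab = gr.Interface(
#         fn=launch,
#         inputs=gr.Audio(sources="upload", type="filepath"),
#         outputs=gr.Textbox(label="Summarized notes", lines=3),
#         allow_flagging="never",
#     )
#     gr.TabbedInterface(
#         [mic_transcribe, file_transcribe, summarize_tab],
#         ["Transcribe Microphone", "Transcribe Audio File", "Summarize Audio"],
#     )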