import numpy as np
import torch
import gradio as gr
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Load the Whisper model and processor
model_id = "openai/whisper-medium"
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)
# Clear any forced decoder ids so the model detects language and task itself
model.config.forced_decoder_ids = None

# Whisper expects 16 kHz mono audio
TARGET_SR = 16000

def transcribelocal(microphone, file_upload):
    # Prefer the microphone recording; fall back to the uploaded file
    source = microphone if microphone is not None else file_upload
    if source is None:
        return "No audio provided.", "", ""
    sample_rate, audio = source
    # Convert to mono float32 in [-1, 1]
    audio = audio.astype(np.float32)
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    peak = np.abs(audio).max()
    if peak > 1.0:
        audio /= peak
    # Resample to 16 kHz with simple linear interpolation if needed
    if sample_rate != TARGET_SR:
        new_length = int(round(len(audio) * TARGET_SR / sample_rate))
        audio = np.interp(np.linspace(0, len(audio), new_length, endpoint=False),
                          np.arange(len(audio)), audio)
    duration = len(audio) / TARGET_SR
    # Run the model and decode the transcription (special tokens are dropped here)
    inputs = processor(audio, sampling_rate=TARGET_SR, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(inputs.input_features,
                                output_scores=True, return_dict_in_generate=True)
    text = processor.batch_decode(output.sequences, skip_special_tokens=True)[0]
    # Approximate confidence: mean probability of the greedily chosen tokens
    step_probs = [scores.softmax(dim=-1).max().item() for scores in output.scores]
    confidence = float(np.mean(step_probs)) if step_probs else 0.0
    # Return the text, confidence and duration as outputs
    return text, f"{confidence:.3f}", f"{duration:.2f}"

# Create a Gradio interface with two input modes: microphone and file upload
iface = gr.Interface(
    fn=transcribelocal,
    inputs=[
        gr.Audio(sources=["microphone"], type="numpy", label="Realtime Mode"),
        gr.Audio(sources=["upload"], type="numpy", label="File Upload Mode"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Confidence Score"),
        gr.Textbox(label="Duration (seconds)"),
    ],
    title="Whisper Transcription App",
    description="A Gradio app that uses OpenAI's Whisper model to transcribe audio",
)

# Launch the app
iface.launch()
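
# Optional sketch (not part of the original Space, assumes a CUDA-capable GPU):
# on GPU hardware, whisper-medium transcribes much faster if the model is moved
# to CUDA before iface.launch(). The input features inside transcribelocal would
# then also need .to(model.device) before model.generate, e.g.:
#
# if torch.cuda.is_available():
#     model.to("cuda")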