Spaces:

TechAudio
/

ReaSpeech-Cloud

Sleeping

ReaSpeech-Cloud / app /openai_whisper /core.py

initial commit

402daee over 1 year ago

2.53 kB

	import os
	from io import StringIO
	from threading import Lock
	from typing import BinaryIO, Union

	import torch
	import whisper
	from whisper.utils import ResultWriter, WriteTXT, WriteSRT, WriteVTT, WriteTSV, WriteJSON

	# Whitelist of option names that transcribe() forwards to the model's
	# transcribe call; any other key in the incoming options is dropped.
	ASR_ENGINE_OPTIONS = frozenset((
	    "task",
	    "language",
	    "initial_prompt",
	    "word_timestamps",
	))

	# Model selection comes from the environment: ASR_MODEL names the model
	# (default "small") and ASR_MODEL_PATH is the directory handed to
	# whisper.load_model as download_root (default ~/.cache/whisper).
	model_name = os.environ.get("ASR_MODEL", "small")
	model_path = os.environ.get(
	    "ASR_MODEL_PATH",
	    os.path.join(os.path.expanduser("~"), ".cache", "whisper"),
	)

	# Currently loaded model (None until load_model() has run) and the lock
	# that load_model(), transcribe() and language_detection() all acquire.
	model = None
	model_lock = Lock()
	def load_model(next_model_name: str):
	    """Make ``next_model_name`` the active module-level model and return it.

	    If a model is already loaded under that name it is returned unchanged.
	    Otherwise the model is loaded through ``whisper.load_model`` with
	    ``model_path`` as the download root, moved to CUDA when
	    ``torch.cuda.is_available()`` is true, and stored in the ``model`` and
	    ``model_name`` globals. The whole operation runs under ``model_lock``.
	    """
	    global model_name, model

	    with model_lock:
	        already_active = model and next_model_name == model_name
	        if not already_active:
	            use_gpu = torch.cuda.is_available()
	            fresh = whisper.load_model(next_model_name, download_root=model_path)
	            model = fresh.cuda() if use_gpu else fresh
	            # Record the name only after the model itself has been replaced.
	            model_name = next_model_name

	        return model


	def transcribe(audio, asr_options, output):
	    """Transcribe ``audio`` and return the formatted result as a ``StringIO``.

	    Only the entries of ``asr_options`` whose keys appear in
	    ``ASR_ENGINE_OPTIONS`` are forwarded to ``model.transcribe``. ``output``
	    is passed to ``write_result`` to choose the format. The returned buffer
	    is rewound to position 0 so callers can read it from the start.
	    """
	    engine_kwargs = {}
	    for key, value in asr_options.items():
	        if key in ASR_ENGINE_OPTIONS:
	            engine_kwargs[key] = value

	    # Inference takes the same lock that load_model() holds while swapping
	    # the module-level model.
	    with model_lock:
	        transcription = model.transcribe(audio, **engine_kwargs)

	    buffer = StringIO()
	    write_result(transcription, buffer, output)
	    buffer.seek(0)
	    return buffer


	def language_detection(audio):
	    """Return the code of the most probable spoken language in ``audio``.

	    The audio is padded or trimmed with ``whisper.pad_or_trim``, converted
	    to a log-Mel spectrogram, and passed to ``model.detect_language``. The
	    key with the highest probability in the returned mapping is the result.
	    """
	    # pad/trim the audio to fit 30 seconds
	    clip = whisper.pad_or_trim(audio)

	    with model_lock:
	        # Build the spectrogram under the lock, using the mel-bin count and
	        # device of the model that will actually run detection. The default
	        # bin count does not fit every checkpoint (e.g. large-v3 uses 128),
	        # and load_model() may replace ``model`` between unlocked reads.
	        mel = whisper.log_mel_spectrogram(
	            clip, n_mels=model.dims.n_mels
	        ).to(model.device)

	        # detect the spoken language
	        _, probs = model.detect_language(mel)

	    return max(probs, key=probs.get)


	def write_result(
	    result: dict, file: BinaryIO, output: Union[str, None]
	):
	    """Serialize ``result`` to ``file`` in the format named by ``output``.

	    Supported values of ``output`` are "srt", "vtt", "tsv", "json" and
	    "txt". For any other value nothing is written and the string
	    'Please select an output method!' is returned; after a successful
	    write the function returns None.
	    """
	    if output == "srt":
	        writer_cls = WriteSRT
	    elif output == "vtt":
	        writer_cls = WriteVTT
	    elif output == "tsv":
	        writer_cls = WriteTSV
	    elif output == "json":
	        writer_cls = WriteJSON
	    elif output == "txt":
	        writer_cls = WriteTXT
	    else:
	        return 'Please select an output method!'

	    # Formatting options handed to every writer.
	    writer_options = {
	        'max_line_width': 1000,
	        'max_line_count': 10,
	        'highlight_words': False,
	    }
	    # NOTE(review): the writer is constructed with the ResultWriter class
	    # itself as its only argument; confirm this is the intended value.
	    writer_cls(ResultWriter).write_result(result, file=file, options=writer_options)