Spaces:

helvekami
/

ShukaNote

Running on Zero

App Files Files Community

ShukaNote / app.py

helvekami

Updated Gradio App

9c37c06 3 months ago

raw

history blame

2.29 kB

	import transformers
	import gradio as gr
	import librosa
	import torch
	import spaces
	import numpy as np

	@spaces.GPU(duration=60)
	def transcribe_and_respond(audio_file):
	    """Transcribe a recorded audio file with the ``sarvamai/shuka_v1`` pipeline.

	    Args:
	        audio_file: Filesystem path of the recording to transcribe. May be
	            ``None`` when no recording is available (for example when a live
	            interface fires before anything has been recorded).

	    Returns:
	        The pipeline output for the recording, an empty string when there is
	        no recording, or a string of the form ``"Error: <message>"`` if
	        loading the model, decoding the audio, or inference fails.
	    """
	    # Nothing recorded yet: return an empty result instead of letting
	    # librosa fail on a missing path and surfacing that as an error.
	    if not audio_file:
	        return ""

	    target_sr = 16000

	    try:
	        # NOTE(review): the pipeline is rebuilt on every call. Whether it can
	        # be safely cached across calls depends on how the GPU decorator
	        # schedules this function -- confirm before hoisting it.
	        pipe = transformers.pipeline(
	            model='sarvamai/shuka_v1',
	            trust_remote_code=True,
	            device=0,
	            torch_dtype=torch.bfloat16,
	        )

	        # librosa.load resamples to the requested rate and downmixes to mono
	        # itself, so no separate channel-averaging or resampling pass is
	        # needed afterwards.
	        audio, sr = librosa.load(audio_file, sr=target_sr, mono=True)

	        # Hand the model a contiguous float32 buffer.
	        audio = np.ascontiguousarray(audio, dtype=np.float32)

	        # Debug: Print audio properties
	        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

	        # Transcription prompt. The user turn carries the audio placeholder,
	        # which must be the literal token <|audio|> (no backslashes).
	        turns = [
	            {'role': 'system', 'content': 'Please transcribe the following audio exactly.'},
	            {'role': 'user', 'content': '<|audio|>'},
	        ]

	        # Debug: Print the initial turns
	        print(f"Initial turns: {turns}")

	        # Call the model with the audio and prompt
	        output = pipe(
	            {'audio': audio, 'turns': turns, 'sampling_rate': sr},
	            max_new_tokens=512,
	        )

	        # Debug: Print the final output from the model
	        print(f"Model output: {output}")

	        return output

	    except Exception as exc:
	        # UI boundary: report the failure as text rather than crashing the
	        # app, but keep the full traceback in the logs for debugging.
	        import traceback

	        traceback.print_exc()
	        return f"Error: {exc}"

	# Microphone-only input; the recording reaches the callback as a file path.
	_microphone_input = gr.Audio(sources="microphone", type="filepath")

	# live=True asks Gradio to re-run the callback when the input changes
	# instead of waiting for an explicit submit.
	iface = gr.Interface(
	    title="Live Transcription and Response",
	    description="Speak into your microphone, and the model will transcribe your speech.",
	    fn=transcribe_and_respond,
	    inputs=_microphone_input,
	    outputs="text",
	    live=True,
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()