Spaces:

helvekami
/

ShukaNote

Running on Zero

App Files Files Community

ShukaNote / app.py

helvekami

Updated Gradio App

e2f65f6 3 months ago

raw

history blame

1.76 kB

	import gradio as gr
	import transformers
	import librosa
	import torch

	# Build the Shuka pipeline once at import time so every request reuses it.
	# With CUDA available the model goes on GPU 0 in bfloat16; otherwise it
	# stays on the CPU (device -1) and no dtype override is passed.
	if torch.cuda.is_available():
	    _placement = {"device": 0, "torch_dtype": torch.bfloat16}
	else:
	    _placement = {"device": -1, "torch_dtype": None}

	pipe = transformers.pipeline(
	    model="sarvamai/shuka_v1",
	    trust_remote_code=True,
	    **_placement,
	)

	def _to_mono_float32(audio_data):
	    """Return *audio_data* as a 1-D float32 waveform.

	    Integer PCM input is scaled by the dtype's full range so samples land in
	    roughly [-1.0, 1.0]; multi-channel input is averaged down to mono.
	    """
	    # Local import: the file does not import numpy at module level.
	    import numpy as np

	    samples = np.asarray(audio_data)
	    if np.issubdtype(samples.dtype, np.integer):
	        info = np.iinfo(samples.dtype)
	        samples = samples.astype(np.float32) / float(max(abs(info.min), info.max))
	    else:
	        samples = samples.astype(np.float32)
	    if samples.ndim > 1:
	        # Gradio lays multi-channel audio out as (samples, channels).
	        samples = samples.mean(axis=1)
	    return samples


	def process_audio(audio):
	    """
	    Processes the input audio and returns a text response generated by the Shuka model.

	    Args:
	        audio: The value produced by ``gr.Audio(type="numpy")`` -- a
	            ``(sample_rate, numpy_array)`` tuple -- or ``None`` when nothing
	            was uploaded.

	    Returns:
	        The generated text, or ``"No audio provided."`` when the input is
	        missing or contains no samples.
	    """
	    if audio is None:
	        return "No audio provided."

	    # Gradio returns a tuple (sample_rate, numpy_array)
	    sample_rate, audio_data = audio

	    # librosa.resample only accepts floating-point input, while Gradio hands
	    # over integer PCM that may be stereo; normalise before anything else.
	    audio_data = _to_mono_float32(audio_data)
	    if audio_data.size == 0:
	        return "No audio provided."

	    # Resample to 16000 Hz if necessary
	    if sample_rate != 16000:
	        audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
	        sample_rate = 16000

	    # Conversation context: a system prompt plus a user turn holding the audio
	    # placeholder. The token must be exactly <|audio|> (no backslashes) so the
	    # model can substitute the audio for it.
	    turns = [
	        {'role': 'system', 'content': 'Respond naturally and informatively.'},
	        {'role': 'user', 'content': '<|audio|>'},
	    ]

	    # Run the pipeline with the audio input and conversation context
	    result = pipe(
	        {'audio': audio_data, 'turns': turns, 'sampling_rate': sample_rate},
	        max_new_tokens=512,
	    )

	    # Text-generation style output is a list of dicts; anything else (for
	    # example a plain string) is returned in its string form.
	    if isinstance(result, list) and result and isinstance(result[0], dict):
	        return result[0].get('generated_text', '')
	    return str(result)

	# Gradio UI: one audio input handed to process_audio as a
	# (sample_rate, numpy_array) tuple, and a plain-text output.
	# No 'source' argument is passed to gr.Audio.
	_audio_input = gr.Audio(type="numpy")

	iface = gr.Interface(
	    fn=process_audio,
	    inputs=_audio_input,
	    outputs="text",
	    title="Sarvam AI Shuka Voice Demo",
	    description="Upload a voice note and get a response using Sarvam AI's Shuka model.",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()