Spaces:

kingabzpro
/

Transcribed-Urdu

Running

Transcribed-Urdu / app.py

Abid Ali Awan

Adjust chunk and stride lengths in the transcription pipeline in app.py to optimize processing efficiency and improve transcription accuracy.

9ef6b5c about 2 months ago

raw

history blame contribute delete

3.93 kB

	import os
	import re
	import warnings

	import gradio as gr
	import numpy as np
	import torch
	from transformers import (
	AutoModelForSpeechSeq2Seq,
	AutoProcessor,
	logging,
	pipeline,
	)

	# Module-level setup: silence noisy warnings, pin CPU thread counts, load and
	# quantize the Whisper checkpoint, and build the chunked ASR pipeline that
	# `transcribe` calls. Everything here runs once at import time, in this order.

	# Hide FutureWarning messages.
	warnings.simplefilter("ignore", FutureWarning)

	# —— CPU performance tweaks ——
	# NOTE(review): these environment variables are assigned after `import torch`
	# has already executed; presumably a threading runtime that was initialised
	# during that import will not re-read them — confirm. The explicit
	# torch.set_num_threads(4) call below sets torch's thread count directly.
	os.environ["OMP_NUM_THREADS"] = "4"
	os.environ["MKL_NUM_THREADS"] = "4"
	torch.set_num_threads(4)

	# `logging` is the transformers logging module (imported from transformers),
	# not the stdlib one; restrict it to error-level output.
	logging.set_verbosity_error()

	# —— Model & device setup ——
	# Checkpoint identifier shared by the model and the processor below.
	model_id = "kingabzpro/whisper-large-v3-turbo-urdu"

	# Load in fp32 and quantize to int8
	model = AutoModelForSpeechSeq2Seq.from_pretrained(
	model_id,
	torch_dtype=torch.float32,
	use_safetensors=True,
	)
	# Switch to evaluation mode before quantizing.
	model.eval()
	# Dynamic quantization of the torch.nn.Linear modules to qint8.
	model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
	# NOTE(review): torch.compile is applied on top of the dynamically quantized
	# model — verify this combination is supported by the deployed torch version.
	model = torch.compile(model)
	# The processor supplies the tokenizer and feature extractor used by the pipeline.
	processor = AutoProcessor.from_pretrained(model_id)

	# Build a CPU-based pipeline with chunking
	# chunk_length_s / stride_length_s control how long audio is windowed;
	# presumably 20 s chunks with 5 s of overlap on each side — confirm against
	# the transformers ASR pipeline documentation.
	transcriber = pipeline(
	task="automatic-speech-recognition",
	model=model,
	tokenizer=processor.tokenizer,
	feature_extractor=processor.feature_extractor,
	device=-1, # CPU
	chunk_length_s=20,
	stride_length_s=(5, 5),
	)


	def add_urdu_punctuation(text):
	    """Add heuristic Urdu punctuation to a raw transcript.

	    Two rules are applied:

	    1. An Urdu comma (U+060C) is inserted after each listed conjunction,
	       but only when more words follow it on the same line. A conjunction
	       that is already followed by punctuation (optionally after
	       whitespace), or that ends the line, is left alone so that marks
	       never stack up.
	    2. The text is split on newlines; each non-empty line is stripped and
	       terminated with an Urdu full stop (U+06D4) unless it already ends
	       with a sentence-final mark (Urdu full stop, Urdu question mark,
	       "!", "?" or ".").

	    This is a simple heuristic and will not be right for every sentence.

	    Args:
	        text: Transcript to punctuate. ``None`` or an empty string gives "".

	    Returns:
	        The punctuated, non-empty lines joined with single newlines.
	    """
	    if not text:
	        return ""

	    urdu_comma = "\u060c"
	    urdu_full_stop = "\u06d4"
	    urdu_question_mark = "\u061f"

	    # List of common Urdu conjunctions that receive a trailing comma.
	    conjunctions = ["اور", "لیکن", "مگر", "یا", "پھر", "جبکہ", "کیونکہ", "تاہم"]
	    sentence_enders = (urdu_full_stop, urdu_question_mark, "!", "?", ".")

	    # One escaped alternation replaces a separate pass per conjunction. The
	    # negative lookahead rejects a conjunction that is followed (after any
	    # whitespace) by punctuation or by the end of the line; re.MULTILINE
	    # makes `$` match before each newline, because every line is treated
	    # as its own sentence below.
	    alternation = "|".join(re.escape(word) for word in conjunctions)
	    comma_after_conjunction = re.compile(
	        rf"\b({alternation})\b"
	        rf"(?!\s*(?:[{urdu_comma},{urdu_full_stop}{urdu_question_mark}!?.]|$))",
	        re.MULTILINE,
	    )
	    text = comma_after_conjunction.sub(rf"\1{urdu_comma}", text)

	    # Each newline-separated chunk is one sentence; blank chunks are dropped.
	    processed = []
	    for line in text.split("\n"):
	        sentence = line.strip()
	        if not sentence:
	            continue
	        # Close the sentence only when it has no terminal mark yet.
	        if not sentence.endswith(sentence_enders):
	            sentence += urdu_full_stop
	        processed.append(sentence)
	    return "\n".join(processed)


	def transcribe(audio):
	    """Transcribe one Gradio audio sample into punctuated Urdu text.

	    Args:
	        audio: ``None`` when nothing was recorded or uploaded, otherwise a
	            ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
	            array. Arrays with more than one dimension are averaged over
	            axis 1 (assumed to be the channel axis — confirm against the
	            Gradio ``Audio(type="numpy")`` contract).

	    Returns:
	        The transcript after ``add_urdu_punctuation``, or a user-facing
	        message when there is no audio or the audio is silent.
	    """
	    no_audio_message = "No audio provided. Please record or upload an audio file."
	    if audio is None:
	        return no_audio_message

	    sr, y = audio
	    # A zero-length recording has no peak: np.max raises ValueError on a
	    # zero-size array, so treat it the same as missing audio.
	    if y.size == 0:
	        return no_audio_message

	    # mono & normalize
	    if y.ndim > 1:
	        y = y.mean(axis=1)
	    y = y.astype(np.float32)
	    peak = np.max(np.abs(y))
	    # `not peak > 0` (rather than `peak <= 0`) also rejects a NaN peak.
	    if not peak > 0:
	        return "Audio appears to be silent. Please try again."
	    y /= peak

	    # Inference under no_grad
	    with torch.no_grad():
	        result = transcriber({"sampling_rate": sr, "raw": y})
	    text = result.get("text", "")
	    # Add Urdu punctuation
	    return add_urdu_punctuation(text)


	# —— Gradio UI ——
	# Static page content: intro HTML above the widget, footer HTML below it,
	# and the bundled sample clips offered as one-click examples.
	description = """
	<p style='text-align: center'>
	Record or upload audio in Urdu and get the transcribed text using the Whisper Large V3 Turbo Urdu model.
	</p>
	"""
	article = """
	<p style='text-align: center; color: #34C759;'>
	<a href='https://github.com/kingabzpro/simple-mlops-with-urdu-asr' target='_blank' style='text-decoration: none; color: #34C759;'>
	🌿 Explore the project on GitHub 📚
	</a>
	</p>
	"""
	examples = [["samples/audio1.mp3"], ["samples/audio2.mp3"], ["samples/audio3.mp3"]]

	# Input widget: microphone or file upload, handed to `transcribe` as NumPy data.
	_audio_input = gr.Audio(
	    sources=["microphone", "upload"],
	    type="numpy",
	    label="Record or Upload Audio (Urdu)",
	)
	# Output widget: plain text box for the transcript.
	_text_output = gr.Textbox(
	    label="Transcribed Text (Urdu)",
	    placeholder="Transcribed Urdu text will appear here...",
	)

	# Wire the widgets to the transcription function.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=_audio_input,
	    outputs=_text_output,
	    title="Urdu Speech Recognition",
	    description=description,
	    examples=examples,
	    article=article,
	    allow_flagging="never",
	    theme="JohnSmith9982/small_and_pretty",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()