import os
import re
import warnings

import gradio as gr
import numpy as np
import torch
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    logging,
    pipeline,
)

# Suppress FutureWarning messages for the rest of the process.
warnings.simplefilter("ignore", FutureWarning)

# —— CPU performance tweaks ——
# Cap the OpenMP / MKL environment settings and PyTorch's thread count at 4.
# NOTE(review): these environment variables are assigned after `torch` has
# already been imported at the top of the file — confirm the native thread
# pools still honor them. The explicit torch.set_num_threads(4) call below
# does not rely on the environment.
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"
torch.set_num_threads(4)

# `logging` here is transformers.logging (see imports); keep only errors.
logging.set_verbosity_error()

# —— Model & device setup ——
# Hub identifier shared by the model load and the processor load below.
model_id = "kingabzpro/whisper-large-v3-turbo-urdu"

# Load in fp32 and quantize to int8
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    use_safetensors=True,
)
# Put the model in evaluation mode before it is quantized.
model.eval()
# Dynamic quantization restricted to torch.nn.Linear modules, using qint8.
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
# NOTE(review): the quantized model is wrapped with torch.compile before being
# handed to the pipeline — confirm this combination works on the target
# platform.
model = torch.compile(model)
# The processor provides the tokenizer and feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)

# Build a CPU-based pipeline with chunking
# `transcriber` is the module-level callable that transcribe() invokes.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=-1,  # CPU
    # Chunk length and (left, right) stride; presumably in seconds, as the
    # `_s` suffix suggests — verify against the pipeline documentation.
    chunk_length_s=20,
    stride_length_s=(5, 5),
)


# Common Urdu conjunctions that receive an Urdu comma (،) when more text follows.
_URDU_CONJUNCTIONS = ("اور", "لیکن", "مگر", "یا", "پھر", "جبکہ", "کیونکہ", "تاہم")

# Marks that already close a sentence; a line ending in one of these is left alone.
_SENTENCE_ENDINGS = ("۔", "؟", "!", ".", "?", "…")

# One pre-compiled pattern for all conjunctions. The negative lookahead skips a
# conjunction that is already followed by punctuation (optionally after
# spaces/tabs) or that sits at the end of its line, so no comma is ever placed
# directly before the closing full stop.
_CONJUNCTION_RE = re.compile(
    r"\b("
    + "|".join(re.escape(word) for word in _URDU_CONJUNCTIONS)
    + r")\b(?![^\S\n]*(?:[،۔؟؛!?.,;:]|$))",
    re.MULTILINE,
)


def add_urdu_punctuation(text):
    """Apply simple heuristic Urdu punctuation to transcribed text.

    An Urdu comma (،) is inserted after each known conjunction that is
    followed by more words on the same line, and an Urdu full stop (۔) is
    appended to every non-empty line that does not already end with a
    sentence-ending mark (Urdu or Latin). This is a heuristic and will not be
    linguistically correct in every case.

    Args:
        text: The raw transcription. Newlines separate sentences; blank
            lines are dropped. ``None`` or an empty string yields ``""``.

    Returns:
        The punctuated text, with one stripped sentence per line.
    """
    if not text:
        return ""

    text = _CONJUNCTION_RE.sub(r"\1،", text)

    processed = []
    for sentence in text.split("\n"):
        sentence = sentence.strip()
        if not sentence:
            continue
        # Avoid doubling up when the model already produced final punctuation.
        if not sentence.endswith(_SENTENCE_ENDINGS):
            sentence += "۔"
        processed.append(sentence)
    return "\n".join(processed)


def transcribe(audio):
    """Transcribe a Gradio audio input and return punctuated Urdu text.

    Args:
        audio: Either ``None`` (nothing recorded/uploaded) or a
            ``(sampling_rate, samples)`` tuple as produced by a
            ``gr.Audio(type="numpy")`` component. Multi-dimensional sample
            arrays are averaged over axis 1 to obtain a single channel.

    Returns:
        The transcription with heuristic Urdu punctuation applied, or a
        human-readable message when there is no usable audio (missing,
        empty, or silent input).
    """
    if audio is None:
        return "No audio provided. Please record or upload an audio file."

    sr, y = audio
    y = np.asarray(y)
    # A zero-length recording would make np.max raise on an empty reduction.
    if y.size == 0:
        return "No audio provided. Please record or upload an audio file."

    # mono & normalize
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if not peak > 0:
        return "Audio appears to be silent. Please try again."
    # Peak-normalize so the loudest sample has magnitude 1.
    y = y / peak

    # Inference under no_grad
    with torch.no_grad():
        result = transcriber({"sampling_rate": sr, "raw": y})
    text = result.get("text", "")
    # Add Urdu punctuation
    return add_urdu_punctuation(text)


# —— Gradio UI ——
# HTML blurb passed to the interface as its description.
description = """
<p style='text-align: center'>
Record or upload audio in Urdu and get the transcribed text using the Whisper Large V3 Turbo Urdu model.
</p>
"""

# One single-input row per bundled sample clip.
examples = [
    [sample_path]
    for sample_path in (
        "samples/audio1.mp3",
        "samples/audio2.mp3",
        "samples/audio3.mp3",
    )
]

# HTML footer linking to the project repository.
article = """
<p style='text-align: center; color: #34C759;'>
<a href='https://github.com/kingabzpro/simple-mlops-with-urdu-asr' target='_blank' style='text-decoration: none; color: #34C759;'>
🌿 Explore the project on GitHub 📚
</a>
</p>
"""

# Input component: microphone or file upload, delivered to `transcribe` as numpy data.
_audio_input = gr.Audio(
    label="Record or Upload Audio (Urdu)",
    type="numpy",
    sources=["microphone", "upload"],
)

# Output component: plain textbox holding the transcription.
_text_output = gr.Textbox(
    placeholder="Transcribed Urdu text will appear here...",
    label="Transcribed Text (Urdu)",
)

# Assemble the interface around `transcribe`; flagging is turned off.
demo = gr.Interface(
    title="Urdu Speech Recognition",
    description=description,
    article=article,
    fn=transcribe,
    inputs=_audio_input,
    outputs=_text_output,
    examples=examples,
    theme="JohnSmith9982/small_and_pretty",
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()