# Spaces: balaji4991512/Text_Language_Detector (Running)
# File size: 1,844 Bytes
# Revision: 9185bc5

import torch
import gradio as gr
from transformers import pipeline
import pycountry

# Build the text-classification pipeline once, at import time, so every request
# reuses the same model instance. Weights are requested in bfloat16 precision.
language_detector = pipeline(
    task="text-classification",
    torch_dtype=torch.bfloat16,
    model="papluca/xlm-roberta-base-language-detection",
)

def detect_language(text: str) -> str:
    """
    Detect the language of ``text`` and describe the result in one line.

    Args:
        text: The text to classify.

    Returns:
        A string of the form ``"<name> (<code>) — <score>"``, where ``<code>``
        is the label returned by the classifier, ``<name>`` is the language
        name pycountry has for that code (or the upper-cased code when
        pycountry has no match), and ``<score>`` is the classifier score
        formatted with two decimals. For empty or whitespace-only input a
        short prompt asking for text is returned instead and the model is
        not run.
    """
    # Nothing to classify: skip the model rather than report a meaningless guess.
    if not text or not text.strip():
        return "Please enter some text to detect its language."

    # truncation=True clips over-long input to the tokenizer's maximum length
    # so very long pasted text does not fail inside the model.
    result = language_detector(text, truncation=True)[0]
    code = result["label"]      # e.g. "en", "ta", "fr"
    score = result["score"]

    # Map the ISO code to a full language name using pycountry. A failed
    # lookup may either raise a LookupError or yield None (presumably
    # depending on the pycountry version), so both cases fall back to the code.
    try:
        language = pycountry.languages.get(alpha_2=code)
    except LookupError:
        language = None
    name = getattr(language, "name", None)
    lang = name if name is not None else code.upper()

    return f"{lang} ({code}) — {score:.2f}"

# Assemble the Gradio UI: heading, input/output row, trigger button, footer.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Page heading with a one-line usage hint.
    gr.Markdown(
        """
        # 🌐 Text Language Detector  
        Type or paste text below to detect its language (name + code + confidence).
        """
    )

    # Editable input box and read-only result box share a single row.
    with gr.Row():
        text_input = gr.Textbox(
            lines=4,
            label="📝 Input Text",
            placeholder="Type or paste text here...",
            show_copy_button=True,
        )
        lang_output = gr.Textbox(
            lines=1,
            label="✅ Detected Language",
            placeholder="Full language name, ISO code, and confidence will appear here",
            interactive=False,
        )

    # Clicking the button passes the input text to detect_language and
    # writes the returned string into the result box.
    detect_btn = gr.Button(value="🔍 Detect Language")
    detect_btn.click(detect_language, text_input, lang_output)

    # Footer crediting the libraries the app is built on.
    gr.Markdown(
        """
        ---  
        Built with 🤗 Transformers (`papluca/xlm-roberta-base-language-detection`),  
        `pycountry` for language names, and 🚀 Gradio  
        """
    )

# Start the app.
demo.launch()