File size: 1,844 Bytes
9185bc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import torch
import gradio as gr
from transformers import pipeline
import pycountry
# Build the text-classification pipeline once at import time so that every
# call to detect_language() reuses the same loaded model. Weights are
# requested in bfloat16 precision.
language_detector = pipeline(
    task="text-classification",
    model="papluca/xlm-roberta-base-language-detection",
    torch_dtype=torch.bfloat16,
)
def detect_language(text: str) -> str:
    """Detect the language of *text* and format the result for display.

    Runs the module-level ``language_detector`` pipeline on the input, takes
    its first (top) prediction, and resolves the predicted code to a full
    language name through ``pycountry``.

    Args:
        text: The text whose language should be identified.

    Returns:
        A string of the form ``"<name> (<code>) — <score>"`` with the score
        rendered to two decimals. When ``pycountry`` has no entry for the
        predicted code, the upper-cased code is used in place of the name.
        Blank or whitespace-only input yields an empty string and the model
        is not invoked.
    """
    # Nothing to classify: skip inference instead of reporting a language
    # for empty input.
    if not text or not text.strip():
        return ""

    # truncation=True is forwarded to the tokenizer so that a very long paste
    # is cut to the tokenizer's maximum length instead of being fed to the
    # model in full.
    result = language_detector(text, truncation=True)[0]
    code = result["label"]  # e.g. "en", "ta", "fr"
    score = result["score"]

    # Map the code to a full language name. The lookup is expected to return
    # None for an unknown code; LookupError is also handled in case the
    # installed pycountry release raises instead (NOTE(review): confirm
    # against the pinned version).
    try:
        language = pycountry.languages.get(alpha_2=code)
    except LookupError:
        language = None
    lang = language.name if language is not None else code.upper()

    return f"{lang} ({code}) — {score:.2f}"
# Assemble the Gradio UI: header, input/output row, trigger button, footer.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Page title and one-line usage hint.
    gr.Markdown(
        value=(
            "\n"
            "# 🌐 Text Language Detector\n"
            "Type or paste text below to detect its language (name + code + confidence).\n"
        )
    )

    # Editable input box next to a read-only result box.
    with gr.Row():
        text_input = gr.Textbox(
            lines=4,
            label="📝 Input Text",
            placeholder="Type or paste text here...",
            show_copy_button=True,
        )
        lang_output = gr.Textbox(
            lines=1,
            label="✅ Detected Language",
            placeholder="Full language name, ISO code, and confidence will appear here",
            interactive=False,
        )

    # Clicking the button sends the input text through detect_language and
    # writes the returned string into the result box.
    detect_btn = gr.Button(value="🔍 Detect Language")
    detect_btn.click(detect_language, inputs=text_input, outputs=lang_output)

    # Footer crediting the libraries used.
    gr.Markdown(
        value=(
            "\n"
            "---\n"
            "Built with 🤗 Transformers (`papluca/xlm-roberta-base-language-detection`),\n"
            "`pycountry` for language names, and 🚀 Gradio\n"
        )
    )

demo.launch()