|
import gradio as gr |
|
import pandas as pd |
|
from backend.language_detector import LanguageDetector |
|
from typing import List, Dict, Any |
|
import time |
|
|
|
|
|
from demo_page import DEMO_SAMPLES, initialize_models, detect_with_all_models, create_results_dataframe, run_demo_tests |
|
|
|
def _model_dropdown_choices(available_models):
    """Return ``(label, key)`` dropdown pairs for every registered model.

    Args:
        available_models: Mapping of model key to an info dict that carries
            at least ``"display_name"`` and ``"status"``.

    Returns:
        A list of ``(label, key)`` tuples in the mapping's iteration order.
        Models whose status is not ``"available"`` remain selectable but
        get a " (Coming Soon)" suffix on their label.
    """
    return [
        (
            info["display_name"]
            if info["status"] == "available"
            else f"{info['display_name']} (Coming Soon)",
            key,
        )
        for key, info in available_models.items()
    ]


def _detection_failure(headline, message):
    """Build the five-output tuple used when detection cannot produce a result.

    Args:
        headline: Short text placed in the "Detected Language" box.
        message: Explanation shown in the (made visible) status box.

    Returns:
        Values for (detected language, confidence, language code,
        top predictions, status) in the order the event handlers expect.
    """
    return (headline, 0.0, "", [], gr.update(value=message, visible=True))


def create_single_model_interface():
    """Create the original single model interface.

    Builds a column containing a model selector with an information panel,
    a text input with example sentences, and result widgets (language,
    confidence, language code, top predictions, status). The detect button
    and the textbox submit event both run the detection handler; changing
    the dropdown refreshes the model information panel.

    Returns:
        The ``gr.Column`` that wraps the whole interface.
    """
    detector = LanguageDetector()

    with gr.Column() as single_interface:
        gr.Markdown("# 🌍 Language Detection App")
        gr.Markdown("Select a model and enter text below to detect its language with confidence scores.")

        # --- Model selection -------------------------------------------------
        with gr.Group():
            gr.Markdown(
                "<div style='text-align: center; padding: 16px 0 8px 0; margin-bottom: 16px; font-size: 18px; font-weight: 600; border-bottom: 2px solid; background: linear-gradient(90deg, transparent, rgba(99, 102, 241, 0.1), transparent); border-radius: 8px 8px 0 0;'>🤖 Model Selection</div>"
            )

            model_choices = _model_dropdown_choices(detector.get_available_models())

            # NOTE(review): the preselected key is hard-coded while the info
            # panel below shows the detector's *current* model — confirm that
            # LanguageDetector() starts on "model-a-dataset-a".
            model_selector = gr.Dropdown(
                choices=model_choices,
                value="model-a-dataset-a",
                label="Choose Language Detection Model",
                interactive=True,
            )

            model_info_display = gr.Markdown(
                value=_format_model_info(detector.get_current_model_info()),
                label="Model Information",
            )

        # Visual separator between the two sections.
        gr.Markdown(
            "<div style='margin: 24px 0; border-top: 3px solid rgba(99, 102, 241, 0.2); background: linear-gradient(90deg, transparent, rgba(99, 102, 241, 0.05), transparent); height: 2px;'></div>"
        )

        # --- Analysis ----------------------------------------------------------
        with gr.Group():
            gr.Markdown(
                "<div style='text-align: center; padding: 16px 0 8px 0; margin-bottom: 16px; font-size: 18px; font-weight: 600; border-bottom: 2px solid; background: linear-gradient(90deg, transparent, rgba(34, 197, 94, 0.1), transparent); border-radius: 8px 8px 0 0;'>🔍 Language Analysis</div>"
            )

            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        label="Text to Analyze",
                        placeholder="Enter text here to detect its language...",
                        lines=5,
                        max_lines=10,
                    )

                    detect_btn = gr.Button("🔍 Detect Language", variant="primary", size="lg")

                    gr.Examples(
                        examples=[
                            ["Hello, how are you today?"],
                            ["Bonjour, comment allez-vous?"],
                            ["Hola, ¿cómo estás?"],
                            ["Guten Tag, wie geht es Ihnen?"],
                            ["こんにちは、元気ですか?"],
                            ["Привет, как дела?"],
                            ["Ciao, come stai?"],
                            ["Olá, como você está?"],
                            ["你好,你好吗?"],
                            ["안녕하세요, 어떻게 지내세요?"],
                        ],
                        inputs=text_input,
                        label="Try these examples:",
                    )

                with gr.Column(scale=2):
                    with gr.Group():
                        gr.Markdown(
                            "<div style='text-align: center; padding: 16px 0 8px 0; margin-bottom: 12px; font-size: 18px; font-weight: 600; border-bottom: 2px solid; background: linear-gradient(90deg, transparent, rgba(168, 85, 247, 0.1), transparent); border-radius: 8px 8px 0 0;'>📊 Detection Results</div>"
                        )

                        detected_language = gr.Textbox(
                            label="Detected Language",
                            interactive=False,
                        )

                        confidence_score = gr.Number(
                            label="Confidence Score",
                            interactive=False,
                            precision=4,
                        )

                        language_code = gr.Textbox(
                            label="Language Code (ISO 639-1)",
                            interactive=False,
                        )

                        top_predictions = gr.Dataframe(
                            headers=["Language", "Code", "Confidence"],
                            label="Top 5 Predictions",
                            interactive=False,
                            wrap=True,
                        )

        # Status box stays hidden until a handler has something to report.
        # NOTE(review): the nesting level of this row could not be confirmed
        # from the original formatting — verify its placement in the layout.
        with gr.Row():
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
                visible=False,
            )

        # --- Event handlers ----------------------------------------------------
        def detect_language_wrapper(text, selected_model):
            """Run detection on ``text`` with ``selected_model``.

            Returns a 5-tuple matching the outputs wired below: detected
            language, confidence, language code, prediction rows, and a
            status-box update. Any failure is reported through the status
            box instead of being raised.
            """
            # `not text` also covers None, on which .strip() would raise
            # AttributeError before any message could be shown.
            if not text or not text.strip():
                return _detection_failure(
                    "No text provided",
                    "Please enter some text to analyze.",
                )

            try:
                # Only switch when the selection differs from the active model.
                if detector.current_model_key != selected_model:
                    try:
                        detector.switch_model(selected_model)
                    except NotImplementedError:
                        return _detection_failure(
                            "Model unavailable",
                            "This model is not yet implemented. Please select an available model.",
                        )
                    except Exception as e:
                        return _detection_failure(
                            "Model error",
                            f"Error loading model: {str(e)}",
                        )

                result = detector.detect_language(text)

                main_lang = result['language']
                main_confidence = result['confidence']
                main_code = result['language_code']

                # Confidence is pre-formatted so the table shows 4 decimals.
                predictions_table = [
                    [pred['language'], pred['language_code'], f"{pred['confidence']:.4f}"]
                    for pred in result['top_predictions']
                ]

                model_info = result.get('metadata', {}).get('model_info', {})
                model_name = model_info.get('name', 'Unknown Model')

                # Echo at most the first 100 characters of the input.
                preview = text[:100] + ('...' if len(text) > 100 else '')
                status_message = (
                    "✅ Analysis Complete\n\n"
                    f"Input Text: {preview}\n\n"
                    f"Detected Language: {main_lang} ({main_code})\n"
                    f"Confidence: {main_confidence:.2%}\n\n"
                    f"Model: {model_name}"
                )

                return (
                    main_lang,
                    main_confidence,
                    main_code,
                    predictions_table,
                    gr.update(value=status_message, visible=True),
                )
            except Exception as e:
                # UI boundary: surface the problem in the status box.
                return _detection_failure("Error occurred", f"Error: {str(e)}")

        def update_model_info(selected_model):
            """Update model information display when model selection changes.

            Switches the detector to ``selected_model`` when it is not already
            active, then returns the formatted info for the current model.
            Errors are returned as Markdown text rather than raised.
            """
            try:
                if detector.current_model_key != selected_model:
                    detector.switch_model(selected_model)
                return _format_model_info(detector.get_current_model_info())
            except NotImplementedError:
                return "**This model is not yet implemented.** Please select an available model."
            except Exception as e:
                return f"**Error loading model information:** {str(e)}"

        # The button click and pressing Enter in the textbox share one wiring.
        detection_wiring = {
            "fn": detect_language_wrapper,
            "inputs": [text_input, model_selector],
            "outputs": [detected_language, confidence_score, language_code, top_predictions, status_text],
        }
        detect_btn.click(**detection_wiring)
        text_input.submit(**detection_wiring)

        model_selector.change(
            fn=update_model_info,
            inputs=[model_selector],
            outputs=[model_info_display],
        )

    return single_interface
|
|
|
def create_demo_comparison_interface():
    """Create the demo comparison interface.

    Builds a column with a model status summary, a category selector fed by
    ``DEMO_SAMPLES``, a free-text box for custom samples, run/clear buttons,
    a static explanation of each category, and initially hidden result
    widgets (summary text and comparison table).

    Returns:
        The ``gr.Column`` that wraps the whole interface.
    """
    models = initialize_models()

    with gr.Column() as demo_interface:
        gr.Markdown("# 🚀 Language Detection Demo - Model Comparison")
        gr.Markdown("Compare all four language detection models simultaneously across various difficulty categories.")

        # --- Model status ------------------------------------------------------
        with gr.Group():
            gr.Markdown("## 🤖 Model Status")
            # One "<icon> **name**: status" paragraph per model.
            model_status_text = "".join(
                f"{'✅' if info['status'] == 'Ready' else '❌'} **{info['name']}**: {info['status']}\n\n"
                for info in models.values()
            )
            gr.Markdown(model_status_text)

        # --- Category selection ------------------------------------------------
        with gr.Group():
            gr.Markdown("## 📊 Test Categories")
            gr.Markdown("Select categories to test different aspects of language detection difficulty:")

            # Preselect only categories that actually exist in DEMO_SAMPLES so
            # the initial value can never disagree with the offered choices.
            default_categories = [
                name for name in ("Easy/Obvious", "Short Text") if name in DEMO_SAMPLES
            ]

            category_checkboxes = gr.CheckboxGroup(
                choices=list(DEMO_SAMPLES.keys()),
                label="Select Test Categories",
                value=default_categories,
                interactive=True,
            )

        # --- Custom input ------------------------------------------------------
        with gr.Group():
            gr.Markdown("## ✏️ Custom Text Input")
            gr.Markdown("Enter your own texts to test (one per line):")

            custom_text_input = gr.Textbox(
                label="Custom Texts",
                placeholder="Enter custom texts here, one per line...\nExample:\nHello world\nBonjour le monde\n你好世界",
                lines=5,
                max_lines=10,
            )

        with gr.Row():
            run_demo_btn = gr.Button("🔍 Run Demo Tests", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear Results", variant="secondary")

        # --- Category explanations ---------------------------------------------
        with gr.Group():
            gr.Markdown("## 📚 Category Explanations")
            gr.Markdown("Understanding what each test category evaluates:")

            # Joined from separate entries so source indentation never ends up
            # inside the Markdown text; entries are blank-line separated.
            explanation_entries = (
                "**Easy/Obvious:** Clear, unambiguous sentences in their native language. Tests basic language detection capability.",
                "**Short Text:** Single words or very short phrases. Tests model performance with minimal context.",
                "**False Friends:** Words that look similar across languages but have different meanings. Tests ability to distinguish between closely related languages.",
                "**Mixed Scripts:** Text containing multiple languages, numbers, symbols, or scripts. Tests handling of multilingual content.",
                "**Proper Nouns:** Names of people, places, or entities that exist across multiple languages. Tests context-dependent detection.",
                "**Common Words:** International words with similar spelling across languages (hotel, taxi, etc.). Tests disambiguation of universal terms.",
                "**Technical Terms:** Specialized vocabulary that may be borrowed or translated across languages. Tests domain-specific detection.",
                "**Code-switching:** Text that switches between languages mid-sentence. Tests handling of bilingual communication patterns.",
                "**Transliterated Text:** Non-Latin scripts written in Latin characters. Tests recognition of transliteration vs. native language.",
                "**Ambiguous Script:** Words that could belong to multiple languages with identical spelling. Tests the model's decision-making under uncertainty.",
            )
            category_explanations = "\n" + "\n\n".join(explanation_entries) + "\n"

            gr.Markdown(category_explanations)

        # --- Results -------------------------------------------------------------
        with gr.Group():
            gr.Markdown("## 📈 Results")

            summary_output = gr.Textbox(
                label="Summary",
                interactive=False,
                visible=False,
            )

            results_dataframe = gr.Dataframe(
                label="Model Comparison Results (A-A: Model A Dataset A, B-A: Model B Dataset A, A-B: Model A Dataset B, B-B: Model B Dataset B)",
                wrap=True,
                interactive=False,
                visible=False,
            )

        # --- Event handlers ------------------------------------------------------
        def run_tests(selected_cats, custom_texts):
            """Run the demo tests and return updates for (summary, table).

            The summary is always made visible. The table is shown only when
            ``run_demo_tests`` returns a dataframe; it is hidden when that
            value is ``None`` or when the run raises.
            """
            try:
                summary, df = run_demo_tests(selected_cats, custom_texts, models)
            except Exception as e:
                # UI boundary: report the failure in the summary box, the same
                # way the single-model tab reports errors in its status box.
                return (
                    gr.update(value=f"Error: {str(e)}", visible=True),
                    gr.update(visible=False),
                )

            if df is None:
                return (
                    gr.update(value=summary, visible=True),
                    gr.update(visible=False),
                )

            return (
                gr.update(value=summary, visible=True),
                gr.update(value=df, visible=True),
            )

        def clear_results():
            """Empty and hide both result widgets."""
            return (
                gr.update(value="", visible=False),
                gr.update(value=None, visible=False),
            )

        run_demo_btn.click(
            fn=run_tests,
            inputs=[category_checkboxes, custom_text_input],
            outputs=[summary_output, results_dataframe],
        )

        clear_btn.click(
            fn=clear_results,
            outputs=[summary_output, results_dataframe],
        )

    return demo_interface
|
|
|
def _format_model_info(model_info): |
|
"""Format model information for display.""" |
|
if not model_info: |
|
return "No model information available." |
|
|
|
formatted_info = f""" |
|
**{model_info.get('name', 'Unknown Model')}** |
|
|
|
{model_info.get('description', 'No description available.')} |
|
|
|
**📊 Performance:** |
|
- Accuracy: {model_info.get('accuracy', 'N/A')} |
|
- Model Size: {model_info.get('model_size', 'N/A')} |
|
|
|
**🏗️ Architecture:** |
|
- Model Architecture: {model_info.get('architecture', 'N/A')} |
|
- Base Model: {model_info.get('base_model', 'N/A')} |
|
- Training Dataset: {model_info.get('dataset', 'N/A')} |
|
|
|
**🌐 Languages:** {model_info.get('languages_supported', 'N/A')} |
|
|
|
**⚙️ Training Details:** {model_info.get('training_details', 'N/A')} |
|
|
|
**💡 Use Cases:** {model_info.get('use_cases', 'N/A')} |
|
|
|
**✅ Strengths:** {model_info.get('strengths', 'N/A')} |
|
|
|
**⚠️ Limitations:** {model_info.get('limitations', 'N/A')} |
|
""" |
|
return formatted_info |
|
|
|
def main():
    """Create the main application with tabbed interface.

    Assembles a ``gr.Blocks`` app with a title, a short introduction, and one
    tab per sub-interface (single model detection, model comparison demo).

    Returns:
        The assembled ``gr.Blocks`` application; the caller launches it.
    """
    # (tab label, builder) pairs, in display order. Each builder creates its
    # components inside whatever Gradio context is active when it is called.
    tab_builders = (
        ("🔍 Single Model Detection", create_single_model_interface),
        ("🚀 Model Comparison Demo", create_demo_comparison_interface),
    )

    suite = gr.Blocks(title="Language Detection App Suite", theme=gr.themes.Soft())
    with suite:
        gr.Markdown("# 🌍 Language Detection App Suite")
        gr.Markdown("Choose between single model testing or comprehensive model comparison.")

        with gr.Tabs():
            for tab_label, build_tab in tab_builders:
                with gr.TabItem(tab_label):
                    build_tab()

    return suite
|
|
|
if __name__ == "__main__":
    # Script entry point: build the tabbed app and serve it with Gradio's
    # default launch settings.
    main().launch()