import gradio as gr import pandas as pd from backend.language_detector import LanguageDetector from typing import List, Dict, Any import time # Import demo samples from demo_page from demo_page import DEMO_SAMPLES, initialize_models, detect_with_all_models, create_results_dataframe, run_demo_tests def create_single_model_interface(): """Create the original single model interface.""" # Initialize the language detector with default model (Model A Dataset A) detector = LanguageDetector() with gr.Column() as single_interface: gr.Markdown("# 🌍 Language Detection App") gr.Markdown("Select a model and enter text below to detect its language with confidence scores.") # Model Selection Section with visual styling with gr.Group(): gr.Markdown( "
πŸ€– Model Selection
" ) # Get available models available_models = detector.get_available_models() model_choices = [] model_info_map = {} for key, info in available_models.items(): if info["status"] == "available": model_choices.append((info["display_name"], key)) else: model_choices.append((f"{info['display_name']} (Coming Soon)", key)) model_info_map[key] = info model_selector = gr.Dropdown( choices=model_choices, value="model-a-dataset-a", # Default to Model A Dataset A label="Choose Language Detection Model", interactive=True ) # Model Information Display model_info_display = gr.Markdown( value=_format_model_info(detector.get_current_model_info()), label="Model Information" ) # Add visual separator gr.Markdown( "
" ) # Analysis Section with gr.Group(): gr.Markdown( "
πŸ” Language Analysis
" ) with gr.Row(): with gr.Column(scale=2): # Input section text_input = gr.Textbox( label="Text to Analyze", placeholder="Enter text here to detect its language...", lines=5, max_lines=10 ) detect_btn = gr.Button("πŸ” Detect Language", variant="primary", size="lg") # Example texts gr.Examples( examples=[ ["Hello, how are you today?"], ["Bonjour, comment allez-vous?"], ["Hola, ΒΏcΓ³mo estΓ‘s?"], ["Guten Tag, wie geht es Ihnen?"], ["γ“γ‚“γ«γ‘γ―γ€ε…ƒζ°—γ§γ™γ‹οΌŸ"], ["ΠŸΡ€ΠΈΠ²Π΅Ρ‚, ΠΊΠ°ΠΊ Π΄Π΅Π»Π°?"], ["Ciao, come stai?"], ["OlΓ‘, como vocΓͺ estΓ‘?"], ["δ½ ε₯½οΌŒδ½ ε₯½ε—οΌŸ"], ["μ•ˆλ…•ν•˜μ„Έμš”, μ–΄λ–»κ²Œ μ§€λ‚΄μ„Έμš”?"] ], inputs=text_input, label="Try these examples:" ) with gr.Column(scale=2): # Output section with gr.Group(): gr.Markdown( "
πŸ“Š Detection Results
" ) detected_language = gr.Textbox( label="Detected Language", interactive=False ) confidence_score = gr.Number( label="Confidence Score", interactive=False, precision=4 ) language_code = gr.Textbox( label="Language Code (ISO 639-1)", interactive=False ) # Top predictions table top_predictions = gr.Dataframe( headers=["Language", "Code", "Confidence"], label="Top 5 Predictions", interactive=False, wrap=True ) # Status/Info section with gr.Row(): status_text = gr.Textbox( label="Status", interactive=False, visible=False ) # Event handlers def detect_language_wrapper(text, selected_model): if not text.strip(): return ( "No text provided", 0.0, "", [], gr.update(value="Please enter some text to analyze.", visible=True) ) try: # Switch model if needed if detector.current_model_key != selected_model: try: detector.switch_model(selected_model) except NotImplementedError: return ( "Model unavailable", 0.0, "", [], gr.update(value="This model is not yet implemented. Please select an available model.", visible=True) ) except Exception as e: return ( "Model error", 0.0, "", [], gr.update(value=f"Error loading model: {str(e)}", visible=True) ) result = detector.detect_language(text) # Extract main prediction main_lang = result['language'] main_confidence = result['confidence'] main_code = result['language_code'] # Format top predictions for table predictions_table = [ [pred['language'], pred['language_code'], f"{pred['confidence']:.4f}"] for pred in result['top_predictions'] ] model_info = result.get('metadata', {}).get('model_info', {}) model_name = model_info.get('name', 'Unknown Model') return ( main_lang, main_confidence, main_code, predictions_table, gr.update(value=f"βœ… Analysis Complete\n\nInput Text: {text[:100]}{'...' if len(text) > 100 else ''}\n\nDetected Language: {main_lang} ({main_code})\nConfidence: {main_confidence:.2%}\n\nModel: {model_name}", visible=True) ) except Exception as e: return ( "Error occurred", 0.0, "", [], gr.update(value=f"Error: {str(e)}", visible=True) ) def update_model_info(selected_model): """Update model information display when model selection changes.""" try: if detector.current_model_key != selected_model: detector.switch_model(selected_model) model_info = detector.get_current_model_info() return _format_model_info(model_info) except NotImplementedError: return "**This model is not yet implemented.** Please select an available model." except Exception as e: return f"**Error loading model information:** {str(e)}" # Connect the button to the detection function detect_btn.click( fn=detect_language_wrapper, inputs=[text_input, model_selector], outputs=[detected_language, confidence_score, language_code, top_predictions, status_text] ) # Also trigger on Enter key in text input text_input.submit( fn=detect_language_wrapper, inputs=[text_input, model_selector], outputs=[detected_language, confidence_score, language_code, top_predictions, status_text] ) # Update model info when selection changes model_selector.change( fn=update_model_info, inputs=[model_selector], outputs=[model_info_display] ) return single_interface def create_demo_comparison_interface(): """Create the demo comparison interface.""" # Initialize models models = initialize_models() with gr.Column() as demo_interface: gr.Markdown("# πŸš€ Language Detection Demo - Model Comparison") gr.Markdown("Compare all four language detection models simultaneously across various difficulty categories.") # Model Status Section with gr.Group(): gr.Markdown("## πŸ€– Model Status") model_status_text = "" for model_key, model_info in models.items(): status_icon = "βœ…" if model_info["status"] == "Ready" else "❌" model_status_text += f"{status_icon} **{model_info['name']}**: {model_info['status']}\n\n" gr.Markdown(model_status_text) # Category Selection Section with gr.Group(): gr.Markdown("## πŸ“Š Test Categories") gr.Markdown("Select categories to test different aspects of language detection difficulty:") category_checkboxes = gr.CheckboxGroup( choices=list(DEMO_SAMPLES.keys()), label="Select Test Categories", value=["Easy/Obvious", "Short Text"], # Default selection interactive=True ) # Custom Text Input Section with gr.Group(): gr.Markdown("## ✏️ Custom Text Input") gr.Markdown("Enter your own texts to test (one per line):") custom_text_input = gr.Textbox( label="Custom Texts", placeholder="Enter custom texts here, one per line...\nExample:\nHello world\nBonjour le monde\nδ½ ε₯½δΈ–η•Œ", lines=5, max_lines=10 ) # Control Buttons with gr.Row(): run_demo_btn = gr.Button("πŸ” Run Demo Tests", variant="primary", size="lg") clear_btn = gr.Button("πŸ—‘οΈ Clear Results", variant="secondary") # Sample Preview Section (moved up, condensed) with gr.Group(): gr.Markdown("## πŸ“š Category Explanations") gr.Markdown("Understanding what each test category evaluates:") category_explanations = """ **Easy/Obvious:** Clear, unambiguous sentences in their native language. Tests basic language detection capability. **Short Text:** Single words or very short phrases. Tests model performance with minimal context. **False Friends:** Words that look similar across languages but have different meanings. Tests ability to distinguish between closely related languages. **Mixed Scripts:** Text containing multiple languages, numbers, symbols, or scripts. Tests handling of multilingual content. **Proper Nouns:** Names of people, places, or entities that exist across multiple languages. Tests context-dependent detection. **Common Words:** International words with similar spelling across languages (hotel, taxi, etc.). Tests disambiguation of universal terms. **Technical Terms:** Specialized vocabulary that may be borrowed or translated across languages. Tests domain-specific detection. **Code-switching:** Text that switches between languages mid-sentence. Tests handling of bilingual communication patterns. **Transliterated Text:** Non-Latin scripts written in Latin characters. Tests recognition of transliteration vs. native language. **Ambiguous Script:** Words that could belong to multiple languages with identical spelling. Tests the model's decision-making under uncertainty. """ gr.Markdown(category_explanations) # Results Section (moved to bottom) with gr.Group(): gr.Markdown("## πŸ“ˆ Results") summary_output = gr.Textbox( label="Summary", interactive=False, visible=False ) results_dataframe = gr.Dataframe( label="Model Comparison Results (A-A: Model A Dataset A, B-A: Model B Dataset A, A-B: Model A Dataset B, B-B: Model B Dataset B)", wrap=True, interactive=False, visible=False ) # Event Handlers def run_tests(selected_cats, custom_texts): summary, df = run_demo_tests(selected_cats, custom_texts, models) if df is not None: return ( gr.update(value=summary, visible=True), gr.update(value=df, visible=True) ) else: return ( gr.update(value=summary, visible=True), gr.update(visible=False) ) def clear_results(): return ( gr.update(value="", visible=False), gr.update(value=None, visible=False) ) # Connect event handlers run_demo_btn.click( fn=run_tests, inputs=[category_checkboxes, custom_text_input], outputs=[summary_output, results_dataframe] ) clear_btn.click( fn=clear_results, outputs=[summary_output, results_dataframe] ) return demo_interface def _format_model_info(model_info): """Format model information for display.""" if not model_info: return "No model information available." formatted_info = f""" **{model_info.get('name', 'Unknown Model')}** {model_info.get('description', 'No description available.')} **πŸ“Š Performance:** - Accuracy: {model_info.get('accuracy', 'N/A')} - Model Size: {model_info.get('model_size', 'N/A')} **πŸ—οΈ Architecture:** - Model Architecture: {model_info.get('architecture', 'N/A')} - Base Model: {model_info.get('base_model', 'N/A')} - Training Dataset: {model_info.get('dataset', 'N/A')} **🌐 Languages:** {model_info.get('languages_supported', 'N/A')} **βš™οΈ Training Details:** {model_info.get('training_details', 'N/A')} **πŸ’‘ Use Cases:** {model_info.get('use_cases', 'N/A')} **βœ… Strengths:** {model_info.get('strengths', 'N/A')} **⚠️ Limitations:** {model_info.get('limitations', 'N/A')} """ return formatted_info def main(): """Create the main application with tabbed interface.""" with gr.Blocks(title="Language Detection App Suite", theme=gr.themes.Soft()) as app: gr.Markdown("# 🌍 Language Detection App Suite") gr.Markdown("Choose between single model testing or comprehensive model comparison.") with gr.Tabs(): with gr.TabItem("πŸ” Single Model Detection"): single_model_interface = create_single_model_interface() with gr.TabItem("πŸš€ Model Comparison Demo"): demo_comparison_interface = create_demo_comparison_interface() return app if __name__ == "__main__": app = main() app.launch()