#!/usr/bin/env python3 """ Test script for Model B Dataset B - BERT + Enhanced Dataset This script tests the BERT based language detection model trained on the enhanced dataset, achieving the highest accuracy (99.85%). """ import sys import os # Add the project root to the Python path sys.path.insert(0, os.path.join(os.path.dirname(__file__))) from backend.language_detector import LanguageDetector def test_model_b_dataset_b(): """Test the Model B Dataset B implementation.""" print("🧪 Testing Model B Dataset B - BERT + Enhanced Dataset") print("=" * 75) try: # Initialize detector with Model B Dataset B (highest accuracy) detector = LanguageDetector(model_key="model-b-dataset-b") print("✅ Successfully initialized Model B Dataset B") # Test texts in the 20 supported languages test_texts = [ ("Hello, how are you today?", "en"), # English ("Bonjour, comment allez-vous?", "fr"), # French ("Hola, ¿cómo estás?", "es"), # Spanish ("Guten Tag, wie geht es Ihnen?", "de"), # German ("Ciao, come stai?", "it"), # Italian ("Olá, como você está?", "pt"), # Portuguese ("Привет, как дела?", "ru"), # Russian ("こんにちは、元気ですか?", "ja"), # Japanese ("你好,你好吗?", "zh"), # Chinese ("مرحبا، كيف حالك؟", "ar"), # Arabic ("नमस्ते, आप कैसे हैं?", "hi"), # Hindi ("Hallo, hoe gaat het met je?", "nl"), # Dutch ("Γεια σας, πώς είστε;", "el"), # Greek ("Здравейте, как сте?", "bg"), # Bulgarian ("Witaj, jak się masz?", "pl"), # Polish ("สวัสดี คุณเป็นอย่างไรบ้าง?", "th"), # Thai ("Merhaba, nasılsınız?", "tr"), # Turkish ("آپ کیسے ہیں؟", "ur"), # Urdu ("Xin chào, bạn khỏe không?", "vi"), # Vietnamese ("Habari, unajehje?", "sw") # Swahili ] print("\n🔍 Running language detection tests on 20 supported languages:") print("-" * 75) correct_predictions = 0 total_predictions = len(test_texts) for text, expected_lang in test_texts: try: result = detector.detect_language(text) predicted_lang = result['language_code'] confidence = result['confidence'] language_name = result['language'] # Check if prediction is correct is_correct = predicted_lang == expected_lang if is_correct: correct_predictions += 1 status = "✅" else: status = "❌" print(f"{status} Text: {text[:40]}{'...' 
if len(text) > 40 else ''}") print(f" Expected: {expected_lang} | Predicted: {predicted_lang} ({language_name})") print(f" Confidence: {confidence:.4f}") print() except Exception as e: print(f"❌ Error testing '{text[:30]}...': {str(e)}") print() # Calculate accuracy accuracy = (correct_predictions / total_predictions) * 100 print(f"📊 Test Results: {correct_predictions}/{total_predictions} correct") print(f"📈 Accuracy: {accuracy:.1f}%") # Test model info print("\n📋 Model Information:") print("-" * 75) model_info = detector.get_current_model_info() for key, value in model_info.items(): print(f"{key.title().replace('_', ' ')}: {value}") print("🎉 Model B Dataset B test completed successfully!") except Exception as e: print(f"❌ Test failed: {str(e)}") import traceback traceback.print_exc() return False return True def test_all_models_comprehensive(): """Test and compare all four available model combinations.""" print("\n🔄 Comprehensive All-Model Combinations Comparison") print("=" * 75) models_to_test = [ ("model-a-dataset-a", "Model A Dataset A", "XLM-RoBERTa + Standard", "97.9%"), ("model-b-dataset-a", "Model B Dataset A", "BERT + Standard", "96.17%"), ("model-a-dataset-b", "Model A Dataset B", "XLM-RoBERTa + Enhanced", "99.72%"), ("model-b-dataset-b", "Model B Dataset B", "BERT + Enhanced", "99.85%") ] test_texts = [ "Hello, this is a test in English.", "Bonjour, ceci est un test en français.", "Hola, esto es una prueba en español.", "Guten Tag, das ist ein Test auf Deutsch." ] print("🧪 Testing with multiple sentences across all model combinations:") print("-" * 75) try: results_summary = {} for model_key, model_name, description, claimed_accuracy in models_to_test: print(f"\n🤖 Testing {model_name} ({description}) - Claimed: {claimed_accuracy}") print("-" * 60) try: detector = LanguageDetector(model_key=model_key) model_results = [] for text in test_texts: result = detector.detect_language(text) model_results.append({ 'text': text[:30] + '...' if len(text) > 30 else text, 'language': result['language'], 'code': result['language_code'], 'confidence': result['confidence'] }) print(f" Text: {text[:30]}{'...' 
if len(text) > 30 else ''}") print(f" → {result['language']} ({result['language_code']}) - {result['confidence']:.4f}") results_summary[model_name] = model_results print(f"✅ {model_name} completed successfully") except Exception as e: print(f"❌ {model_name}: {str(e)}") results_summary[model_name] = f"Error: {str(e)}" print(f"\n📊 All Model Combinations Testing Summary:") print("-" * 75) for model_name, results in results_summary.items(): if isinstance(results, str): print(f"❌ {model_name}: {results}") else: avg_confidence = sum(r['confidence'] for r in results) / len(results) print(f"✅ {model_name}: Avg Confidence: {avg_confidence:.4f}") print("🎉 Comprehensive model comparison completed successfully!") return True except Exception as e: print(f"❌ Comprehensive test failed: {str(e)}") return False def test_model_architecture(): """Test the model architecture information for Model B Dataset B.""" print("\n🏗️ Testing Model B Dataset B Architecture Information") print("=" * 75) try: detector = LanguageDetector(model_key="model-b-dataset-b") model_info = detector.get_current_model_info() # Verify key architecture information expected_info = { "architecture": "BERT", "dataset": "Dataset B", "accuracy": "99.85%", "model_size": "178M parameters" } print("🔍 Verifying model architecture information:") print("-" * 50) all_correct = True for key, expected_value in expected_info.items(): actual_value = model_info.get(key, "Not found") if actual_value == expected_value: print(f"✅ {key}: {actual_value}") else: print(f"❌ {key}: Expected '{expected_value}', got '{actual_value}'") all_correct = False if all_correct: print("\n🎉 All architecture information verified successfully!") else: print("\n⚠️ Some architecture information mismatches found.") return all_correct except Exception as e: print(f"❌ Architecture test failed: {str(e)}") return False if __name__ == "__main__": print("🚀 Starting Model B Dataset B Tests\n") # Run tests test1_passed = test_model_b_dataset_b() test2_passed = test_all_models_comprehensive() test3_passed = test_model_architecture() # Final results print("\n" + "=" * 75) if test1_passed and test2_passed and test3_passed: print("🎉 All tests passed! Model B Dataset B is ready to use.") print("🏆 This model offers the highest accuracy (99.85%) of all available models!") print("📝 Note: Optimized for 20 carefully selected languages for maximum precision.") else: print("❌ Some tests failed. Please check the implementation.") sys.exit(1)
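# ---------------------------------------------------------------------------
# Quick-start sketch (not executed by the tests above). It assumes only the
# LanguageDetector API already exercised in this file: LanguageDetector(
# model_key=...) and detect_language(), which returns a dict containing
# 'language', 'language_code', and 'confidence'.
#
#     from backend.language_detector import LanguageDetector
#
#     detector = LanguageDetector(model_key="model-b-dataset-b")
#     result = detector.detect_language("Hello, how are you today?")
#     print(f"{result['language']} ({result['language_code']}) "
#           f"- confidence {result['confidence']:.4f}")
# ---------------------------------------------------------------------------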