|
|
|
""" |
|
Test script for Model B Dataset B - BERT + Enhanced Dataset |
|
|
|
This script tests the BERT-based language detection model

trained on the enhanced dataset, achieving the highest accuracy (99.85%).
|
""" |
|
|
|
import sys |
|
import os |
|
|
|
|
|
# Make the script's directory importable (for the `backend` package) regardless
# of the current working directory.  The original `os.path.join(...)` with a
# single argument was a no-op, and `os.path.dirname(__file__)` can be "" when
# the script is launched from its own directory; `abspath` fixes both.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
from backend.language_detector import LanguageDetector |
|
|
|
|
|
def test_model_b_dataset_b():
    """Smoke-test the Model B Dataset B (BERT + enhanced dataset) detector.

    Initializes the detector, runs one sample sentence per supported
    language, prints per-sample and aggregate accuracy plus the model's
    metadata, and returns True on success / False on any failure.
    """
    print("🧪 Testing Model B Dataset B - BERT + Enhanced Dataset")
    print("=" * 75)

    try:
        detector = LanguageDetector(model_key="model-b-dataset-b")
        print("✅ Successfully initialized Model B Dataset B")

        # One representative sentence per supported language: (text, expected ISO code).
        samples = [
            ("Hello, how are you today?", "en"),
            ("Bonjour, comment allez-vous?", "fr"),
            ("Hola, ¿cómo estás?", "es"),
            ("Guten Tag, wie geht es Ihnen?", "de"),
            ("Ciao, come stai?", "it"),
            ("Olá, como você está?", "pt"),
            ("Привет, как дела?", "ru"),
            ("こんにちは、元気ですか?", "ja"),
            ("你好,你好吗?", "zh"),
            ("مرحبا، كيف حالك؟", "ar"),
            ("नमस्ते, आप कैसे हैं?", "hi"),
            ("Hallo, hoe gaat het met je?", "nl"),
            ("Γεια σας, πώς είστε;", "el"),
            ("Здравейте, как сте?", "bg"),
            ("Witaj, jak się masz?", "pl"),
            ("สวัสดี คุณเป็นอย่างไรบ้าง?", "th"),
            ("Merhaba, nasılsınız?", "tr"),
            ("آپ کیسے ہیں؟", "ur"),
            ("Xin chào, bạn khỏe không?", "vi"),
            ("Habari, unajehje?", "sw")
        ]

        print("\n🔍 Running language detection tests on 20 supported languages:")
        print("-" * 75)

        num_correct = 0
        total = len(samples)

        for text, expected_lang in samples:
            try:
                outcome = detector.detect_language(text)
                predicted = outcome['language_code']
                hit = predicted == expected_lang
                num_correct += int(hit)

                # Truncate long samples for display only.
                snippet = text[:40] + ('...' if len(text) > 40 else '')
                print(f"{'✅' if hit else '❌'} Text: {snippet}")
                print(f"   Expected: {expected_lang} | Predicted: {predicted} ({outcome['language']})")
                print(f"   Confidence: {outcome['confidence']:.4f}")
                print()

            except Exception as e:
                # A per-sample failure counts as incorrect but doesn't abort the run.
                print(f"❌ Error testing '{text[:30]}...': {str(e)}")
                print()

        accuracy = num_correct / total * 100
        print(f"📊 Test Results: {num_correct}/{total} correct")
        print(f"📈 Accuracy: {accuracy:.1f}%")

        print("\n📋 Model Information:")
        print("-" * 75)
        for field, value in detector.get_current_model_info().items():
            print(f"{field.title().replace('_', ' ')}: {value}")

        print("🎉 Model B Dataset B test completed successfully!")

    except Exception as e:
        print(f"❌ Test failed: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

    return True
|
|
|
|
|
def test_all_models_comprehensive():
    """Run four short sentences through every model/dataset combination.

    Prints per-model predictions, then an average-confidence summary.
    Returns True when the comparison completes, False on an outer failure;
    individual model failures are recorded in the summary instead.
    """
    print("\n🔄 Comprehensive All-Model Combinations Comparison")
    print("=" * 75)

    # (model key, display name, architecture description, claimed accuracy)
    model_matrix = [
        ("model-a-dataset-a", "Model A Dataset A", "XLM-RoBERTa + Standard", "97.9%"),
        ("model-b-dataset-a", "Model B Dataset A", "BERT + Standard", "96.17%"),
        ("model-a-dataset-b", "Model A Dataset B", "XLM-RoBERTa + Enhanced", "99.72%"),
        ("model-b-dataset-b", "Model B Dataset B", "BERT + Enhanced", "99.85%")
    ]

    sentences = [
        "Hello, this is a test in English.",
        "Bonjour, ceci est un test en français.",
        "Hola, esto es una prueba en español.",
        "Guten Tag, das ist ein Test auf Deutsch."
    ]

    print("🧪 Testing with multiple sentences across all model combinations:")
    print("-" * 75)

    try:
        summary = {}

        for model_key, model_name, description, claimed_accuracy in model_matrix:
            print(f"\n🤖 Testing {model_name} ({description}) - Claimed: {claimed_accuracy}")
            print("-" * 60)

            try:
                detector = LanguageDetector(model_key=model_key)
                per_model = []

                for sentence in sentences:
                    outcome = detector.detect_language(sentence)
                    shortened = sentence[:30] + ('...' if len(sentence) > 30 else '')
                    per_model.append({
                        'text': shortened,
                        'language': outcome['language'],
                        'code': outcome['language_code'],
                        'confidence': outcome['confidence']
                    })

                    print(f"   Text: {shortened}")
                    print(f"   → {outcome['language']} ({outcome['language_code']}) - {outcome['confidence']:.4f}")

                summary[model_name] = per_model
                print(f"✅ {model_name} completed successfully")

            except Exception as e:
                # Record the failure per-model; the comparison continues.
                print(f"❌ {model_name}: {str(e)}")
                summary[model_name] = f"Error: {str(e)}"

        print("\n📊 All Model Combinations Testing Summary:")
        print("-" * 75)
        for model_name, outcome in summary.items():
            if isinstance(outcome, str):
                # A string entry means that model errored out above.
                print(f"❌ {model_name}: {outcome}")
            else:
                mean_conf = sum(entry['confidence'] for entry in outcome) / len(outcome)
                print(f"✅ {model_name}: Avg Confidence: {mean_conf:.4f}")

        print("🎉 Comprehensive model comparison completed successfully!")
        return True

    except Exception as e:
        print(f"❌ Comprehensive test failed: {str(e)}")
        return False
|
|
|
|
|
def test_model_architecture():
    """Verify the metadata reported for Model B Dataset B.

    Compares the detector's reported model info against the expected
    architecture/dataset/accuracy/size values and returns True only when
    every field matches.
    """
    print("\n🏗️ Testing Model B Dataset B Architecture Information")
    print("=" * 75)

    try:
        detector = LanguageDetector(model_key="model-b-dataset-b")
        reported = detector.get_current_model_info()

        # Ground-truth values this model is expected to report.
        expectations = {
            "architecture": "BERT",
            "dataset": "Dataset B",
            "accuracy": "99.85%",
            "model_size": "178M parameters"
        }

        print("🔍 Verifying model architecture information:")
        print("-" * 50)

        mismatches = 0
        for field, wanted in expectations.items():
            found = reported.get(field, "Not found")
            if found == wanted:
                print(f"✅ {field}: {found}")
            else:
                print(f"❌ {field}: Expected '{wanted}', got '{found}'")
                mismatches += 1

        if mismatches == 0:
            print("\n🎉 All architecture information verified successfully!")
        else:
            print("\n⚠️ Some architecture information mismatches found.")

        return mismatches == 0

    except Exception as e:
        print(f"❌ Architecture test failed: {str(e)}")
        return False
|
|
|
|
|
if __name__ == "__main__":
    print("🚀 Starting Model B Dataset B Tests\n")

    # Run every suite regardless of earlier failures so all output is shown.
    outcomes = [
        test_model_b_dataset_b(),
        test_all_models_comprehensive(),
        test_model_architecture(),
    ]

    print("\n" + "=" * 75)
    if all(outcomes):
        print("🎉 All tests passed! Model B Dataset B is ready to use.")
        print("🏆 This model offers the highest accuracy (99.85%) of all available models!")
        print("📝 Note: Optimized for 20 carefully selected languages for maximum precision.")
    else:
        print("❌ Some tests failed. Please check the implementation.")
        sys.exit(1)