# Tantan18's picture
# Update app.py
# 9c98934 verified
import gradio as gr
import torch
import numpy as np
from transformers import (
AutoModel, AutoProcessor, AutoFeatureExtractor,
AutoTokenizer, pipeline
)
import warnings
# Suppress every warning for the whole process: no category or module filter is
# given, so this also hides deprecation and other library warnings.
# NOTE(review): presumably meant to keep model-loading noise out of the app logs;
# consider narrowing the filter if warnings are needed for debugging.
warnings.filterwarnings("ignore")
def _load_audio_processor(model_name):
    """Load a processor for *model_name*, trying the generic loader first.

    ``AutoProcessor`` is tried first and ``AutoFeatureExtractor`` second.

    Returns:
        ``(processor, kind)`` where *kind* is the label used in the report
        ("Processor" or "Feature extractor"), or ``(None, None)`` when neither
        loader succeeds.
    """
    loaders = (
        (AutoProcessor, "Processor"),
        (AutoFeatureExtractor, "Feature extractor"),
    )
    for loader, kind in loaders:
        try:
            return loader.from_pretrained(model_name), kind
        except Exception:
            # Best-effort probe: a failed load only means "try the next loader".
            # ``Exception`` (not a bare ``except``) lets Ctrl-C / SystemExit through.
            continue
    return None, None


def _audio_settings(processor):
    """Return the object that carries the audio settings of *processor*.

    Composite processors expose their feature extractor as
    ``processor.feature_extractor``; plain feature extractors carry the
    settings themselves.
    """
    inner = getattr(processor, "feature_extractor", None)
    return processor if inner is None else inner


def test_single_model(model_name):
    """Run a compatibility check on one model and return a text report.

    The report covers, in order: model loading, basic architecture facts read
    from ``model.config``, processor / feature-extractor loading, input
    requirements, a forward pass on two seconds of random audio, a multilingual
    heuristic, a keyword-based suitability score out of 15, a recommendation
    derived from that score, and follow-up suggestions.

    Args:
        model_name: Identifier passed to ``from_pretrained``. Surrounding
            whitespace is ignored; ``None`` or a blank string is rejected.

    Returns:
        The report as one string. A blank name yields a short prompt. Any
        exception raised outside the guarded inference step yields an error
        message listing possible causes instead of propagating.
    """
    if not model_name or not model_name.strip():
        return "Please enter a model name"
    # Use the stripped name everywhere so stray whitespace from the text box
    # cannot break the lookup after having passed validation.
    model_name = model_name.strip()
    name_lower = model_name.lower()

    # Collect report fragments and join once at the end.
    report = [f"πŸ” Testing Model: {model_name}\n", "=" * 60 + "\n\n"]
    add = report.append
    try:
        # 1. Load model
        add("1️⃣ Loading Model...\n")
        model = AutoModel.from_pretrained(model_name)
        config = model.config
        add(" βœ… Model loaded successfully\n")
        add(f" πŸ“Š Model type: {config.model_type}\n")
        add(f" πŸ—οΈ Model class: {model.__class__.__name__}\n\n")

        # 2. Check model architecture
        add("2️⃣ Checking Model Architecture...\n")
        for attr, label in (
            ("hidden_size", " πŸ”’ Hidden size"),
            ("num_hidden_layers", " πŸ“š Number of layers"),
            ("vocab_size", " πŸ“– Vocabulary size"),
        ):
            if hasattr(config, attr):
                add(f"{label}: {getattr(config, attr)}\n")
        add("\n")

        # 3. Try to load processor
        add("3️⃣ Loading Processor...\n")
        processor, kind = _load_audio_processor(model_name)
        # NOTE(review): a successfully loaded processor is treated as "supports
        # audio"; confirm the generic loader cannot return a text-only processor.
        supports_audio = processor is not None
        if supports_audio:
            add(f" βœ… {kind} loaded successfully: {processor.__class__.__name__}\n")
        else:
            add(" ❌ Cannot load audio processor\n")
        add("\n")

        # 4. Check input requirements
        add("4️⃣ Checking Input Requirements...\n")
        sampling_rate = 16000  # Default value when the processor does not state one
        if supports_audio:
            # Look on the wrapped feature extractor when there is one, so a
            # composite processor does not silently fall back to the default.
            settings = _audio_settings(processor)
            if hasattr(settings, "sampling_rate"):
                sampling_rate = settings.sampling_rate
                add(f" 🎡 Sampling rate: {sampling_rate} Hz\n")
            if hasattr(settings, "feature_size"):
                add(f" πŸ“ Feature dimension: {settings.feature_size}\n")
            if hasattr(settings, "return_attention_mask"):
                add(f" 🎭 Supports attention mask: {settings.return_attention_mask}\n")
        add("\n")

        # 5. Test inference
        add("5️⃣ Testing Inference...\n")
        if supports_audio:
            try:
                # Create dummy audio data (2 seconds)
                dummy_audio = np.random.randn(sampling_rate * 2).astype(np.float32)
                inputs = processor(dummy_audio, sampling_rate=sampling_rate, return_tensors="pt")
                with torch.no_grad():
                    outputs = model(**inputs)
                if hasattr(outputs, "last_hidden_state"):
                    shape = outputs.last_hidden_state.shape
                    add(f" βœ… Inference successful! Hidden state shape: {shape}\n")
                elif hasattr(outputs, "logits"):
                    shape = outputs.logits.shape
                    add(f" βœ… Inference successful! Logits shape: {shape}\n")
                else:
                    add(f" βœ… Inference successful! Output type: {type(outputs)}\n")
            except Exception as e:
                # A failed forward pass is reported, not fatal: the remaining
                # sections of the report are still produced.
                add(f" ❌ Inference failed: {str(e)}\n")
        else:
            add(" ⚠️ Audio input not supported, skipping inference test\n")
        add("\n")

        # 6. Multilingual support check
        add("6️⃣ Multilingual Support Check...\n")
        multilingual = False
        vocab_size = getattr(config, "vocab_size", None)
        # Guard the comparison: a config may define vocab_size as None.
        if isinstance(vocab_size, int) and vocab_size > 50000:
            add(f" βœ… Likely supports multiple languages (large vocabulary: {vocab_size})\n")
            multilingual = True
        elif any(keyword in name_lower for keyword in ("xlsr", "multilingual", "cross-lingual")):
            add(" βœ… Supports multiple languages based on model name\n")
            multilingual = True
        else:
            add(" ❓ Multilingual support unclear\n")
        add("\n")

        # 7. Depression detection suitability scoring
        add("7️⃣ Depression Detection Suitability Assessment...\n")
        score = 0
        max_score = 15  # 6 + 2 + 2 + 2 + 1 + 2, the largest bonus of each category

        # Most important: specifically for depression/mental health detection (6 points)
        depression_keywords = ("depression", "mental-health", "psychological", "mood", "phq")
        emotion_keywords = ("emotion", "sentiment", "affective", "feeling")
        if any(keyword in name_lower for keyword in depression_keywords):
            score += 6
            add(" 🎯 Specifically for depression/mental health detection (+6 points)\n")
        elif any(keyword in name_lower for keyword in emotion_keywords):
            score += 3
            add(" 😊 For emotion recognition, potentially applicable (+3 points)\n")

        # Basic requirement: audio input support (2 points)
        if supports_audio:
            score += 2
            add(" 🎡 Supports audio input (+2 points)\n")
        else:
            add(" ❌ Does not support audio input (0 points)\n")

        # Multilingual support (2 points)
        if multilingual:
            score += 2
            add(" 🌍 Supports multiple languages (+2 points)\n")

        # Architecture suitability (2 points)
        if config.model_type in ("wav2vec2", "hubert", "wavlm"):
            score += 2
            add(" πŸ—οΈ Excellent speech representation learning architecture (+2 points)\n")
        elif config.model_type == "whisper":
            score += 1
            add(" ⚠️ Whisper architecture needs modification for classification (+1 point)\n")

        # Check if configured for classification
        # NOTE(review): configs may report num_labels == 2 by default even for
        # base models, which would make this bonus near-universal - confirm.
        num_labels = getattr(config, "num_labels", None)
        if num_labels is not None:
            if num_labels == 2:
                score += 1
                add(" βœ… Binary classification task configuration (likely depression detection) (+1 point)\n")
            else:
                score += 0.5
                add(f" ⚠️ Multi-class task ({num_labels} classes) (+0.5 points)\n")

        # Check for training dataset clues
        daic_keywords = ("daic", "wizard-of-oz", "depression-detection", "clinical")
        if any(keyword in name_lower for keyword in daic_keywords):
            score += 2
            add(" πŸ“Š Possibly trained on clinical depression datasets (+2 points)\n")
        add(f"\n🎯 Depression Detection Suitability Score: {score}/{max_score}\n")

        # 8. Recommendations
        add("\n8️⃣ Usage Recommendations...\n")
        if score >= 12:
            add(" 🌟 Highly recommended! Specifically for depression detection, very suitable\n")
        elif score >= 8:
            add(" πŸ‘ Recommended, may need some fine-tuning\n")
        elif score >= 5:
            add(" ⚠️ Use with caution, may need significant modification\n")
        else:
            add(" ❌ Not recommended, suggest finding specialized depression detection models\n")

        # 9. Further inspection suggestions
        add("\n9️⃣ Further Inspection Suggestions...\n")
        add(" πŸ” Check model card for training data description\n")
        add(" πŸ“Š Check if DAIC-WOZ or other depression datasets are mentioned\n")
        add(" πŸ“ Check papers or documentation for task description\n")
        add(" πŸ§ͺ Test with small samples to see if model output matches depression detection expectations\n")
        return "".join(report)
    except Exception as e:
        # Top-level boundary for the UI callback: show the failure to the user
        # instead of raising. The partial report is discarded, as before.
        error_msg = f"❌ Model test failed: {str(e)}\n"
        error_msg += "\nPossible causes:\n"
        error_msg += "β€’ Incorrect model name\n"
        error_msg += "β€’ Model requires special permissions\n"
        error_msg += "β€’ Network connection issues\n"
        error_msg += "β€’ Model architecture incompatibility\n"
        return error_msg


# This is a UI callback, not a pytest test; the ``test_`` prefix would otherwise
# make pytest try to collect it wherever the module is star-imported.
test_single_model.__test__ = False
def test_recommended_models(models=None):
    """Quick-test a list of models and return a ranked text summary.

    For each model this loads it with ``AutoModel``, probes for a processor
    (``AutoProcessor`` first, then ``AutoFeatureExtractor``), applies a
    multilingual heuristic, and computes a simplified score out of 8:
    audio support +3, multilingual +2, wav2vec2/hubert/wavlm architecture +3.
    Models that fail to load are reported and left out of the ranking.

    Args:
        models: Optional iterable of model names to test. ``None`` (the
            default, and what the UI callback uses) selects the built-in
            recommended list; an empty iterable tests nothing.

    Returns:
        The per-model results followed by the ranking, as one string.
    """
    if models is None:
        models = (
            "facebook/wav2vec2-large-xlsr-53",
            "microsoft/wavlm-large",
            "harshit345/xlsr-wav2vec-speech-emotion-recognition",
            "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
            "speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
        )
    model_names = list(models)
    max_score = 8  # 3 (audio) + 2 (multilingual) + 3 (architecture)

    lines = ["πŸ” Batch Testing Recommended Models\n", "=" * 60 + "\n\n"]
    results = []
    for i, model_name in enumerate(model_names, 1):
        lines.append(f"πŸ“Š Testing {i}/{len(model_names)}: {model_name}\n")
        lines.append("-" * 50 + "\n")
        try:
            # Simplified quick test
            model = AutoModel.from_pretrained(model_name)
            model_type = model.config.model_type

            # Check audio support: the first loader that succeeds is enough.
            supports_audio = False
            for loader in (AutoProcessor, AutoFeatureExtractor):
                try:
                    loader.from_pretrained(model_name)
                except Exception:
                    # Best-effort probe; failure just means "try the next loader".
                    continue
                supports_audio = True
                break

            # Check multilingual (guarding against a vocab_size of None)
            vocab_size = getattr(model.config, "vocab_size", None)
            name_lower = model_name.lower()
            multilingual = (
                isinstance(vocab_size, int) and vocab_size > 50000
            ) or any(keyword in name_lower for keyword in ("xlsr", "multilingual"))

            # Calculate simplified score
            score = 0
            if supports_audio:
                score += 3
            if multilingual:
                score += 2
            if model_type in ("wav2vec2", "hubert", "wavlm"):
                score += 3

            results.append({
                'name': model_name,
                'score': score,
                'audio': supports_audio,
                'multilingual': multilingual,
                'type': model_type,
            })
            audio_mark = 'βœ…' if supports_audio else '❌'
            multilingual_mark = 'βœ…' if multilingual else '❌'
            lines.append(
                f"βœ… Loaded successfully | Audio: {audio_mark} | "
                f"Multilingual: {multilingual_mark} | Score: {score}/{max_score}\n\n"
            )
        except Exception as e:
            # One failing model must not abort the batch.
            lines.append(f"❌ Loading failed: {str(e)}\n\n")

    # Sort and recommend (stable sort: ties keep their test order)
    results.sort(key=lambda entry: entry['score'], reverse=True)
    lines.append("πŸ† Recommendation Rankings:\n")
    lines.append("=" * 40 + "\n")
    for rank, entry in enumerate(results, 1):
        lines.append(f"{rank}. {entry['name']}\n")
        lines.append(f" Score: {entry['score']}/{max_score} | Type: {entry['type']}\n\n")
    return "".join(lines)


# This is a UI callback, not a pytest test; the ``test_`` prefix would otherwise
# make pytest try to collect it wherever the module is star-imported.
test_recommended_models.__test__ = False
# --- Gradio interface -------------------------------------------------------
# NOTE(review): the nesting below (text box and start button inside the Row,
# everything else directly under its Tab) is the conventional layout; confirm
# against the deployed app.

# Page texts, stored as line tuples so the layout code stays readable. Each
# joined text begins and ends with a newline.
_INTRO_MD = "\n".join((
    "",
    "# πŸ€– Depression Detection Model Compatibility Test Tool",
    "This tool helps you quickly test whether Hugging Face models are suitable for depression detection tasks.",
    "## Features:",
    "- βœ… Check model loading compatibility",
    "- 🎡 Verify audio input support",
    "- 🌍 Assess multilingual capabilities",
    "- πŸ“Š Suitability scoring (0-15 points)",
    "- πŸ’‘ Usage recommendations",
    "",
))

_BATCH_MD = "\n".join((
    "",
    "### 🌟 Recommended Depression Detection Candidate Models",
    "These models perform well in speech emotion recognition and multilingual support:",
    "- `facebook/wav2vec2-large-xlsr-53` - Multilingual speech representation learning",
    "- `microsoft/wavlm-large` - Speech understanding specialized model",
    "- `harshit345/xlsr-wav2vec-speech-emotion-recognition` - Emotion recognition",
    "- `audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim` - Emotion dimension recognition",
    "- `speechbrain/emotion-recognition-wav2vec2-IEMOCAP` - Emotion classification",
    "",
))

_USAGE_MD = "\n".join((
    "",
    "## πŸ“– Usage Instructions",
    "### Scoring Criteria (Redesigned):",
    "- **Depression-specific model** (+6 points): Specifically for depression/mental health detection",
    "- **Emotion recognition model** (+3 points): For emotion recognition, potentially applicable",
    "- **Audio support** (+2 points): Whether the model can process audio input",
    "- **Multilingual support** (+2 points): Support for Chinese, English, German, Russian",
    "- **Architecture suitability** (+2 points): Whether model architecture is suitable for speech classification",
    "- **Classification configuration** (+1 point): Whether configured for classification tasks",
    "- **Clinical datasets** (+2 points): Whether trained on clinical depression datasets",
    "### Score Interpretation:",
    "- **12-15 points**: 🌟 Highly recommended, specialized depression detection model",
    "- **8-11 points**: πŸ‘ Recommended, may need fine-tuning",
    "- **5-7 points**: ⚠️ Use with caution, needs modification",
    "- **0-4 points**: ❌ Not recommended",
    "### Next Steps:",
    "1. Select the top 2-3 models with highest scores",
    "2. Conduct in-depth testing in Google Colab",
    "3. Fine-tune using DAIC-WOZ dataset",
    "4. Final evaluation with your multilingual data",
    "",
))

with gr.Blocks(title="πŸ€– Depression Detection Model Compatibility Test") as app:
    gr.Markdown(_INTRO_MD)

    # Tab 1: check one model chosen by the user.
    with gr.Tab("Single Model Test"):
        with gr.Row():
            model_input = gr.Textbox(
                value="ireneminhee/speech-to-depression",
                label="πŸ” Model Name",
                placeholder="Enter model name, e.g.: facebook/wav2vec2-large-xlsr-53",
            )
            test_btn = gr.Button("πŸš€ Start Test", variant="primary")
        result_output = gr.Textbox(label="πŸ“‹ Test Results", lines=25, max_lines=50)
        # Button press -> run the single-model check on the text box content.
        test_btn.click(test_single_model, [model_input], [result_output])

    # Tab 2: run the quick check over the built-in model list.
    with gr.Tab("Recommended Models Batch Test"):
        gr.Markdown(_BATCH_MD)
        batch_test_btn = gr.Button("πŸ” Batch Test Recommended Models", variant="primary")
        batch_result_output = gr.Textbox(label="πŸ“Š Batch Test Results", lines=20, max_lines=50)
        # The batch check takes no inputs from the page.
        batch_test_btn.click(test_recommended_models, [], [batch_result_output])

    # Tab 3: static help text.
    with gr.Tab("Usage Instructions"):
        gr.Markdown(_USAGE_MD)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    app.launch()