Spaces:

Tantan18
/

depression-model-tester

Running

App Files Files Community

Tantan18 committed 29 days ago

Commit

3df16ad

verified ·

1 Parent(s): bd09630

Create app.py

Browse files

Files changed (1) hide show

app.py +363 -0

app.py ADDED Viewed

	@@ -0,0 +1,363 @@

+import gradio as gr
+import torch
+import numpy as np
+from transformers import (
+    AutoModel, AutoProcessor, AutoFeatureExtractor,
+    AutoTokenizer, pipeline
+)
+import warnings
+warnings.filterwarnings("ignore")
+def test_single_model(model_name):
+    """Test compatibility of a single model and return a plain-text report.
+
+    The report walks through: loading the model with ``AutoModel``, listing
+    selected config fields, loading a processor (or, failing that, a feature
+    extractor), running a forward pass on 2 seconds of random audio, a
+    heuristic multilingual check, a keyword/config based suitability score
+    (out of 15) and usage recommendations.
+
+    Args:
+        model_name: Hugging Face Hub model id, e.g.
+            "facebook/wav2vec2-large-xlsr-53". Surrounding whitespace is
+            ignored; an empty, blank or ``None`` value is rejected.
+
+    Returns:
+        The report text; the prompt "Please enter a model name" for blank
+        input; or an error message listing possible causes when any step
+        outside the guarded processor/inference probes raises.
+    """
+    # Normalise once so the blank check and from_pretrained() see the same value.
+    model_name = (model_name or "").strip()
+    if not model_name:
+        return "Please enter a model name"
+    name_lower = model_name.lower()
+    result_text = f"🔍 Testing Model: {model_name}\n"
+    result_text += "=" * 60 + "\n\n"
+    try:
+        # 1. Load model
+        result_text += "1️⃣ Loading Model...\n"
+        model = AutoModel.from_pretrained(model_name)
+        result_text += "   ✅ Model loaded successfully\n"
+        result_text += f"   📊 Model type: {model.config.model_type}\n"
+        result_text += f"   🏗️ Model class: {model.__class__.__name__}\n\n"
+        # 2. Check model architecture (only fields the config actually defines)
+        result_text += "2️⃣ Checking Model Architecture...\n"
+        if hasattr(model.config, 'hidden_size'):
+            result_text += f"   🔢 Hidden size: {model.config.hidden_size}\n"
+        if hasattr(model.config, 'num_hidden_layers'):
+            result_text += f"   📚 Number of layers: {model.config.num_hidden_layers}\n"
+        if hasattr(model.config, 'vocab_size'):
+            result_text += f"   📖 Vocabulary size: {model.config.vocab_size}\n"
+        result_text += "\n"
+        # 3. Try to load processor, falling back to a bare feature extractor
+        result_text += "3️⃣ Loading Processor...\n"
+        processor = None
+        try:
+            processor = AutoProcessor.from_pretrained(model_name)
+            result_text += f"   ✅ Processor loaded successfully: {processor.__class__.__name__}\n"
+        except Exception:
+            # `except Exception` (not bare) so Ctrl-C / SystemExit still propagate.
+            try:
+                processor = AutoFeatureExtractor.from_pretrained(model_name)
+                result_text += f"   ✅ Feature extractor loaded successfully: {processor.__class__.__name__}\n"
+            except Exception:
+                result_text += "   ❌ Cannot load audio processor\n"
+        # Audio support is inferred purely from a processor having loaded.
+        supports_audio = processor is not None
+        result_text += "\n"
+        # 4. Check input requirements
+        result_text += "4️⃣ Checking Input Requirements...\n"
+        sampling_rate = 16000  # Default value when the processor does not state one
+        if supports_audio:
+            # Composite processors such as Wav2Vec2Processor keep the audio
+            # settings on `.feature_extractor`; plain feature extractors carry
+            # them directly, so fall back to the processor itself.
+            audio_frontend = getattr(processor, 'feature_extractor', processor)
+            if hasattr(audio_frontend, 'sampling_rate'):
+                sampling_rate = audio_frontend.sampling_rate
+                result_text += f"   🎵 Sampling rate: {sampling_rate} Hz\n"
+            if hasattr(audio_frontend, 'feature_size'):
+                result_text += f"   📏 Feature dimension: {audio_frontend.feature_size}\n"
+            if hasattr(audio_frontend, 'return_attention_mask'):
+                result_text += f"   🎭 Supports attention mask: {audio_frontend.return_attention_mask}\n"
+        result_text += "\n"
+        # 5. Test inference
+        result_text += "5️⃣ Testing Inference...\n"
+        if supports_audio:
+            try:
+                # Create dummy audio data (2 seconds)
+                dummy_audio = np.random.randn(sampling_rate * 2).astype(np.float32)
+                # Process audio
+                inputs = processor(dummy_audio, sampling_rate=sampling_rate, return_tensors="pt")
+                # Model inference (no gradients needed for a smoke test)
+                with torch.no_grad():
+                    outputs = model(**inputs)
+                # Check output
+                if hasattr(outputs, 'last_hidden_state'):
+                    shape = outputs.last_hidden_state.shape
+                    result_text += f"   ✅ Inference successful! Hidden state shape: {shape}\n"
+                elif hasattr(outputs, 'logits'):
+                    shape = outputs.logits.shape
+                    result_text += f"   ✅ Inference successful! Logits shape: {shape}\n"
+                else:
+                    result_text += f"   ✅ Inference successful! Output type: {type(outputs)}\n"
+            except Exception as e:
+                # A failed forward pass is reported but does not abort the report.
+                result_text += f"   ❌ Inference failed: {e}\n"
+        else:
+            result_text += "   ⚠️ Audio input not supported, skipping inference test\n"
+        result_text += "\n"
+        # 6. Multilingual support check (heuristic: vocabulary size or model name)
+        result_text += "6️⃣ Multilingual Support Check...\n"
+        multilingual = False
+        if hasattr(model.config, 'vocab_size') and model.config.vocab_size > 50000:
+            result_text += f"   ✅ Likely supports multiple languages (large vocabulary: {model.config.vocab_size})\n"
+            multilingual = True
+        elif any(keyword in name_lower for keyword in ['xlsr', 'multilingual', 'cross-lingual']):
+            result_text += "   ✅ Supports multiple languages based on model name\n"
+            multilingual = True
+        else:
+            result_text += "   ❓ Multilingual support unclear\n"
+        result_text += "\n"
+        # 7. Depression detection suitability scoring
+        result_text += "7️⃣ Depression Detection Suitability Assessment...\n"
+        score = 0
+        max_score = 15
+        # Both keyword lists are defined before the if/elif chain: a statement
+        # placed between an `if` suite and its `elif` is a SyntaxError.
+        depression_keywords = ['depression', 'mental-health', 'psychological', 'mood', 'phq']
+        emotion_keywords = ['emotion', 'sentiment', 'affective', 'feeling']
+        if any(keyword in name_lower for keyword in depression_keywords):
+            # Most important: Specifically for depression/mental health detection (6 points)
+            score += 6
+            result_text += "   🎯 Specifically for depression/mental health detection (+6 points)\n"
+        elif any(keyword in name_lower for keyword in emotion_keywords):
+            # Secondary: For emotion recognition (3 points)
+            score += 3
+            result_text += "   😊 For emotion recognition, potentially applicable (+3 points)\n"
+        # Basic requirement: Audio input support (2 points)
+        if supports_audio:
+            score += 2
+            result_text += "   🎵 Supports audio input (+2 points)\n"
+        else:
+            result_text += "   ❌ Does not support audio input (0 points)\n"
+        # Multilingual support (2 points)
+        if multilingual:
+            score += 2
+            result_text += "   🌍 Supports multiple languages (+2 points)\n"
+        # Architecture suitability (2 points)
+        if model.config.model_type in ['wav2vec2', 'hubert', 'wavlm']:
+            score += 2
+            result_text += "   🏗️ Excellent speech representation learning architecture (+2 points)\n"
+        elif model.config.model_type == 'whisper':
+            score += 1
+            result_text += "   ⚠️ Whisper architecture needs modification for classification (+1 point)\n"
+        # Check if configured for classification
+        # NOTE(review): transformers configs appear to default num_labels to 2,
+        # so this branch may award the point to every model - confirm.
+        if hasattr(model.config, 'num_labels'):
+            if model.config.num_labels == 2:
+                score += 1
+                result_text += "   ✅ Binary classification task configuration (likely depression detection) (+1 point)\n"
+            else:
+                score += 0.5
+                result_text += f"   ⚠️ Multi-class task ({model.config.num_labels} classes) (+0.5 points)\n"
+        # Check for training dataset clues
+        daic_keywords = ['daic', 'wizard-of-oz', 'depression-detection', 'clinical']
+        if any(keyword in name_lower for keyword in daic_keywords):
+            score += 2
+            result_text += "   📊 Possibly trained on clinical depression datasets (+2 points)\n"
+        result_text += f"\n🎯 Depression Detection Suitability Score: {score}/{max_score}\n"
+        # 8. Recommendations (thresholds on the score computed above)
+        result_text += "\n8️⃣ Usage Recommendations...\n"
+        if score >= 12:
+            result_text += "   🌟 Highly recommended! Specifically for depression detection, very suitable\n"
+        elif score >= 8:
+            result_text += "   👍 Recommended, may need some fine-tuning\n"
+        elif score >= 5:
+            result_text += "   ⚠️ Use with caution, may need significant modification\n"
+        else:
+            result_text += "   ❌ Not recommended, suggest finding specialized depression detection models\n"
+        # 9. Further inspection suggestions (static checklist)
+        result_text += "\n9️⃣ Further Inspection Suggestions...\n"
+        result_text += "   🔍 Check model card for training data description\n"
+        result_text += "   📊 Check if DAIC-WOZ or other depression datasets are mentioned\n"
+        result_text += "   📝 Check papers or documentation for task description\n"
+        result_text += "   🧪 Test with small samples to see if model output matches depression detection expectations\n"
+        return result_text
+    except Exception as e:
+        # Top-level UI boundary: turn any failure into a readable message
+        # instead of surfacing a traceback in the Gradio textbox.
+        error_msg = f"❌ Model test failed: {e}\n"
+        error_msg += "\nPossible causes:\n"
+        error_msg += "• Incorrect model name\n"
+        error_msg += "• Model requires special permissions\n"
+        error_msg += "• Network connection issues\n"
+        error_msg += "• Model architecture incompatibility\n"
+        return error_msg
+def test_recommended_models():
+    """Quick-test a fixed list of recommended models and rank them.
+
+    For each model id the function loads the model with ``AutoModel``, probes
+    whether a processor or feature extractor can be loaded, applies the
+    vocabulary-size / model-name multilingual heuristic, and computes a
+    simplified score out of 8 (audio +3, multilingual +2, wav2vec2/hubert/wavlm
+    architecture +3). Models that fail to load are reported and left out of
+    the ranking.
+
+    Returns:
+        A plain-text report with one line per model followed by the ranking
+        sorted by descending score.
+    """
+    recommended_models = [
+        "facebook/wav2vec2-large-xlsr-53",
+        "microsoft/wavlm-large",
+        "harshit345/xlsr-wav2vec-speech-emotion-recognition",
+        "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
+        "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
+    ]
+    # Same keyword list as the single-model test so both paths agree.
+    multilingual_keywords = ['xlsr', 'multilingual', 'cross-lingual']
+    speech_architectures = ['wav2vec2', 'hubert', 'wavlm']
+    result_text = "🔍 Batch Testing Recommended Models\n"
+    result_text += "=" * 60 + "\n\n"
+    results = []
+    for i, model_name in enumerate(recommended_models, 1):
+        result_text += f"📊 Testing {i}/{len(recommended_models)}: {model_name}\n"
+        result_text += "-" * 50 + "\n"
+        try:
+            # Simplified quick test: only the config is inspected afterwards,
+            # so drop our reference to the weights before the next checkpoint
+            # is loaded.
+            model = AutoModel.from_pretrained(model_name)
+            config = model.config
+            del model
+            # Check audio support: first loader that succeeds wins.
+            supports_audio = False
+            for loader in (AutoProcessor, AutoFeatureExtractor):
+                try:
+                    loader.from_pretrained(model_name)
+                except Exception:
+                    # Best-effort probe; a failed loader just means "try the next one".
+                    continue
+                supports_audio = True
+                break
+            # Check multilingual
+            multilingual = False
+            if hasattr(config, 'vocab_size') and config.vocab_size > 50000:
+                multilingual = True
+            elif any(keyword in model_name.lower() for keyword in multilingual_keywords):
+                multilingual = True
+            # Calculate simplified score
+            score = 0
+            if supports_audio:
+                score += 3
+            if multilingual:
+                score += 2
+            if config.model_type in speech_architectures:
+                score += 3
+            results.append({
+                'name': model_name,
+                'score': score,
+                'audio': supports_audio,
+                'multilingual': multilingual,
+                'type': config.model_type
+            })
+            result_text += f"✅ Loaded successfully | Audio: {'✅' if supports_audio else '❌'} | Multilingual: {'✅' if multilingual else '❌'} | Score: {score}/8\n\n"
+        except Exception as e:
+            # One broken model must not abort the whole batch.
+            result_text += f"❌ Loading failed: {e}\n\n"
+    # Sort and recommend (stable sort keeps list order among equal scores)
+    results.sort(key=lambda entry: entry['score'], reverse=True)
+    result_text += "🏆 Recommendation Rankings:\n"
+    result_text += "=" * 40 + "\n"
+    for rank, entry in enumerate(results, 1):
+        result_text += f"{rank}. {entry['name']}\n"
+        result_text += f"   Score: {entry['score']}/8 | Type: {entry['type']}\n\n"
+    return result_text
+# Static Markdown shown in the UI, kept apart from the layout code below.
+_INTRO_MD = """
+    # 🤖 Depression Detection Model Compatibility Test Tool
+    This tool helps you quickly test whether Hugging Face models are suitable for depression detection tasks.
+    ## Features:
+    - ✅ Check model loading compatibility
+    - 🎵 Verify audio input support
+    - 🌍 Assess multilingual capabilities
+    - 📊 Suitability scoring (0-15 points)
+    - 💡 Usage recommendations
+    """
+_BATCH_MD = """
+        ### 🌟 Recommended Depression Detection Candidate Models
+        These models perform well in speech emotion recognition and multilingual support:
+        - `facebook/wav2vec2-large-xlsr-53` - Multilingual speech representation learning
+        - `microsoft/wavlm-large` - Speech understanding specialized model
+        - `harshit345/xlsr-wav2vec-speech-emotion-recognition` - Emotion recognition
+        - `audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim` - Emotion dimension recognition
+        - `speechbrain/emotion-recognition-wav2vec2-IEMOCAP` - Emotion classification
+        """
+_USAGE_MD = """
+        ## 📖 Usage Instructions
+        ### Scoring Criteria (Redesigned):
+        - **Depression-specific model** (+6 points): Specifically for depression/mental health detection
+        - **Emotion recognition model** (+3 points): For emotion recognition, potentially applicable
+        - **Audio support** (+2 points): Whether the model can process audio input
+        - **Multilingual support** (+2 points): Support for Chinese, English, German, Russian
+        - **Architecture suitability** (+2 points): Whether model architecture is suitable for speech classification
+        - **Classification configuration** (+1 point): Whether configured for classification tasks
+        - **Clinical datasets** (+2 points): Whether trained on clinical depression datasets
+        ### Score Interpretation:
+        - **12-15 points**: 🌟 Highly recommended, specialized depression detection model
+        - **8-11 points**: 👍 Recommended, may need fine-tuning
+        - **5-7 points**: ⚠️ Use with caution, needs modification
+        - **0-4 points**: ❌ Not recommended
+        ### Next Steps:
+        1. Select the top 2-3 models with highest scores
+        2. Conduct in-depth testing in Google Colab
+        3. Fine-tune using DAIC-WOZ dataset
+        4. Final evaluation with your multilingual data
+        """
+# Assemble the three-tab Gradio Blocks app.
+with gr.Blocks(title="🤖 Depression Detection Model Compatibility Test") as app:
+    gr.Markdown(_INTRO_MD)
+    # Tab 1: free-form model id -> full report from test_single_model
+    with gr.Tab("Single Model Test"):
+        with gr.Row():
+            model_input = gr.Textbox(
+                label="🔍 Model Name",
+                value="ireneminhee/speech-to-depression",
+                placeholder="Enter model name, e.g.: facebook/wav2vec2-large-xlsr-53",
+            )
+            test_btn = gr.Button("🚀 Start Test", variant="primary")
+        result_output = gr.Textbox(label="📋 Test Results", lines=25, max_lines=50)
+        test_btn.click(test_single_model, inputs=[model_input], outputs=[result_output])
+    # Tab 2: one-click batch run over the built-in recommended list
+    with gr.Tab("Recommended Models Batch Test"):
+        gr.Markdown(_BATCH_MD)
+        batch_test_btn = gr.Button("🔍 Batch Test Recommended Models", variant="primary")
+        batch_result_output = gr.Textbox(label="📊 Batch Test Results", lines=20, max_lines=50)
+        batch_test_btn.click(test_recommended_models, inputs=[], outputs=[batch_result_output])
+    # Tab 3: static help text
+    with gr.Tab("Usage Instructions"):
+        gr.Markdown(_USAGE_MD)
+# Start the server only when this file is executed as a script.
+if __name__ == "__main__":
+    app.launch()