Tantan18 committed on
Commit
3df16ad
·
verified ·
1 Parent(s): bd09630

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +363 -0
app.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import (
5
+ AutoModel, AutoProcessor, AutoFeatureExtractor,
6
+ AutoTokenizer, pipeline
7
+ )
8
+ import warnings
9
+ warnings.filterwarnings("ignore")
10
+
11
+ def test_single_model(model_name):
12
+ """Test compatibility of a single model"""
13
+
14
+ if not model_name.strip():
15
+ return "Please enter a model name"
16
+
17
+ result_text = f"πŸ” Testing Model: {model_name}\n"
18
+ result_text += "=" * 60 + "\n\n"
19
+
20
+ try:
21
+ # 1. Load model
22
+ result_text += "1️⃣ Loading Model...\n"
23
+ model = AutoModel.from_pretrained(model_name)
24
+ result_text += " βœ… Model loaded successfully\n"
25
+ result_text += f" πŸ“Š Model type: {model.config.model_type}\n"
26
+ result_text += f" πŸ—οΈ Model class: {model.__class__.__name__}\n\n"
27
+
28
+ # 2. Check model architecture
29
+ result_text += "2️⃣ Checking Model Architecture...\n"
30
+ if hasattr(model.config, 'hidden_size'):
31
+ result_text += f" πŸ”’ Hidden size: {model.config.hidden_size}\n"
32
+ if hasattr(model.config, 'num_hidden_layers'):
33
+ result_text += f" πŸ“š Number of layers: {model.config.num_hidden_layers}\n"
34
+ if hasattr(model.config, 'vocab_size'):
35
+ result_text += f" πŸ“– Vocabulary size: {model.config.vocab_size}\n"
36
+ result_text += "\n"
37
+
38
+ # 3. Try to load processor
39
+ result_text += "3️⃣ Loading Processor...\n"
40
+ processor = None
41
+ supports_audio = False
42
+
43
+ try:
44
+ processor = AutoProcessor.from_pretrained(model_name)
45
+ result_text += f" βœ… Processor loaded successfully: {processor.__class__.__name__}\n"
46
+ supports_audio = True
47
+ except:
48
+ try:
49
+ processor = AutoFeatureExtractor.from_pretrained(model_name)
50
+ result_text += f" βœ… Feature extractor loaded successfully: {processor.__class__.__name__}\n"
51
+ supports_audio = True
52
+ except:
53
+ result_text += " ❌ Cannot load audio processor\n"
54
+ supports_audio = False
55
+ result_text += "\n"
56
+
57
+ # 4. Check input requirements
58
+ result_text += "4️⃣ Checking Input Requirements...\n"
59
+ sampling_rate = 16000 # Default value
60
+ if processor and supports_audio:
61
+ if hasattr(processor, 'sampling_rate'):
62
+ sampling_rate = processor.sampling_rate
63
+ result_text += f" 🎡 Sampling rate: {sampling_rate} Hz\n"
64
+ if hasattr(processor, 'feature_size'):
65
+ result_text += f" πŸ“ Feature dimension: {processor.feature_size}\n"
66
+ if hasattr(processor, 'return_attention_mask'):
67
+ result_text += f" 🎭 Supports attention mask: {processor.return_attention_mask}\n"
68
+ result_text += "\n"
69
+
70
+ # 5. Test inference
71
+ result_text += "5️⃣ Testing Inference...\n"
72
+ if supports_audio:
73
+ try:
74
+ # Create dummy audio data (2 seconds)
75
+ dummy_audio = np.random.randn(sampling_rate * 2).astype(np.float32)
76
+
77
+ # Process audio
78
+ inputs = processor(dummy_audio, sampling_rate=sampling_rate, return_tensors="pt")
79
+
80
+ # Model inference
81
+ with torch.no_grad():
82
+ outputs = model(**inputs)
83
+
84
+ # Check output
85
+ if hasattr(outputs, 'last_hidden_state'):
86
+ shape = outputs.last_hidden_state.shape
87
+ result_text += f" βœ… Inference successful! Hidden state shape: {shape}\n"
88
+ elif hasattr(outputs, 'logits'):
89
+ shape = outputs.logits.shape
90
+ result_text += f" βœ… Inference successful! Logits shape: {shape}\n"
91
+ else:
92
+ result_text += f" βœ… Inference successful! Output type: {type(outputs)}\n"
93
+
94
+ except Exception as e:
95
+ result_text += f" ❌ Inference failed: {str(e)}\n"
96
+ else:
97
+ result_text += " ⚠️ Audio input not supported, skipping inference test\n"
98
+ result_text += "\n"
99
+
100
+ # 6. Multilingual support check
101
+ result_text += "6️⃣ Multilingual Support Check...\n"
102
+ multilingual = False
103
+
104
+ if hasattr(model.config, 'vocab_size') and model.config.vocab_size > 50000:
105
+ result_text += f" βœ… Likely supports multiple languages (large vocabulary: {model.config.vocab_size})\n"
106
+ multilingual = True
107
+ elif any(keyword in model_name.lower() for keyword in ['xlsr', 'multilingual', 'cross-lingual']):
108
+ result_text += " βœ… Supports multiple languages based on model name\n"
109
+ multilingual = True
110
+ else:
111
+ result_text += " ❓ Multilingual support unclear\n"
112
+ result_text += "\n"
113
+
114
+ # 7. Depression detection suitability scoring
115
+ result_text += "7️⃣ Depression Detection Suitability Assessment...\n"
116
+ score = 0
117
+ max_score = 15
118
+
119
+ # Most important: Specifically for depression/mental health detection (6 points)
120
+ depression_keywords = ['depression', 'mental-health', 'psychological', 'mood', 'phq']
121
+ if any(keyword in model_name.lower() for keyword in depression_keywords):
122
+ score += 6
123
+ result_text += " 🎯 Specifically for depression/mental health detection (+6 points)\n"
124
+
125
+ # Secondary: For emotion recognition (3 points)
126
+ emotion_keywords = ['emotion', 'sentiment', 'affective', 'feeling']
127
+ elif any(keyword in model_name.lower() for keyword in emotion_keywords):
128
+ score += 3
129
+ result_text += " 😊 For emotion recognition, potentially applicable (+3 points)\n"
130
+
131
+ # Basic requirement: Audio input support (2 points)
132
+ if supports_audio:
133
+ score += 2
134
+ result_text += " 🎡 Supports audio input (+2 points)\n"
135
+ else:
136
+ result_text += " ❌ Does not support audio input (0 points)\n"
137
+
138
+ # Multilingual support (2 points)
139
+ if multilingual:
140
+ score += 2
141
+ result_text += " 🌍 Supports multiple languages (+2 points)\n"
142
+
143
+ # Architecture suitability (2 points)
144
+ if model.config.model_type in ['wav2vec2', 'hubert', 'wavlm']:
145
+ score += 2
146
+ result_text += " πŸ—οΈ Excellent speech representation learning architecture (+2 points)\n"
147
+ elif model.config.model_type == 'whisper':
148
+ score += 1
149
+ result_text += " ⚠️ Whisper architecture needs modification for classification (+1 point)\n"
150
+
151
+ # Check if configured for classification
152
+ if hasattr(model.config, 'num_labels'):
153
+ if model.config.num_labels == 2:
154
+ score += 1
155
+ result_text += f" βœ… Binary classification task configuration (likely depression detection) (+1 point)\n"
156
+ else:
157
+ score += 0.5
158
+ result_text += f" ⚠️ Multi-class task ({model.config.num_labels} classes) (+0.5 points)\n"
159
+
160
+ # Check for training dataset clues
161
+ daic_keywords = ['daic', 'wizard-of-oz', 'depression-detection', 'clinical']
162
+ if any(keyword in model_name.lower() for keyword in daic_keywords):
163
+ score += 2
164
+ result_text += " πŸ“Š Possibly trained on clinical depression datasets (+2 points)\n"
165
+
166
+ result_text += f"\n🎯 Depression Detection Suitability Score: {score}/{max_score}\n"
167
+
168
+ # 8. Recommendations
169
+ result_text += "\n8️⃣ Usage Recommendations...\n"
170
+ if score >= 12:
171
+ result_text += " 🌟 Highly recommended! Specifically for depression detection, very suitable\n"
172
+ elif score >= 8:
173
+ result_text += " πŸ‘ Recommended, may need some fine-tuning\n"
174
+ elif score >= 5:
175
+ result_text += " ⚠️ Use with caution, may need significant modification\n"
176
+ else:
177
+ result_text += " ❌ Not recommended, suggest finding specialized depression detection models\n"
178
+
179
+ # 9. Further inspection suggestions
180
+ result_text += "\n9️⃣ Further Inspection Suggestions...\n"
181
+ result_text += " πŸ” Check model card for training data description\n"
182
+ result_text += " πŸ“Š Check if DAIC-WOZ or other depression datasets are mentioned\n"
183
+ result_text += " πŸ“ Check papers or documentation for task description\n"
184
+ result_text += " πŸ§ͺ Test with small samples to see if model output matches depression detection expectations\n"
185
+
186
+ return result_text
187
+
188
+ except Exception as e:
189
+ error_msg = f"❌ Model test failed: {str(e)}\n"
190
+ error_msg += "\nPossible causes:\n"
191
+ error_msg += "β€’ Incorrect model name\n"
192
+ error_msg += "β€’ Model requires special permissions\n"
193
+ error_msg += "β€’ Network connection issues\n"
194
+ error_msg += "β€’ Model architecture incompatibility\n"
195
+ return error_msg
196
+
197
def test_recommended_models():
    """Quick-test a fixed list of candidate models and rank them.

    Each model is loaded with ``AutoModel``; audio support is inferred from
    whether ``AutoProcessor`` or ``AutoFeatureExtractor`` can be loaded, and
    multilingual support from the vocabulary size or the model name. A
    simplified score out of 8 is computed per model.

    Returns:
        A text report with one section per model (load failures are reported
        inline and excluded from the ranking) followed by the ranking, sorted
        by score in descending order.
    """
    recommended_models = [
        "facebook/wav2vec2-large-xlsr-53",
        "microsoft/wavlm-large",
        "harshit345/xlsr-wav2vec-speech-emotion-recognition",
        "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
        "speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    ]

    report = [
        "πŸ” Batch Testing Recommended Models\n",
        "=" * 60 + "\n\n",
    ]
    results = []
    total = len(recommended_models)

    for i, model_name in enumerate(recommended_models, 1):
        report.append(f"πŸ“Š Testing {i}/{total}: {model_name}\n")
        report.append("-" * 50 + "\n")

        try:
            # Simplified quick test: only config values are needed afterwards,
            # so the model itself is released right away.
            model = AutoModel.from_pretrained(model_name)
            model_type = model.config.model_type
            vocab_size = getattr(model.config, 'vocab_size', None)
            del model

            # Check audio support: either loader succeeding is enough.
            supports_audio = False
            for loader in (AutoProcessor, AutoFeatureExtractor):
                try:
                    loader.from_pretrained(model_name)
                except Exception:
                    # Best effort: a missing front-end just means "no audio".
                    continue
                supports_audio = True
                break

            # Check multilingual (vocab_size may be absent or None)
            multilingual = (
                (vocab_size is not None and vocab_size > 50000)
                or any(keyword in model_name.lower() for keyword in ['xlsr', 'multilingual'])
            )

            # Calculate simplified score (maximum 3 + 2 + 3 = 8)
            score = 0
            if supports_audio:
                score += 3
            if multilingual:
                score += 2
            if model_type in ['wav2vec2', 'hubert', 'wavlm']:
                score += 3

            results.append({
                'name': model_name,
                'score': score,
                'audio': supports_audio,
                'multilingual': multilingual,
                'type': model_type,
            })

            report.append(
                f"βœ… Loaded successfully | Audio: {'βœ…' if supports_audio else '❌'} | "
                f"Multilingual: {'βœ…' if multilingual else '❌'} | Score: {score}/8\n\n"
            )

        except Exception as e:
            # One failing model must not abort the rest of the batch.
            report.append(f"❌ Loading failed: {e}\n\n")

    # Sort and recommend
    results.sort(key=lambda x: x['score'], reverse=True)

    report.append("πŸ† Recommendation Rankings:\n")
    report.append("=" * 40 + "\n")

    for rank, entry in enumerate(results, 1):
        report.append(f"{rank}. {entry['name']}\n")
        report.append(f" Score: {entry['score']}/8 | Type: {entry['type']}\n\n")

    return "".join(report)
273
+
274
+ # Create Gradio interface
275
# Markdown shown at the top of the page.
_INTRO_MARKDOWN = """
# πŸ€– Depression Detection Model Compatibility Test Tool

This tool helps you quickly test whether Hugging Face models are suitable for depression detection tasks.

## Features:
- βœ… Check model loading compatibility
- 🎡 Verify audio input support
- 🌍 Assess multilingual capabilities
- πŸ“Š Suitability scoring (0-15 points)
- πŸ’‘ Usage recommendations
"""

# Markdown describing the models covered by the batch test tab.
_BATCH_MARKDOWN = """
### 🌟 Recommended Depression Detection Candidate Models

These models perform well in speech emotion recognition and multilingual support:
- `facebook/wav2vec2-large-xlsr-53` - Multilingual speech representation learning
- `microsoft/wavlm-large` - Speech understanding specialized model
- `harshit345/xlsr-wav2vec-speech-emotion-recognition` - Emotion recognition
- `audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim` - Emotion dimension recognition
- `speechbrain/emotion-recognition-wav2vec2-IEMOCAP` - Emotion classification
"""

# Markdown for the usage instructions tab (scoring criteria and next steps).
_USAGE_MARKDOWN = """
## πŸ“– Usage Instructions

### Scoring Criteria (Redesigned):
- **Depression-specific model** (+6 points): Specifically for depression/mental health detection
- **Emotion recognition model** (+3 points): For emotion recognition, potentially applicable
- **Audio support** (+2 points): Whether the model can process audio input
- **Multilingual support** (+2 points): Support for Chinese, English, German, Russian
- **Architecture suitability** (+2 points): Whether model architecture is suitable for speech classification
- **Classification configuration** (+1 point): Whether configured for classification tasks
- **Clinical datasets** (+2 points): Whether trained on clinical depression datasets

### Score Interpretation:
- **12-15 points**: 🌟 Highly recommended, specialized depression detection model
- **8-11 points**: πŸ‘ Recommended, may need fine-tuning
- **5-7 points**: ⚠️ Use with caution, needs modification
- **0-4 points**: ❌ Not recommended
 
### Next Steps:
1. Select the top 2-3 models with highest scores
2. Conduct in-depth testing in Google Colab
3. Fine-tune using DAIC-WOZ dataset
4. Final evaluation with your multilingual data
"""

# Build the three-tab UI: single model test, batch test, usage instructions.
with gr.Blocks(title="πŸ€– Depression Detection Model Compatibility Test") as app:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Tab("Single Model Test"):
        with gr.Row():
            model_input = gr.Textbox(
                label="πŸ” Model Name",
                value="ireneminhee/speech-to-depression",
                placeholder="Enter model name, e.g.: facebook/wav2vec2-large-xlsr-53",
            )
            test_btn = gr.Button("πŸš€ Start Test", variant="primary")

        result_output = gr.Textbox(label="πŸ“‹ Test Results", lines=25, max_lines=50)

        # Button click runs the single-model report and shows it in the textbox.
        test_btn.click(test_single_model, [model_input], [result_output])

    with gr.Tab("Recommended Models Batch Test"):
        gr.Markdown(_BATCH_MARKDOWN)

        batch_test_btn = gr.Button("πŸ” Batch Test Recommended Models", variant="primary")
        batch_result_output = gr.Textbox(label="πŸ“Š Batch Test Results", lines=20, max_lines=50)

        # The batch test takes no inputs.
        batch_test_btn.click(test_recommended_models, [], [batch_result_output])

    with gr.Tab("Usage Instructions"):
        gr.Markdown(_USAGE_MARKDOWN)

# Launch application only when run as a script
if __name__ == "__main__":
    app.launch()