Samimizhr committed on
Commit
e815c6b
·
verified ·
1 Parent(s): 92beb66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -270
app.py CHANGED
@@ -1,271 +1,260 @@
1
- # app.py - FIXED VERSION
2
- import streamlit as st
3
- import os
4
- import sys
5
-
6
- # MUST BE FIRST STREAMLIT COMMAND
7
- st.set_page_config(
8
- page_title="English Language & Accent Detection",
9
- page_icon="🌍",
10
- layout="centered"
11
- )
12
-
13
- # STREAMLIT CLOUD OPTIMIZATIONS
14
- import torch
15
- torch.set_num_threads(1) # Reduce CPU usage
16
- os.environ['TOKENIZERS_PARALLELISM'] = 'false' # Avoid threading issues
17
-
18
- # Add error handling for imports
19
- try:
20
- from utils import download_video, extract_audio, analyze_speech, cleanup_files
21
- except ImportError as e:
22
- st.error(f"❌ Import Error: {e}")
23
- st.info("This might be a deployment issue. Please check the logs.")
24
- st.stop()
25
-
26
- st.title("🌍 English Language & Accent Detection Tool")
27
- st.write("Upload a video to first detect if the speaker is speaking English, then analyze their English accent.")
28
-
29
- # Add a warning for Streamlit Cloud users
30
- st.info("⚠️ **Note**: First-time model loading may take 2-3 minutes. Please be patient!")
31
-
32
- # Information section
33
- with st.expander("ℹ️ How this tool works"):
34
- st.write("""
35
- ## Two-Step Analysis Process:
36
-
37
- ### Step 1: Language Detection 🌍
38
- - **Detects what language** the speaker is using
39
- - **Supports 107+ languages** using advanced AI models
40
- - **Only proceeds to accent analysis** if English is detected
41
-
42
- ### Step 2: English Accent Analysis 🎯 (Only if English detected)
43
- - **16 different English accents** can be identified:
44
- - American, British (England), Australian, Indian, Canadian
45
- - Scottish, Irish, Welsh, South African, New Zealand
46
- - Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
47
-
48
- ## Perfect for:
49
- ✅ **Recruitment screening** - Verify English language candidates
50
- ✅ **Language assessment** - Determine if applicant speaks English
51
- ✅ **Accent identification** - Identify specific English accent varieties
52
- ✅ **Call center hiring** - Screen for English-speaking candidates
53
-
54
- ## Requirements:
55
- - Direct video file URL (MP4, AVI, MOV, etc.)
56
- - Clear audio with minimal background noise
57
- - At least 10-15 seconds of speech
58
- - Single speaker preferred
59
- """)
60
-
61
- # URL input
62
- video_url = st.text_input(
63
- "🔗 Video URL:",
64
- placeholder="https://example.com/video.mp4",
65
- help="Enter a direct link to a video file"
66
- )
67
-
68
- # Analysis button
69
- if st.button("🔍 Analyze Language & Accent", type="primary"):
70
- if not video_url.strip():
71
- st.warning("⚠️ Please enter a video URL first.")
72
- else:
73
- video_path = None
74
- audio_path = None
75
-
76
- try:
77
- # Download video
78
- with st.spinner("📥 Downloading video..."):
79
- video_path = download_video(video_url.strip())
80
-
81
- if not video_path or not os.path.exists(video_path):
82
- st.error("❌ **Video download failed!**")
83
- st.write("**Possible reasons:**")
84
- st.write("- URL is not a direct link to a video file")
85
- st.write("- Video is behind authentication/login")
86
- st.write("- Server is blocking requests")
87
- st.write("- URL is incorrect or video doesn't exist")
88
- st.stop()
89
-
90
- st.success(f"✅ Video downloaded ({os.path.getsize(video_path):,} bytes)")
91
-
92
- # Extract audio
93
- with st.spinner("🎡 Extracting audio..."):
94
- audio_path = extract_audio(video_path)
95
-
96
- if not audio_path or not os.path.exists(audio_path):
97
- st.error("❌ **Audio extraction failed!**")
98
- st.write("**Possible reasons:**")
99
- st.write("- Video file is corrupted")
100
- st.write("- Video format not supported")
101
- st.write("- Video has no audio track")
102
- st.write("- FFmpeg is not properly installed")
103
- st.stop()
104
-
105
- st.success(f"✅ Audio extracted ({os.path.getsize(audio_path):,} bytes)")
106
-
107
- # Analyze speech
108
- with st.spinner("🧠 Analyzing language and accent... This may take 2-3 minutes on first run..."):
109
- try:
110
- is_english, language, accent, lang_confidence, accent_confidence = analyze_speech(audio_path)
111
-
112
- # Display results
113
- st.markdown("---")
114
- st.markdown("### 🎯 Analysis Results")
115
-
116
- if not is_english:
117
- # NOT ENGLISH
118
- st.error("❌ **Speaker is NOT speaking English**")
119
-
120
- col1, col2 = st.columns(2)
121
- with col1:
122
- st.metric(
123
- label="Detected Language",
124
- value=language.title()
125
- )
126
- with col2:
127
- st.metric(
128
- label="Confidence",
129
- value=f"{lang_confidence:.1f}%"
130
- )
131
-
132
- st.info("💡 **For English accent analysis, please provide a video where the speaker is speaking English.**")
133
-
134
- with st.expander("🌍 About Language Detection"):
135
- st.write(f"""
136
- **Detected Language:** {language.title()}
137
- **Detection Confidence:** {lang_confidence:.1f}%
138
-
139
- This tool first detects what language is being spoken before proceeding to accent analysis.
140
- Since the speaker appears to be speaking **{language.title()}** rather than English,
141
- we cannot proceed with English accent detection.
142
-
143
- **To get English accent analysis:**
144
- - Provide a video where the speaker is clearly speaking English
145
- - Ensure the audio quality is good
146
- - Make sure there's at least 10-15 seconds of speech
147
- """)
148
-
149
- else:
150
- # IS ENGLISH - Show accent results
151
- st.success("✅ **Speaker IS speaking English!**")
152
-
153
- # Main metrics
154
- col1, col2, col3 = st.columns(3)
155
- with col1:
156
- st.metric(
157
- label="Language",
158
- value="English ✅"
159
- )
160
- with col2:
161
- st.metric(
162
- label="Detected Accent",
163
- value=accent
164
- )
165
- with col3:
166
- st.metric(
167
- label="Accent Confidence",
168
- value=f"{accent_confidence:.1f}%"
169
- )
170
-
171
- # Confidence interpretation
172
- if accent_confidence >= 80:
173
- st.success("🎯 High confidence accent prediction")
174
- elif accent_confidence >= 60:
175
- st.info("🤔 Moderate confidence accent prediction")
176
- else:
177
- st.warning("⚠️ Low confidence accent prediction - results may be unreliable")
178
-
179
- # Detailed results
180
- with st.expander("📊 Detailed Analysis Results"):
181
- st.write(f"""
182
- ### Language Detection Results:
183
- **Language:** English
184
- **Language Confidence:** {lang_confidence:.1f}%
185
-
186
- ### English Accent Analysis:
187
- **Detected Accent:** {accent}
188
- **Accent Confidence:** {accent_confidence:.1f}%
189
-
190
- ### Interpretation:
191
- The AI first confirmed that the speaker is speaking English with {lang_confidence:.1f}% confidence.
192
- Then it analyzed the English accent and detected **{accent}** accent patterns
193
- with {accent_confidence:.1f}% confidence.
194
-
195
- ### Factors affecting accuracy:
196
- - Audio quality and clarity
197
- - Background noise levels
198
- - Speaker's accent strength
199
- - Length of speech sample
200
- - Speaking style and pace
201
-
202
- ### Supported English Accents:
203
- American, British (England), Australian, Indian, Canadian, Scottish, Irish, Welsh,
204
- South African, New Zealand, Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
205
- """)
206
-
207
- # For recruiters
208
- st.markdown("### 👔 For Recruiters & HR:")
209
- if lang_confidence >= 80:
210
- st.success("✅ **CANDIDATE SPEAKS ENGLISH** - Suitable for English-speaking roles")
211
- elif lang_confidence >= 60:
212
- st.info("🤔 **LIKELY SPEAKS ENGLISH** - May need additional assessment")
213
- else:
214
- st.warning("⚠️ **UNCERTAIN** - Recommend manual review or additional testing")
215
-
216
- except Exception as e:
217
- st.error("❌ **Analysis failed!**")
218
- st.write("**Error details:**")
219
- st.code(str(e))
220
- st.write("**Possible solutions:**")
221
- st.write("- Try a different video with clearer audio")
222
- st.write("- Ensure the video contains clear speech (any language)")
223
- st.write("- Check that the audio is at least 10-15 seconds long")
224
- st.write("- Verify the video URL is accessible")
225
-
226
- except Exception as e:
227
- st.error(f"❌ **Unexpected error occurred:**")
228
- st.code(str(e))
229
- st.write("Please try again with a different video or contact support if the issue persists.")
230
-
231
- finally:
232
- # Clean up temporary files
233
- if video_path or audio_path:
234
- cleanup_files(video_path, audio_path)
235
-
236
- # Use cases section
237
- st.markdown("---")
238
- st.markdown("### 🎯 Use Cases")
239
-
240
- col1, col2 = st.columns(2)
241
-
242
- with col1:
243
- st.markdown("""
244
- **🏒 For Recruitment:**
245
- - Screen English-speaking candidates
246
- - Verify language requirements
247
- - Identify accent preferences
248
- - Filter initial applications
249
- """)
250
-
251
- with col2:
252
- st.markdown("""
253
- **📞 For Call Centers:**
254
- - Assess English fluency
255
- - Match accents to regions
256
- - Quality control checks
257
- - Training needs assessment
258
- """)
259
-
260
- # Footer
261
- st.markdown("---")
262
- st.markdown(
263
- """
264
- <div style='text-align: center; color: #666; font-size: 0.8em;'>
265
- 🌍 This tool first detects if the speaker is speaking English, then analyzes their English accent.<br>
266
- Perfect for recruitment screening and language assessment.<br>
267
- Results are AI-generated estimates and may not always be 100% accurate.
268
- </div>
269
- """,
270
- unsafe_allow_html=True
271
  )
 
1
+ # app.py - FIXED VERSION
2
+ import streamlit as st
3
+ import os
4
+ import sys
5
+
6
+ # MUST BE FIRST STREAMLIT COMMAND
7
+ st.set_page_config(
8
+ page_title="English Language & Accent Detection",
9
+ page_icon="🌍",
10
+ layout="centered"
11
+ )
12
+
13
+ # STREAMLIT CLOUD OPTIMIZATIONS
14
+ import torch
15
+ torch.set_num_threads(1) # Reduce CPU usage
16
+ os.environ['TOKENIZERS_PARALLELISM'] = 'false' # Avoid threading issues
17
+
18
+ # Add error handling for imports
19
+ try:
20
+ from utils import download_video, extract_audio, analyze_speech, cleanup_files
21
+ except ImportError as e:
22
+ st.error(f"❌ Import Error: {e}")
23
+ st.info("This might be a deployment issue. Please check the logs.")
24
+ st.stop()
25
+
26
+ st.title("🌍 English Language & Accent Detection Tool")
27
+ st.write("Upload a video to first detect if the speaker is speaking English, then analyze their English accent.")
28
+
29
+ # Add a warning for Streamlit Cloud users
30
+ st.info("⚠️ **Note**: First-time model loading may take 2-3 minutes. Please be patient!")
31
+
32
+ # Information section
33
+ with st.expander("ℹ️ How this tool works"):
34
+ st.write("""
35
+ ## Two-Step Analysis Process:
36
+
37
+ ### Step 1: Language Detection 🌍
38
+ - **Detects what language** the speaker is using
39
+ - **Supports 107+ languages** using advanced AI models
40
+ - **Only proceeds to accent analysis** if English is detected
41
+
42
+ ### Step 2: English Accent Analysis 🎯 (Only if English detected)
43
+ - **16 different English accents** can be identified:
44
+ - American, British (England), Australian, Indian, Canadian
45
+ - Scottish, Irish, Welsh, South African, New Zealand
46
+ - Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
47
+
48
+ ## Perfect for:
49
+ ✅ **Recruitment screening** - Verify English language candidates
50
+ ✅ **Language assessment** - Determine if applicant speaks English
51
+ ✅ **Accent identification** - Identify specific English accent varieties
52
+ ✅ **Call center hiring** - Screen for English-speaking candidates
53
+
54
+ ## Requirements:
55
+ - Direct video file URL (MP4, AVI, MOV, etc.)
56
+ - Clear audio with minimal background noise
57
+ - At least 10-15 seconds of speech
58
+ - Single speaker preferred
59
+ """)
60
+
61
+ # URL input
62
+ video_url = st.text_input(
63
+ "🔗 Video URL:",
64
+ placeholder="https://example.com/video.mp4",
65
+ help="Enter a direct link to a video file"
66
+ )
67
+
68
+ # Analysis button
69
+ if st.button("🔍 Analyze Language & Accent", type="primary"):
70
+ if not video_url.strip():
71
+ st.warning("⚠️ Please enter a video URL first.")
72
+ else:
73
+ video_path = None
74
+ audio_path = None
75
+
76
+ try:
77
+ # Download video
78
+ with st.spinner("📥 Downloading video..."):
79
+ video_path = download_video(video_url.strip())
80
+
81
+ if not video_path or not os.path.exists(video_path):
82
+ st.error("❌ **Video download failed!**")
83
+ st.write("**Possible reasons:**")
84
+ st.write("- URL is not a direct link to a video file")
85
+ st.write("- Video is behind authentication/login")
86
+ st.write("- Server is blocking requests")
87
+ st.write("- URL is incorrect or video doesn't exist")
88
+ st.stop()
89
+
90
+ st.success(f"✅ Video downloaded ({os.path.getsize(video_path):,} bytes)")
91
+
92
+ # Extract audio
93
+ with st.spinner("🎡 Extracting audio..."):
94
+ audio_path = extract_audio(video_path)
95
+
96
+ if not audio_path or not os.path.exists(audio_path):
97
+ st.error("❌ **Audio extraction failed!**")
98
+ st.write("**Possible reasons:**")
99
+ st.write("- Video file is corrupted")
100
+ st.write("- Video format not supported")
101
+ st.write("- Video has no audio track")
102
+ st.write("- FFmpeg is not properly installed")
103
+ st.stop()
104
+
105
+ st.success(f"✅ Audio extracted ({os.path.getsize(audio_path):,} bytes)")
106
+
107
+ # Analyze speech
108
+ with st.spinner("🧠 Analyzing language and accent... This may take 2-3 minutes on first run..."):
109
+ try:
110
+ is_english, language, accent, lang_confidence, accent_confidence = analyze_speech(audio_path)
111
+
112
+ # Display results
113
+ st.markdown("---")
114
+ st.markdown("### 🎯 Analysis Results")
115
+
116
+ if not is_english:
117
+ # NOT ENGLISH
118
+ st.error("❌ **Speaker is NOT speaking English**")
119
+
120
+ col1, col2 = st.columns(2)
121
+ with col1:
122
+ st.metric(
123
+ label="Detected Language",
124
+ value=language.title()
125
+ )
126
+ with col2:
127
+ st.metric(
128
+ label="Confidence",
129
+ value=f"{lang_confidence:.1f}%"
130
+ )
131
+
132
+ st.info("💡 **For English accent analysis, please provide a video where the speaker is speaking English.**")
133
+
134
+ with st.expander("🌍 About Language Detection"):
135
+ st.write(f"""
136
+ **Detected Language:** {language.title()}
137
+ **Detection Confidence:** {lang_confidence:.1f}%
138
+
139
+ This tool first detects what language is being spoken before proceeding to accent analysis.
140
+ Since the speaker appears to be speaking **{language.title()}** rather than English,
141
+ we cannot proceed with English accent detection.
142
+
143
+ **To get English accent analysis:**
144
+ - Provide a video where the speaker is clearly speaking English
145
+ - Ensure the audio quality is good
146
+ - Make sure there's at least 10-15 seconds of speech
147
+ """)
148
+
149
+ else:
150
+ # IS ENGLISH - Show accent results
151
+ st.success("✅ **Speaker IS speaking English!**")
152
+
153
+ # Main metrics
154
+ col1, col2, col3 = st.columns(3)
155
+ with col1:
156
+ st.metric(
157
+ label="Language",
158
+ value="English ✅"
159
+ )
160
+ with col2:
161
+ st.metric(
162
+ label="Detected Accent",
163
+ value=accent
164
+ )
165
+ with col3:
166
+ st.metric(
167
+ label="Accent Confidence",
168
+ value=f"{accent_confidence:.1f}%"
169
+ )
170
+
171
+ # Confidence interpretation
172
+ if accent_confidence >= 80:
173
+ st.success("🎯 High confidence accent prediction")
174
+ elif accent_confidence >= 60:
175
+ st.info("🤔 Moderate confidence accent prediction")
176
+ else:
177
+ st.warning("⚠️ Low confidence accent prediction - results may be unreliable")
178
+
179
+ # Detailed results
180
+ with st.expander("📊 Detailed Analysis Results"):
181
+ st.write(f"""
182
+
183
+
184
+ ### Factors affecting accuracy:
185
+ - Audio quality and clarity
186
+ - Background noise levels
187
+ - Speaker's accent strength
188
+ - Length of speech sample
189
+ - Speaking style and pace
190
+
191
+ ### Supported English Accents:
192
+ American, British (England), Australian, Indian, Canadian, Scottish, Irish, Welsh,
193
+ South African, New Zealand, Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
194
+ """)
195
+
196
+ # For recruiters
197
+ st.markdown("### 👔 For Recruiters & HR:")
198
+ if lang_confidence >= 80:
199
+ st.success("✅ **CANDIDATE SPEAKS ENGLISH** - Suitable for English-speaking roles")
200
+ elif lang_confidence >= 60:
201
+ st.info("🤔 **LIKELY SPEAKS ENGLISH** - May need additional assessment")
202
+ else:
203
+ st.warning("⚠️ **UNCERTAIN** - Recommend manual review or additional testing")
204
+
205
+ except Exception as e:
206
+ st.error("❌ **Analysis failed!**")
207
+ st.write("**Error details:**")
208
+ st.code(str(e))
209
+ st.write("**Possible solutions:**")
210
+ st.write("- Try a different video with clearer audio")
211
+ st.write("- Ensure the video contains clear speech (any language)")
212
+ st.write("- Check that the audio is at least 10-15 seconds long")
213
+ st.write("- Verify the video URL is accessible")
214
+
215
+ except Exception as e:
216
+ st.error(f"❌ **Unexpected error occurred:**")
217
+ st.code(str(e))
218
+ st.write("Please try again with a different video or contact support if the issue persists.")
219
+
220
+ finally:
221
+ # Clean up temporary files
222
+ if video_path or audio_path:
223
+ cleanup_files(video_path, audio_path)
224
+
225
+ # Use cases section
226
+ st.markdown("---")
227
+ st.markdown("### 🎯 Use Cases")
228
+
229
+ col1, col2 = st.columns(2)
230
+
231
+ with col1:
232
+ st.markdown("""
233
+ **🏒 For Recruitment:**
234
+ - Screen English-speaking candidates
235
+ - Verify language requirements
236
+ - Identify accent preferences
237
+ - Filter initial applications
238
+ """)
239
+
240
+ with col2:
241
+ st.markdown("""
242
+ **📞 For Call Centers:**
243
+ - Assess English fluency
244
+ - Match accents to regions
245
+ - Quality control checks
246
+ - Training needs assessment
247
+ """)
248
+
249
+ # Footer
250
+ st.markdown("---")
251
+ st.markdown(
252
+ """
253
+ <div style='text-align: center; color: #666; font-size: 0.8em;'>
254
+ 🌍 This tool first detects if the speaker is speaking English, then analyzes their English accent.<br>
255
+ Perfect for recruitment screening and language assessment.<br>
256
+ Results are AI-generated estimates and may not always be 100% accurate.
257
+ </div>
258
+ """,
259
+ unsafe_allow_html=True
 
 
 
 
 
 
 
 
 
 
 
260
  )