youssefga28 committed on
Commit
603aa36
·
verified ·
1 Parent(s): dc66791

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -101
app.py CHANGED
@@ -1,101 +1,101 @@
1
- import os
2
- import requests
3
- import tempfile
4
- import gradio as gr
5
- from moviepy.editor import VideoFileClip
6
- from speechbrain.inference.interfaces import foreign_class
7
- import whisper
8
- from together import Together
9
-
10
# Load the Whisper "base" speech-to-text model a single time at import so it is
# not reloaded for every request.
_whisper_model = whisper.load_model("base")

# Build the SpeechBrain accent classifier once as well. `foreign_class` loads
# the custom interface class named below from the given model source.
_classifier = foreign_class(
    classname="CustomEncoderWav2vec2Classifier",
    pymodule_file="custom_interface.py",
    source="warisqr7/accent-id-commonaccent_xlsr-en-english",
)
19
-
20
# Helper to download a direct-mp4 URL to a temp file
def download_video(url: str, timeout: float = 30.0) -> str:
    """Stream the video at ``url`` into a temporary ``.mp4`` file.

    Args:
        url: Direct HTTP(S) link to the video file.
        timeout: Seconds to wait for the connection and for each read before
            giving up, so an unresponsive server cannot hang the request.

    Returns:
        Path of the temporary file. The caller is responsible for deleting it.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    # The response is a context manager; leaving the block releases the
    # connection even when streaming is interrupted.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            path = tmp.name
            try:
                for chunk in resp.iter_content(chunk_size=8192):
                    tmp.write(chunk)
            except BaseException:
                # Do not leave a truncated download behind on failure.
                tmp.close()
                os.remove(path)
                raise
    return path
29
-
30
# Helper to extract audio to a temp file
def extract_audio(video_path: str) -> str:
    """Write the audio track of ``video_path`` to a temporary ``.mp3`` file.

    Args:
        video_path: Path of a local video file.

    Returns:
        Path of the temporary audio file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the clip exposes no audio track (``clip.audio`` is None).
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyze.")

        # Reserve a unique path and close the handle immediately; only the
        # name is needed, the audio writer opens the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name

        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except BaseException:
            # Remove the reserved file if the export did not complete.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            raise
    finally:
        # Always release the reader held by the clip, even on failure.
        clip.close()
    return audio_path
37
-
38
# Main pipeline
def analyze_url(video_url):
    """Run accent detection, transcription and LLM feedback for a video URL.

    Steps: download the video, extract its audio, classify the accent,
    transcribe the speech with Whisper, then ask a Together-hosted LLM to rate
    the speaker.

    Args:
        video_url: Direct link to an MP4 video.

    Returns:
        A 4-tuple ``(accent, confidence, transcript, analysis)`` of strings.
        On any failure the tuple is ``("Error", "", "", <error message>)``.

    The Together API key is read from the ``TOGETHER_API_KEY`` environment
    variable rather than being embedded in the source.
    """
    vid = aud = None
    try:
        url = (video_url or "").strip()
        if not url:
            raise ValueError("Please provide a direct MP4 video URL.")

        # 1. Download & extract
        vid = download_video(url)
        aud = extract_audio(vid)

        # 2. Accent classification
        _, score, _, lab = _classifier.classify_file(aud)
        accent = lab[0]
        conf_pct = round(float(score) * 100, 2)

        # 3. Transcription
        transcript = _whisper_model.transcribe(aud)["text"]

        # 4. LLM analysis
        # SECURITY: a key that was previously hard-coded here has been
        # published with the source and should be revoked.
        api_key = os.environ.get("TOGETHER_API_KEY")
        if not api_key:
            raise RuntimeError("TOGETHER_API_KEY environment variable is not set.")
        client = Together(api_key=api_key)
        prompt = (
            "\n"
            "You are an English-speaking coach. Given this transcript of a "
            f"spoken English audio with an {accent} accent and classification "
            f"confidence {conf_pct}%:\n"
            f'"""{transcript}"""\n'
            "\n"
            "Evaluate how confident the speaker sounds based on fluency, "
            "clarity, filler usage, professional English, and pacing.\n"
            "Provide:\n"
            "- A proficiency score between 0 and 100\n"
            "- A brief explanation\n"
        )
        resp = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=[{"role": "user", "content": prompt}],
        )
        analysis = resp.choices[0].message.content.strip()

        return accent, f"{conf_pct}%", transcript, analysis

    except Exception as e:
        # UI boundary: report the failure in the output fields.
        return "Error", "", "", str(e)

    finally:
        # Clean up temp files on success and on failure alike.
        for path in (vid, aud):
            if path is not None:
                try:
                    os.remove(path)
                except OSError:
                    pass
80
-
81
# Build Gradio interface
# NOTE(review): the indentation of this section was lost in the source, so the
# nesting of widgets inside the rows is reconstructed - confirm the layout.
with gr.Blocks(title="English Accent & Confidence Analyzer") as demo:
    gr.Markdown("## 🎙️ English Accent Detection & Confidence Analysis")

    # Input row: the video URL and the trigger button.
    with gr.Row():
        inp = gr.Textbox(label="Direct MP4 Video URL", placeholder="https://...")
        run = gr.Button("Analyze")

    # Result fields, in the same order as the tuple returned by analyze_url.
    with gr.Row():
        out1 = gr.Textbox(label="Detected Accent")
        out2 = gr.Textbox(label="Accent Confidence")
    out3 = gr.Textbox(label="Transcript", lines=5)
    out4 = gr.Textbox(label="LLM Confidence Analysis", lines=10)

    # Wire the button to the pipeline and expose it under the "analyze" API name.
    run.click(
        api_name="analyze",
        fn=analyze_url,
        inputs=inp,
        outputs=[out1, out2, out3, out4],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
1
+ import os
2
+ import requests
3
+ import tempfile
4
+ import gradio as gr
5
+ from moviepy import VideoFileClip
6
+ from speechbrain.inference.interfaces import foreign_class
7
+ import whisper
8
+ from together import Together
9
+
10
# Load the Whisper "base" speech-to-text model a single time at import so it is
# not reloaded for every request.
_whisper_model = whisper.load_model("base")

# Build the SpeechBrain accent classifier once as well. `foreign_class` loads
# the custom interface class named below from the given model source.
_classifier = foreign_class(
    classname="CustomEncoderWav2vec2Classifier",
    pymodule_file="custom_interface.py",
    source="warisqr7/accent-id-commonaccent_xlsr-en-english",
)
19
+
20
# Helper to download a direct-mp4 URL to a temp file
def download_video(url: str, timeout: float = 30.0) -> str:
    """Stream the video at ``url`` into a temporary ``.mp4`` file.

    Args:
        url: Direct HTTP(S) link to the video file.
        timeout: Seconds to wait for the connection and for each read before
            giving up, so an unresponsive server cannot hang the request.

    Returns:
        Path of the temporary file. The caller is responsible for deleting it.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    # The response is a context manager; leaving the block releases the
    # connection even when streaming is interrupted.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            path = tmp.name
            try:
                for chunk in resp.iter_content(chunk_size=8192):
                    tmp.write(chunk)
            except BaseException:
                # Do not leave a truncated download behind on failure.
                tmp.close()
                os.remove(path)
                raise
    return path
29
+
30
# Helper to extract audio to a temp file
def extract_audio(video_path: str) -> str:
    """Write the audio track of ``video_path`` to a temporary ``.mp3`` file.

    Args:
        video_path: Path of a local video file.

    Returns:
        Path of the temporary audio file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the clip exposes no audio track (``clip.audio`` is None).
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track to analyze.")

        # Reserve a unique path and close the handle immediately; only the
        # name is needed, the audio writer opens the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            audio_path = tmp.name

        try:
            clip.audio.write_audiofile(audio_path, logger=None)
        except BaseException:
            # Remove the reserved file if the export did not complete.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            raise
    finally:
        # Always release the reader held by the clip, even on failure.
        clip.close()
    return audio_path
37
+
38
# Main pipeline
def analyze_url(video_url):
    """Run accent detection, transcription and LLM feedback for a video URL.

    Steps: download the video, extract its audio, classify the accent,
    transcribe the speech with Whisper, then ask a Together-hosted LLM to rate
    the speaker.

    Args:
        video_url: Direct link to an MP4 video.

    Returns:
        A 4-tuple ``(accent, confidence, transcript, analysis)`` of strings.
        On any failure the tuple is ``("Error", "", "", <error message>)``.

    The Together API key is read from the ``TOGETHER_API_KEY`` environment
    variable rather than being embedded in the source.
    """
    vid = aud = None
    try:
        url = (video_url or "").strip()
        if not url:
            raise ValueError("Please provide a direct MP4 video URL.")

        # 1. Download & extract
        vid = download_video(url)
        aud = extract_audio(vid)

        # 2. Accent classification
        _, score, _, lab = _classifier.classify_file(aud)
        accent = lab[0]
        conf_pct = round(float(score) * 100, 2)

        # 3. Transcription
        transcript = _whisper_model.transcribe(aud)["text"]

        # 4. LLM analysis
        # SECURITY: a key that was previously hard-coded here has been
        # published with the source and should be revoked.
        api_key = os.environ.get("TOGETHER_API_KEY")
        if not api_key:
            raise RuntimeError("TOGETHER_API_KEY environment variable is not set.")
        client = Together(api_key=api_key)
        prompt = (
            "\n"
            "You are an English-speaking coach. Given this transcript of a "
            f"spoken English audio with an {accent} accent and classification "
            f"confidence {conf_pct}%:\n"
            f'"""{transcript}"""\n'
            "\n"
            "Evaluate how confident the speaker sounds based on fluency, "
            "clarity, filler usage, professional English, and pacing.\n"
            "Provide:\n"
            "- A proficiency score between 0 and 100\n"
            "- A brief explanation\n"
        )
        resp = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
            messages=[{"role": "user", "content": prompt}],
        )
        analysis = resp.choices[0].message.content.strip()

        return accent, f"{conf_pct}%", transcript, analysis

    except Exception as e:
        # UI boundary: report the failure in the output fields.
        return "Error", "", "", str(e)

    finally:
        # Clean up temp files on success and on failure alike.
        for path in (vid, aud):
            if path is not None:
                try:
                    os.remove(path)
                except OSError:
                    pass
80
+
81
# Build Gradio interface
# NOTE(review): the indentation of this section was lost in the source, so the
# nesting of widgets inside the rows is reconstructed - confirm the layout.
with gr.Blocks(title="English Accent & Confidence Analyzer") as demo:
    gr.Markdown("## 🎙️ English Accent Detection & Confidence Analysis")

    # Input row: the video URL and the trigger button.
    with gr.Row():
        inp = gr.Textbox(label="Direct MP4 Video URL", placeholder="https://...")
        run = gr.Button("Analyze")

    # Result fields, in the same order as the tuple returned by analyze_url.
    with gr.Row():
        out1 = gr.Textbox(label="Detected Accent")
        out2 = gr.Textbox(label="Accent Confidence")
    out3 = gr.Textbox(label="Transcript", lines=5)
    out4 = gr.Textbox(label="LLM Confidence Analysis", lines=10)

    # Wire the button to the pipeline and expose it under the "analyze" API name.
    run.click(
        api_name="analyze",
        fn=analyze_url,
        inputs=inp,
        outputs=[out1, out2, out3, out4],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()