Spaces:

ash-171
/

accent-detection

Sleeping

App Files Community

ash-171 committed 9 days ago

Commit

ffd2caa

verified ·

1 Parent(s): b157c0a

Update src/tools/accent_tool.py

Browse files

Files changed (1) hide show

src/tools/accent_tool.py +30 -5

src/tools/accent_tool.py CHANGED Viewed

@@ -1,4 +1,7 @@
-import os, requests, shutil
 from pydub import AudioSegment
 import whisper
 from speechbrain.pretrained.interfaces import foreign_class
@@ -11,7 +14,7 @@ class AccentAnalyzerTool:
             pymodule_file="custom_interface.py",
             classname="CustomEncoderWav2vec2Classifier"
         )
-        self.last_transcript = None
     def log(self, msg):
         print(f"[AccentAnalyzerTool] {msg}")
@@ -20,11 +23,33 @@ class AccentAnalyzerTool:
         try:
             self.log("Downloading video...")
             tmp_dir = "tmp"
             os.makedirs(tmp_dir, exist_ok=True)
             video_path = os.path.join(tmp_dir, "video.mp4")
-            r = requests.get(url)
             with open(video_path, "wb") as f:
-                f.write(r.content)
             self.log("Extracting audio...")
             audio_path = os.path.join(tmp_dir, "audio.wav")
@@ -44,7 +69,7 @@ class AccentAnalyzerTool:
                 f"with **{confidence}% confidence**.\n\n"
                 f"**Transcript of the audio:**\n\n *{transcript.strip(' ')}*"
             )
             shutil.rmtree(tmp_dir, ignore_errors=True)
             return summary

+import os
+import requests
+import shutil
+import subprocess
 from pydub import AudioSegment
 import whisper
 from speechbrain.pretrained.interfaces import foreign_class
             pymodule_file="custom_interface.py",
             classname="CustomEncoderWav2vec2Classifier"
         )
+        self.last_transcript = None
     def log(self, msg):
         print(f"[AccentAnalyzerTool] {msg}")
         try:
             self.log("Downloading video...")
             tmp_dir = "tmp"
+            # Clean up tmp folder if exists
+            if os.path.exists(tmp_dir):
+                shutil.rmtree(tmp_dir)
             os.makedirs(tmp_dir, exist_ok=True)
             video_path = os.path.join(tmp_dir, "video.mp4")
+            headers = {"User-Agent": "Mozilla/5.0"}
+            r = requests.get(url, headers=headers, stream=True)
+            r.raise_for_status()
             with open(video_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+            file_size = os.path.getsize(video_path)
+            self.log(f"Downloaded video size: {file_size} bytes")
+            if file_size < 1000:
+                raise ValueError("Downloaded video file is too small or invalid")
+            # Debug with ffprobe to check video validity
+            ffprobe_cmd = ["ffprobe", "-v", "error", "-show_format", "-show_streams", video_path]
+            try:
+                output = subprocess.check_output(ffprobe_cmd, stderr=subprocess.STDOUT).decode()
+                self.log(f"ffprobe output:\n{output}")
+            except subprocess.CalledProcessError as e:
+                self.log(f"ffprobe error:\n{e.output.decode()}")
             self.log("Extracting audio...")
             audio_path = os.path.join(tmp_dir, "audio.wav")
                 f"with **{confidence}% confidence**.\n\n"
                 f"**Transcript of the audio:**\n\n *{transcript.strip(' ')}*"
             )
             shutil.rmtree(tmp_dir, ignore_errors=True)
             return summary