Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Files Community

bluenevus committed on Apr 26

Commit

c61e81a

verified ·

1 Parent(s): 1e94ca7

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -31

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from pydub import AudioSegment
 import requests
 import mimetypes
 import urllib.parse
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
 # Try to import moviepy with the simpler import statement
 try:
-    from moviepy import VideoFileClip
     logger.info("MoviePy (VideoFileClip) successfully imported")
 except ImportError as e:
     logger.error(f"Error importing MoviePy (VideoFileClip): {str(e)}")
@@ -92,6 +93,15 @@ def transcribe_audio_chunks(chunks):
             transcriptions.append(transcript.get('text', ''))
     return ' '.join(transcriptions)
 def process_media(file_path, is_url=False):
     global generated_file, transcription_text
     temp_file = None
@@ -100,43 +110,27 @@ def process_media(file_path, is_url=False):
         if is_url:
             logger.info(f"Processing URL: {file_path}")
             try:
-                response = requests.get(file_path, stream=True)
-                response.raise_for_status()
-                content_type = response.headers.get('content-type', '')
-                extension = mimetypes.guess_extension(content_type) or ''
-                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
-                for chunk in response.iter_content(chunk_size=8192):
-                    temp_file.write(chunk)
-                temp_file.close()
-                logger.info(f"URL content downloaded: {temp_file.name}")
             except Exception as e:
                 logger.error(f"Error downloading URL content: {str(e)}")
                 return f"Error downloading URL content: {str(e)}", False
         else:
             logger.info("Processing uploaded file")
-            temp_file = tempfile.NamedTemporaryFile(delete=False)
             temp_file.write(file_path)
             temp_file.close()
-            logger.info(f"Uploaded file saved: {temp_file.name}")
-        # Try to extract audio using moviepy
         try:
-            audio = AudioFileClip(temp_file.name)
-            wav_path = temp_file.name + ".wav"
-            audio.write_audiofile(wav_path)
-            audio.close()
-        except Exception as e:
-            logger.warning(f"Could not process with moviepy: {str(e)}")
-            # If moviepy fails, try with pydub
-            try:
-                audio = AudioSegment.from_file(temp_file.name)
-                wav_path = temp_file.name + ".wav"
-                audio.export(wav_path, format="wav")
-            except Exception as e:
-                logger.error(f"Could not process audio: {str(e)}")
-                return f"Could not process audio: {str(e)}", False
-        logger.info(f"Audio extracted to WAV: {wav_path}")
         # Chunk the audio file
         audio = AudioSegment.from_wav(wav_path)
@@ -156,8 +150,8 @@ def process_media(file_path, is_url=False):
         logger.error(f"Error during processing: {str(e)}")
         return f"An error occurred: {str(e)}", False
     finally:
-        if temp_file and os.path.exists(temp_file.name):
-            os.unlink(temp_file.name)
         if wav_path and os.path.exists(wav_path):
             os.unlink(wav_path)

 import requests
 import mimetypes
 import urllib.parse
+import subprocess
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Try to import moviepy with the simpler import statement
 try:
+    from moviepy import VideoFileClip, AudioFileClip
     logger.info("MoviePy (VideoFileClip) successfully imported")
 except ImportError as e:
     logger.error(f"Error importing MoviePy (VideoFileClip): {str(e)}")
             transcriptions.append(transcript.get('text', ''))
     return ' '.join(transcriptions)
def download_file(url):
    """Download *url* to a uniquely named temporary file and return its path.

    The file is created with ``tempfile.mkstemp`` instead of being named
    after the last URL segment in the current working directory. Only the
    extension of the URL path is kept (as the temp-file suffix), so:

    * a URL ending in ``/`` or carrying a query string still yields a
      usable file name;
    * a remote name such as ``app.py`` can never overwrite (and later cause
      the deletion of) an existing local file;
    * the returned path is absolute, so it cannot be mistaken for a
      command-line option when handed to an external tool.

    Args:
        url: HTTP(S) address of the media file to fetch.

    Returns:
        Path of the downloaded file. The caller is responsible for
        deleting it.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx response status.
        OSError: if the temporary file cannot be created or written.
    """
    # Derive the extension from the path component only; the query string
    # and fragment must not leak into the file name.
    url_path = urllib.parse.urlparse(url).path
    suffix = os.path.splitext(os.path.basename(url_path))[1]

    fd, local_filename = tempfile.mkstemp(suffix=suffix)
    try:
        # A timeout keeps a stalled server from hanging the request forever.
        with os.fdopen(fd, 'wb') as f, \
                requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    except Exception:
        # Do not leave a partial download behind; the caller never learns
        # the path when an exception propagates.
        if os.path.exists(local_filename):
            os.unlink(local_filename)
        raise
    return local_filename
 def process_media(file_path, is_url=False):
     global generated_file, transcription_text
     temp_file = None
         if is_url:
             logger.info(f"Processing URL: {file_path}")
             try:
+                temp_file = download_file(file_path)
+                logger.info(f"URL content downloaded: {temp_file}")
             except Exception as e:
                 logger.error(f"Error downloading URL content: {str(e)}")
                 return f"Error downloading URL content: {str(e)}", False
         else:
             logger.info("Processing uploaded file")
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
             temp_file.write(file_path)
             temp_file.close()
+            temp_file = temp_file.name
+            logger.info(f"Uploaded file saved: {temp_file}")
+        # Convert to WAV using ffmpeg
+        wav_path = temp_file + ".wav"
         try:
+            subprocess.run(['ffmpeg', '-i', temp_file, '-acodec', 'pcm_s16le', '-ar', '44100', wav_path], check=True)
+            logger.info(f"Audio extracted to WAV: {wav_path}")
+        except subprocess.CalledProcessError as e:
+            logger.error(f"FFmpeg conversion failed: {str(e)}")
+            return f"FFmpeg conversion failed: {str(e)}", False
         # Chunk the audio file
         audio = AudioSegment.from_wav(wav_path)
         logger.error(f"Error during processing: {str(e)}")
         return f"An error occurred: {str(e)}", False
     finally:
+        if temp_file and os.path.exists(temp_file):
+            os.unlink(temp_file)
         if wav_path and os.path.exists(wav_path):
             os.unlink(wav_path)