Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Community

bluenevus committed on Apr 26

Commit

7a3a01f

verified ·

1 Parent(s): 6d3eadd

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -20

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ from dash.exceptions import PreventUpdate
 import requests
 from pytube import YouTube
 from pydub import AudioSegment
-import google.generativeai as genai
 # Try different import statements for moviepy
 try:
@@ -38,16 +38,13 @@ logger = logging.getLogger(__name__)
 # Initialize the Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
-# Retrieve the Google API key from Hugging Face Spaces
-GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
-if not GOOGLE_API_KEY:
-    logger.error("GOOGLE_API_KEY not found in environment variables")
-    raise ValueError("GOOGLE_API_KEY not set")
-genai.configure(api_key=GOOGLE_API_KEY)
-# Initialize Gemini model
-model = genai.GenerativeModel('gemini-2.0-flash-lite')
 def is_valid_url(url):
     try:
@@ -101,6 +98,17 @@ def extract_audio(file_path):
         logger.error(f"Error extracting audio: {str(e)}")
         raise
 def process_media(contents, filename, url):
     logger.info("Starting media processing")
     try:
@@ -121,20 +129,14 @@ def process_media(contents, filename, url):
         if temp_file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv', '.wmv')):
             logger.info("Video file detected, extracting audio")
             audio_file_path = extract_audio(temp_file_path)
-            with open(audio_file_path, "rb") as audio_file:
-                audio_data = audio_file.read()
             os.unlink(audio_file_path)
         else:
-            logger.info("Audio file detected, reading directly")
-            with open(temp_file_path, "rb") as audio_file:
-                audio_data = audio_file.read()
         os.unlink(temp_file_path)
-        # Use the audio data directly with the Gemini model
-        response = model.generate_content(audio_data)
-        logger.info("Transcription completed successfully")
-        return response.text
     except Exception as e:
         logger.error(f"Error in process_media: {str(e)}")
         raise

 import requests
 from pytube import YouTube
 from pydub import AudioSegment
+import openai
 # Try different import statements for moviepy
 try:
 # Initialize the Dash app
 app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+# Retrieve the OpenAI API key from Hugging Face Spaces
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+if not OPENAI_API_KEY:
+    logger.error("OPENAI_API_KEY not found in environment variables")
+    raise ValueError("OPENAI_API_KEY not set")
+openai.api_key = OPENAI_API_KEY
 def is_valid_url(url):
     try:
         logger.error(f"Error extracting audio: {str(e)}")
         raise
+def transcribe_audio(file_path):
+    logger.info(f"Transcribing audio: {file_path}")
+    try:
+        with open(file_path, "rb") as audio_file:
+            transcript = openai.Audio.transcribe("whisper-1", audio_file)
+        logger.info("Transcription completed successfully")
+        return transcript["text"]
+    except Exception as e:
+        logger.error(f"Error during transcription: {str(e)}")
+        raise
 def process_media(contents, filename, url):
     logger.info("Starting media processing")
     try:
         if temp_file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv', '.wmv')):
             logger.info("Video file detected, extracting audio")
             audio_file_path = extract_audio(temp_file_path)
+            transcript = transcribe_audio(audio_file_path)
             os.unlink(audio_file_path)
         else:
+            logger.info("Audio file detected, transcribing directly")
+            transcript = transcribe_audio(temp_file_path)
         os.unlink(temp_file_path)
+        return transcript
     except Exception as e:
         logger.error(f"Error in process_media: {str(e)}")
         raise