Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Community

Athspi committed on Mar 8

Commit

073ce19

verified ·

1 Parent(s): ab0df5d

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -42

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import tempfile
 import base64
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
-from google.generativeai.types import Content, Part
 from gtts import gTTS, lang
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
@@ -17,6 +17,7 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY environment variable not set")
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
@@ -64,56 +65,55 @@ def translate_audio():
         with open(temp_input_path, "rb") as audio_file:
             audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
-        files = [client.files.upload(file=temp_input_path)]
-        contents = [
-            types.Content(
-                role="user",
-                parts=[
-                    types.Part.from_uri(
-                        file_uri=files[0].uri,
-                        mime_type=files[0].mime_type,
-                    ),
-                    types.Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
-                ],
-            ),
-        ]
-        generate_content_config = types.GenerateContentConfig(
-            temperature=1,
-            top_p=0.95,
-            top_k=40,
-            max_output_tokens=8192,
-            response_mime_type="text/plain",
-        )
         transcription = ""
-        for chunk in client.models.generate_content_stream(
             model="gemini-2.0-flash-lite",
-            contents=contents,
-            config=generate_content_config,
-        ):
-            transcription += chunk.text
         # Translate text using Gemini
         translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
-        translate_contents = [
-            types.Content(
-                role="user",
-                parts=[
-                    types.Part.from_text(text=translate_prompt),
-                ],
-            ),
-        ]
         translated_text = ""
-        for chunk in client.models.generate_content_stream(
             model="gemini-2.0-flash-lite",
-            contents=translate_contents,
-            config=generate_content_config,
-        ):
-            translated_text += chunk.text
         # Generate TTS
         if target_language in KOKORO_LANGUAGES:

 import base64
 from flask import Flask, request, jsonify, send_file, send_from_directory
 import google.generativeai as genai
+from google.generativeai.types import Content, Part, GenerateContentConfig
 from gtts import gTTS, lang
 from kokoro import KPipeline
 from werkzeug.utils import secure_filename
 if not GEMINI_API_KEY:
     raise ValueError("GEMINI_API_KEY environment variable not set")
+# Initialize Gemini client
 genai.configure(api_key=GEMINI_API_KEY)
 # Language configurations
         with open(temp_input_path, "rb") as audio_file:
             audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
+        # Upload file to Gemini
+        uploaded_file = genai.upload_file(path=temp_input_path)
+        # Generate transcription
         transcription = ""
+        response = genai.generate_content(
             model="gemini-2.0-flash-lite",
+            contents=[
+                Content(
+                    role="user",
+                    parts=[
+                        Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
+                        Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
+                    ],
+                ),
+            ],
+            config=GenerateContentConfig(
+                temperature=1,
+                top_p=0.95,
+                top_k=40,
+                max_output_tokens=8192,
+                response_mime_type="text/plain",
+            ),
+        )
+        transcription = response.text
         # Translate text using Gemini
         translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
         translated_text = ""
+        response = genai.generate_content(
             model="gemini-2.0-flash-lite",
+            contents=[
+                Content(
+                    role="user",
+                    parts=[
+                        Part.from_text(text=translate_prompt),
+                    ],
+                ),
+            ],
+            config=GenerateContentConfig(
+                temperature=1,
+                top_p=0.95,
+                top_k=40,
+                max_output_tokens=8192,
+                response_mime_type="text/plain",
+            ),
+        )
+        translated_text = response.text
         # Generate TTS
         if target_language in KOKORO_LANGUAGES: