Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ import tempfile
|
|
3 |
import base64
|
4 |
from flask import Flask, request, jsonify, send_file, send_from_directory
|
5 |
import google.generativeai as genai
|
6 |
-
from google.generativeai.types import Content, Part
|
7 |
from gtts import gTTS, lang
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
@@ -17,6 +17,7 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
19 |
|
|
|
20 |
genai.configure(api_key=GEMINI_API_KEY)
|
21 |
|
22 |
# Language configurations
|
@@ -64,56 +65,55 @@ def translate_audio():
|
|
64 |
with open(temp_input_path, "rb") as audio_file:
|
65 |
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
contents = [
|
70 |
-
types.Content(
|
71 |
-
role="user",
|
72 |
-
parts=[
|
73 |
-
types.Part.from_uri(
|
74 |
-
file_uri=files[0].uri,
|
75 |
-
mime_type=files[0].mime_type,
|
76 |
-
),
|
77 |
-
types.Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
|
78 |
-
],
|
79 |
-
),
|
80 |
-
]
|
81 |
-
|
82 |
-
generate_content_config = types.GenerateContentConfig(
|
83 |
-
temperature=1,
|
84 |
-
top_p=0.95,
|
85 |
-
top_k=40,
|
86 |
-
max_output_tokens=8192,
|
87 |
-
response_mime_type="text/plain",
|
88 |
-
)
|
89 |
|
|
|
90 |
transcription = ""
|
91 |
-
|
92 |
model="gemini-2.0-flash-lite",
|
93 |
-
contents=
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
# Translate text using Gemini
|
99 |
translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
|
100 |
|
101 |
-
translate_contents = [
|
102 |
-
types.Content(
|
103 |
-
role="user",
|
104 |
-
parts=[
|
105 |
-
types.Part.from_text(text=translate_prompt),
|
106 |
-
],
|
107 |
-
),
|
108 |
-
]
|
109 |
-
|
110 |
translated_text = ""
|
111 |
-
|
112 |
model="gemini-2.0-flash-lite",
|
113 |
-
contents=
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
# Generate TTS
|
119 |
if target_language in KOKORO_LANGUAGES:
|
|
|
3 |
import base64
|
4 |
from flask import Flask, request, jsonify, send_file, send_from_directory
|
5 |
import google.generativeai as genai
|
6 |
+
from google.generativeai.types import Content, Part, GenerateContentConfig
|
7 |
from gtts import gTTS, lang
|
8 |
from kokoro import KPipeline
|
9 |
from werkzeug.utils import secure_filename
|
|
|
17 |
if not GEMINI_API_KEY:
|
18 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
19 |
|
20 |
+
# Initialize Gemini client
|
21 |
genai.configure(api_key=GEMINI_API_KEY)
|
22 |
|
23 |
# Language configurations
|
|
|
65 |
with open(temp_input_path, "rb") as audio_file:
|
66 |
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
67 |
|
68 |
+
# Upload file to Gemini
|
69 |
+
uploaded_file = genai.upload_file(path=temp_input_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
# Generate transcription
|
72 |
transcription = ""
|
73 |
+
response = genai.generate_content(
|
74 |
model="gemini-2.0-flash-lite",
|
75 |
+
contents=[
|
76 |
+
Content(
|
77 |
+
role="user",
|
78 |
+
parts=[
|
79 |
+
Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
|
80 |
+
Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
|
81 |
+
],
|
82 |
+
),
|
83 |
+
],
|
84 |
+
config=GenerateContentConfig(
|
85 |
+
temperature=1,
|
86 |
+
top_p=0.95,
|
87 |
+
top_k=40,
|
88 |
+
max_output_tokens=8192,
|
89 |
+
response_mime_type="text/plain",
|
90 |
+
),
|
91 |
+
)
|
92 |
+
transcription = response.text
|
93 |
|
94 |
# Translate text using Gemini
|
95 |
translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
translated_text = ""
|
98 |
+
response = genai.generate_content(
|
99 |
model="gemini-2.0-flash-lite",
|
100 |
+
contents=[
|
101 |
+
Content(
|
102 |
+
role="user",
|
103 |
+
parts=[
|
104 |
+
Part.from_text(text=translate_prompt),
|
105 |
+
],
|
106 |
+
),
|
107 |
+
],
|
108 |
+
config=GenerateContentConfig(
|
109 |
+
temperature=1,
|
110 |
+
top_p=0.95,
|
111 |
+
top_k=40,
|
112 |
+
max_output_tokens=8192,
|
113 |
+
response_mime_type="text/plain",
|
114 |
+
),
|
115 |
+
)
|
116 |
+
translated_text = response.text
|
117 |
|
118 |
# Generate TTS
|
119 |
if target_language in KOKORO_LANGUAGES:
|