Athspi committed on
Commit
073ce19
·
verified ·
1 Parent(s): ab0df5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -42
app.py CHANGED
@@ -3,7 +3,7 @@ import tempfile
3
  import base64
4
  from flask import Flask, request, jsonify, send_file, send_from_directory
5
  import google.generativeai as genai
6
- from google.generativeai.types import Content, Part
7
  from gtts import gTTS, lang
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
@@ -17,6 +17,7 @@ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
17
  if not GEMINI_API_KEY:
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
19
 
 
20
  genai.configure(api_key=GEMINI_API_KEY)
21
 
22
  # Language configurations
@@ -64,56 +65,55 @@ def translate_audio():
64
  with open(temp_input_path, "rb") as audio_file:
65
  audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
66
 
67
- files = [client.files.upload(file=temp_input_path)]
68
-
69
- contents = [
70
- types.Content(
71
- role="user",
72
- parts=[
73
- types.Part.from_uri(
74
- file_uri=files[0].uri,
75
- mime_type=files[0].mime_type,
76
- ),
77
- types.Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
78
- ],
79
- ),
80
- ]
81
-
82
- generate_content_config = types.GenerateContentConfig(
83
- temperature=1,
84
- top_p=0.95,
85
- top_k=40,
86
- max_output_tokens=8192,
87
- response_mime_type="text/plain",
88
- )
89
 
 
90
  transcription = ""
91
- for chunk in client.models.generate_content_stream(
92
  model="gemini-2.0-flash-lite",
93
- contents=contents,
94
- config=generate_content_config,
95
- ):
96
- transcription += chunk.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # Translate text using Gemini
99
  translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
100
 
101
- translate_contents = [
102
- types.Content(
103
- role="user",
104
- parts=[
105
- types.Part.from_text(text=translate_prompt),
106
- ],
107
- ),
108
- ]
109
-
110
  translated_text = ""
111
- for chunk in client.models.generate_content_stream(
112
  model="gemini-2.0-flash-lite",
113
- contents=translate_contents,
114
- config=generate_content_config,
115
- ):
116
- translated_text += chunk.text
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  # Generate TTS
119
  if target_language in KOKORO_LANGUAGES:
 
3
  import base64
4
  from flask import Flask, request, jsonify, send_file, send_from_directory
5
  import google.generativeai as genai
6
+ from google.generativeai.types import Content, Part, GenerateContentConfig
7
  from gtts import gTTS, lang
8
  from kokoro import KPipeline
9
  from werkzeug.utils import secure_filename
 
17
  if not GEMINI_API_KEY:
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
19
 
20
+ # Initialize Gemini client
21
  genai.configure(api_key=GEMINI_API_KEY)
22
 
23
  # Language configurations
 
65
  with open(temp_input_path, "rb") as audio_file:
66
  audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
67
 
68
+ # Upload file to Gemini
69
+ uploaded_file = genai.upload_file(path=temp_input_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
+ # Generate transcription
72
  transcription = ""
73
+ response = genai.generate_content(
74
  model="gemini-2.0-flash-lite",
75
+ contents=[
76
+ Content(
77
+ role="user",
78
+ parts=[
79
+ Part.from_uri(file_uri=uploaded_file.uri, mime_type=uploaded_file.mime_type),
80
+ Part.from_text(text="Transcript the audio and provide only the text. Do not include any explanations or additional information."),
81
+ ],
82
+ ),
83
+ ],
84
+ config=GenerateContentConfig(
85
+ temperature=1,
86
+ top_p=0.95,
87
+ top_k=40,
88
+ max_output_tokens=8192,
89
+ response_mime_type="text/plain",
90
+ ),
91
+ )
92
+ transcription = response.text
93
 
94
  # Translate text using Gemini
95
  translate_prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{transcription}"
96
 
 
 
 
 
 
 
 
 
 
97
  translated_text = ""
98
+ response = genai.generate_content(
99
  model="gemini-2.0-flash-lite",
100
+ contents=[
101
+ Content(
102
+ role="user",
103
+ parts=[
104
+ Part.from_text(text=translate_prompt),
105
+ ],
106
+ ),
107
+ ],
108
+ config=GenerateContentConfig(
109
+ temperature=1,
110
+ top_p=0.95,
111
+ top_k=40,
112
+ max_output_tokens=8192,
113
+ response_mime_type="text/plain",
114
+ ),
115
+ )
116
+ translated_text = response.text
117
 
118
  # Generate TTS
119
  if target_language in KOKORO_LANGUAGES: