Athspi committed on
Commit
8a409a5
·
verified ·
1 Parent(s): 9fd5ca4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -183
app.py CHANGED
@@ -1,221 +1,159 @@
1
- # Step 1: Install required libraries
2
- # Run this command in your terminal or notebook before running the script:
3
- # pip install gradio requests google-generativeai moviepy pydub
4
-
5
- # Step 2: Import libraries
6
  import os
7
- import time
8
- import re
9
  import google.generativeai as genai
10
  import requests
11
- import gradio as gr
12
- import tempfile
13
- from pydub import AudioSegment
14
  from moviepy.editor import VideoFileClip, AudioFileClip
 
 
15
  from google.generativeai.types import HarmCategory, HarmBlockThreshold
16
 
17
- # --- SECRET MANAGEMENT ---
18
- # Step 3: Load secrets from the environment
19
- # This securely loads the API key and URL you set in the Colab/Hugging Face secrets manager.
 
20
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
- TTS_API_URL = os.getenv("TTS_API_URL", "https://athspi-aitools-aittsg.hf.space/api/generate-tts/") # Default fallback
22
 
23
- # Check if the secrets were loaded correctly
24
  if not GEMINI_API_KEY:
25
- raise ValueError("GEMINI_API_KEY secret not found! Please set it in your environment or Colab/Hugging Face secrets.")
26
  if not TTS_API_URL:
27
- raise ValueError("TTS_API_URL secret not found or empty! Please set it.")
28
 
29
  # Configure the Gemini API with the loaded key
30
  genai.configure(api_key=GEMINI_API_KEY)
31
 
32
- # Step 4: Define Voice Choices
 
 
 
 
 
 
 
 
33
  VOICE_CHOICES = {
34
  "Male (Charon)": "Charon",
35
  "Female (Zephyr)": "Zephyr"
36
  }
37
 
38
- # Step 5: "Single Narrator" Magic Prompt for Gemini
39
  GEMINI_PROMPT = """
40
  You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
41
 
42
  **CRITICAL INSTRUCTIONS:**
43
-
44
- 1. **Single Script:** Combine all dialogue from all speakers into one continuous script. The final output should be a single paragraph of text.
45
- 2. **NO Timestamps:** Do NOT include any timestamps (e.g., [00:01 - 00:03]).
46
- 3. **NO Speaker Labels:** Do NOT include any speaker labels or identifiers (e.g., ஆண்_1, பெண்_2).
47
- 4. **Incorporate Performance:** To make the script expressive for a single narrator, add English style prompts and performance tags directly into the text.
48
- - Use style prompts like `Say happily:`, `Whisper mysteriously:`, `Shout angrily:`.
49
- - Use performance tags like `[laugh]`, `[singing]`, `[sigh]`.
50
 
51
  **EXAMPLE OUTPUT:**
52
- Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும். Shout angrily: உடனே இங்கிருந்து போ!
53
  """
54
 
55
- # Step 6: Function to upload files to Gemini
56
- def upload_to_gemini(path, mime_type=None):
57
- """Uploads the given file to Gemini."""
58
- print(f"Uploading file: {path}")
59
- file = genai.upload_file(path, mime_type=mime_type)
60
- print(f"Uploaded file '{file.display_name}' as: {file.uri}")
61
- return file
62
-
63
- # Step 7: Function to wait for files to be active
64
- def wait_for_files_active(files):
65
- """Waits for the given files to be active."""
66
- print("Waiting for file processing...")
67
- for name in (file.name for file in files):
68
- file = genai.get_file(name)
69
- while file.state.name == "PROCESSING":
70
- print(".", end="", flush=True)
71
- time.sleep(10)
72
- file = genai.get_file(name)
73
- if file.state.name != "ACTIVE":
74
- raise Exception(f"File {file.name} failed to process")
75
- print("...all files ready")
76
- print()
77
-
78
- # Step 8: Function to generate the single Tamil script
79
  def generate_tamil_script(video_file_path):
80
  """Generates a single, continuous Tamil script from the video."""
81
- try:
82
- video_file = upload_to_gemini(video_file_path, mime_type="video/mp4")
83
- wait_for_files_active([video_file])
84
-
85
- model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
86
- print("Generating single narrator script...")
87
- response = model.generate_content(
88
- [GEMINI_PROMPT, video_file],
89
- request_options={"timeout": 1000},
90
- safety_settings={
91
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
92
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
93
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
94
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
95
- }
96
- )
97
-
98
- print(f"Deleting uploaded file from Gemini: {video_file.name}")
99
- genai.delete_file(video_file.name)
100
-
101
- if response.text:
102
- return " ".join(response.text.strip().splitlines())
103
- else:
104
- return "Error: No valid script was generated by Gemini."
105
- except Exception as e:
106
- return f"Error in Gemini generation: {str(e)}"
107
 
108
- # Step 9: Simplified function to generate a single audio file
109
  def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path):
110
  """Generates one continuous audio track for the entire script."""
111
- try:
112
- print(f"Generating single audio track with voice '{voice_name}' | Cheerful: {is_cheerful}")
113
-
114
- payload = {
115
- "text": dialogue_text,
116
- "voice_name": voice_name,
117
- "cheerful": is_cheerful
118
- }
119
-
120
- response = requests.post(TTS_API_URL, json=payload)
121
-
122
- if response.status_code == 200:
123
- with open(output_path, "wb") as f:
124
- f.write(response.content)
125
- print(f"Audio track saved successfully to {output_path}")
126
- return True
127
- else:
128
- print(f"Error from TTS API: {response.status_code} - {response.text}")
129
- return False
130
-
131
- except Exception as e:
132
- print(f"An error occurred in generate_single_audio_track: {str(e)}")
133
- return False
134
 
135
- # Step 10: Function to replace video audio
136
  def replace_video_audio(video_path, new_audio_path, output_path):
137
  """Replaces the audio of a video with a new audio file."""
138
- try:
139
- video_clip = VideoFileClip(video_path)
140
- audio_clip = AudioFileClip(new_audio_path)
141
- final_clip = video_clip.set_audio(audio_clip)
142
- final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
143
- video_clip.close()
144
- audio_clip.close()
145
- final_clip.close()
146
- return output_path
147
- except Exception as e:
148
- return f"Error replacing video audio: {str(e)}"
149
-
150
- # Step 11: Main processing function
151
- def process_video_single_speaker(video_path, voice_choice, is_cheerful):
152
- """Processes the video for single-speaker dubbing."""
153
- if not video_path:
154
- return None, "Please upload a video file first."
155
-
156
- try:
157
- print("-" * 50)
158
- print(f"Starting single-speaker processing for: {video_path}")
159
-
160
- script = generate_tamil_script(video_path)
161
- if script.startswith("Error:"):
162
- return None, script
163
- print("\n--- Generated Script ---\n", script, "\n------------------------\n")
164
-
 
 
 
 
 
 
165
  voice_name = VOICE_CHOICES[voice_choice]
166
-
167
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
168
- temp_audio_path = temp_audio.name
169
 
170
- success = generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path)
171
- if not success:
172
- return None, "Failed to generate the audio track."
173
-
174
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
175
- output_video_path = temp_video.name
176
-
177
- result = replace_video_audio(video_path, temp_audio_path, output_video_path)
178
- if isinstance(result, str) and result.startswith("Error:"):
179
- return None, result
180
-
181
- os.remove(temp_audio_path)
182
-
183
- print("Processing complete!")
184
- print("-" * 50)
185
- return result, script
186
-
187
- except Exception as e:
188
- return None, f"An unexpected error occurred: {str(e)}"
189
-
190
- # Step 12: Gradio Interface
191
- def create_gradio_interface():
192
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
193
- gr.Markdown("# AI Single-Speaker Video Dubbing")
194
- gr.Markdown("Upload a video and choose a voice. The AI will transcribe all speech into a single script and re-voice the entire video with the selected narrator.")
195
-
196
- with gr.Row():
197
- with gr.Column(scale=1):
198
- video_input = gr.Video(label="Upload Video File")
199
- voice_selector = gr.Radio(
200
- list(VOICE_CHOICES.keys()),
201
- label="Select Narrator Voice",
202
- value="Male (Charon)"
203
- )
204
- cheerful_checkbox = gr.Checkbox(label="Enable Cheerful Tone", value=False)
205
- process_button = gr.Button("Generate Dubbed Video", variant="primary")
206
 
207
- with gr.Column(scale=1):
208
- video_output = gr.Video(label="Dubbed Video")
209
- dialogue_output = gr.Textbox(label="Generated Full Script", lines=15, interactive=False)
210
-
211
- process_button.click(
212
- fn=process_video_single_speaker,
213
- inputs=[video_input, voice_selector, cheerful_checkbox],
214
- outputs=[video_output, dialogue_output]
215
- )
216
- return demo
217
-
218
- # Step 13: Launch the Gradio app
219
- if __name__ == "__main__":
220
- gradio_app = create_gradio_interface()
221
- gradio_app.launch(debug=True)
 
 
 
 
 
 
 
 
1
  import os
2
+ import tempfile
 
3
  import google.generativeai as genai
4
  import requests
5
+ from flask import Flask, request, render_template, send_from_directory, url_for
 
 
6
  from moviepy.editor import VideoFileClip, AudioFileClip
7
+ from pydub import AudioSegment
8
+ from werkzeug.utils import secure_filename
9
  from google.generativeai.types import HarmCategory, HarmBlockThreshold
10
 
11
+ # --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
12
+ app = Flask(__name__)
13
+
14
+ # Load secrets from environment variables
15
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
16
+ TTS_API_URL = os.getenv("TTS_API_URL")
17
 
18
+ # Check if secrets were loaded correctly
19
  if not GEMINI_API_KEY:
20
+ raise ValueError("GEMINI_API_KEY secret not found! Please set it as an environment variable.")
21
  if not TTS_API_URL:
22
+ raise ValueError("TTS_API_URL secret not found! Please set it as an environment variable.")
23
 
24
  # Configure the Gemini API with the loaded key
25
  genai.configure(api_key=GEMINI_API_KEY)
26
 
27
+ # Configure directories for file uploads and downloads
28
+ UPLOAD_FOLDER = 'uploads'
29
+ DOWNLOAD_FOLDER = 'downloads'
30
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
31
+ os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
32
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
33
+ app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
34
+
35
+ # --- 2. DEFINE VOICE CHOICES AND GEMINI PROMPT ---
36
  VOICE_CHOICES = {
37
  "Male (Charon)": "Charon",
38
  "Female (Zephyr)": "Zephyr"
39
  }
40
 
 
41
  GEMINI_PROMPT = """
42
  You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
43
 
44
  **CRITICAL INSTRUCTIONS:**
45
+ 1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
46
+ 2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
47
+ 3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text.
 
 
 
 
48
 
49
  **EXAMPLE OUTPUT:**
50
+ Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
51
  """
52
 
53
+ # --- 3. HELPER FUNCTIONS (CORE LOGIC) ---
54
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def generate_tamil_script(video_file_path):
    """Generate a single, continuous Tamil script from the video.

    Uploads the video to Gemini, polls until the file leaves the PROCESSING
    state, asks the model for the narration script, and returns that script
    collapsed onto one line.

    Args:
        video_file_path: Path of the local video file to upload (sent with
            MIME type video/mp4).

    Returns:
        The generated script with all line breaks replaced by single spaces.

    Raises:
        Exception: If the uploaded file does not reach the ACTIVE state, or
            if the model response contains no text.
    """
    # Imported locally: the polling loop needs `time`, which the module's
    # import block does not bring into scope.
    import time

    print("Uploading file to Gemini for transcription...")
    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")

    try:
        print("Waiting for file processing...")
        while video_file.state.name == "PROCESSING":
            time.sleep(10)
            video_file = genai.get_file(video_file.name)
        if video_file.state.name != "ACTIVE":
            raise Exception(f"File {video_file.name} failed to process")

        print("Generating single narrator script...")
        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Always remove the remote copy, even when processing or generation
        # fails, so failed requests do not leave uploads behind.
        genai.delete_file(video_file.name)
        print("Deleted file from Gemini.")

    if response.text:
        return " ".join(response.text.strip().splitlines())
    raise Exception("No valid script was generated by Gemini.")
 
 
 
 
 
 
77
 
 
def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path):
    """Synthesize the entire script as one audio file through the TTS endpoint.

    POSTs the text, voice name and cheerful flag as JSON to TTS_API_URL and
    writes the raw response body to ``output_path``.

    Returns:
        True when the endpoint answers with HTTP 200.

    Raises:
        Exception: For any other status code; the message carries the status
            code and the response text.
    """
    print(f"Generating audio with voice '{voice_name}' | Cheerful: {is_cheerful}")
    tts_response = requests.post(
        TTS_API_URL,
        json={"text": dialogue_text, "voice_name": voice_name, "cheerful": is_cheerful},
    )

    # Guard clause: anything other than 200 is treated as a failure.
    if tts_response.status_code != 200:
        raise Exception(f"Error from TTS API: {tts_response.status_code} - {tts_response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(tts_response.content)
    print(f"Audio track saved successfully to {output_path}")
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
 
def replace_video_audio(video_path, new_audio_path, output_path):
    """Replace the audio of a video with a new audio file.

    Args:
        video_path: Path of the source video.
        new_audio_path: Path of the audio file to attach to the video.
        output_path: Where the re-encoded video (libx264 video, AAC audio)
            is written.

    Raises:
        Whatever moviepy raises while opening the inputs or encoding the
        output; the clips opened so far are still closed in that case.
    """
    print("Replacing video audio...")
    video_clip = VideoFileClip(video_path)
    audio_clip = None
    final_clip = None
    try:
        audio_clip = AudioFileClip(new_audio_path)
        final_clip = video_clip.set_audio(audio_clip)
        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Close every clip that was opened, even if encoding failed, so the
        # underlying readers and file handles are not leaked.
        for clip in (video_clip, audio_clip, final_clip):
            if clip is not None:
                clip.close()
    print(f"Final video saved to {output_path}")
101
+
102
+ # --- 4. FLASK ROUTES ---
103
+
@app.route('/', methods=['GET'])
def index():
    """Render ``index.html``, the main upload page, for GET requests to ``/``."""
    upload_page = render_template('index.html')
    return upload_page
108
+
@app.route('/process', methods=['POST'])
def process_video():
    """Handle a video upload and run the dubbing pipeline.

    Expects a multipart form with a ``video`` file, a ``voice_choice`` field
    holding one of the VOICE_CHOICES keys, and an optional ``cheerful`` field
    whose mere presence enables the cheerful tone.

    Returns:
        The rendered ``index.html`` with ``result_video`` and ``script`` on
        success; a plain-text message with status 400 for invalid input, or
        status 500 if any pipeline step raises.
    """
    if 'video' not in request.files:
        return "No video file part", 400
    file = request.files['video']
    if file.filename == '':
        return "No selected file", 400

    # secure_filename() can reduce a name to an empty string (for example a
    # name made up solely of non-ASCII characters); joining that onto the
    # upload folder would point at the folder itself.
    filename = secure_filename(file.filename)
    if not filename:
        return "Invalid file name", 400

    # Validate the form before writing anything to disk so a bad request
    # is rejected with 400 instead of surfacing as an unhandled KeyError.
    voice_choice = request.form.get('voice_choice')
    if voice_choice not in VOICE_CHOICES:
        return "Invalid voice choice", 400
    voice_name = VOICE_CHOICES[voice_choice]
    is_cheerful = 'cheerful' in request.form

    upload_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(upload_path)

    temp_audio_path = None
    try:
        # Generate the script
        script = generate_tamil_script(upload_path)

        # Generate the audio track. mkstemp() creates the file atomically,
        # unlike the deprecated, race-prone mktemp().
        audio_fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(audio_fd)
        generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path)

        # Create the final video
        final_video_name = f"dubbed_{filename}"
        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
        replace_video_audio(upload_path, temp_audio_path, final_video_path)

        # Render the same page but now with the results
        return render_template('index.html',
                               result_video=url_for('serve_video', filename=final_video_name),
                               script=script)
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"An error occurred during processing: {e}", 500
    finally:
        # Clean up the temporary audio file on success and on failure alike
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
151
+
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Send the requested dubbed video out of the configured downloads folder."""
    download_dir = app.config['DOWNLOAD_FOLDER']
    return send_from_directory(download_dir, filename)
156
+
if __name__ == '__main__':
    # host='0.0.0.0' makes the server reachable from the local network.
    # The Werkzeug debugger lets anyone who can reach it execute arbitrary
    # code, so debug mode is opt-in via FLASK_DEBUG rather than always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=5001)