Athspi committed on
Commit
d24a2f3
·
verified ·
1 Parent(s): c605b09

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -0
app.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Step 1: Install required libraries
2
+ # Run this command in your terminal or notebook before running the script:
3
+ # pip install gradio requests google-generativeai moviepy pydub
4
+
5
+ # Step 2: Import libraries
6
+ import os
7
+ import time
8
+ import re
9
+ import google.generativeai as genai
10
+ import requests
11
+ import gradio as gr
12
+ import tempfile
13
+ from pydub import AudioSegment
14
+ from moviepy.editor import VideoFileClip, AudioFileClip
15
+ from google.generativeai.types import HarmCategory, HarmBlockThreshold
16
+
17
+ # --- SECRET MANAGEMENT ---
18
+ # Step 3: Load secrets from the environment
19
+ # This securely loads the API key and URL you set in the Colab/Hugging Face secrets manager.
20
# Both values are read from environment variables; only the TTS endpoint has a
# built-in default.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
TTS_API_URL = os.environ.get(
    "TTS_API_URL",
    "https://athspi-aitools-aittsg.hf.space/api/generate-tts/",  # Default fallback
)

# Fail fast at import time when a required setting is missing or empty.
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY secret not found! Please set it in your environment or Colab/Hugging Face secrets.")
if not TTS_API_URL:
    raise ValueError("TTS_API_URL secret not found or empty! Please set it.")

# Hand the key to the Gemini client library.
genai.configure(api_key=GEMINI_API_KEY)
31
+
32
+ # Step 4: Define Voice Choices
33
# Maps the label shown in the voice selector to the voice name sent to the
# TTS API.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr",
}
37
+
38
+ # Step 5: "Single Narrator" Magic Prompt for Gemini
39
+ GEMINI_PROMPT = """
40
+ You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
41
+
42
+ **CRITICAL INSTRUCTIONS:**
43
+
44
+ 1. **Single Script:** Combine all dialogue from all speakers into one continuous script. The final output should be a single paragraph of text.
45
+ 2. **NO Timestamps:** Do NOT include any timestamps (e.g., [00:01 - 00:03]).
46
+ 3. **NO Speaker Labels:** Do NOT include any speaker labels or identifiers (e.g., ஆண்_1, பெண்_2).
47
+ 4. **Incorporate Performance:** To make the script expressive for a single narrator, add English style prompts and performance tags directly into the text.
48
+ - Use style prompts like `Say happily:`, `Whisper mysteriously:`, `Shout angrily:`.
49
+ - Use performance tags like `[laugh]`, `[singing]`, `[sigh]`.
50
+
51
+ **EXAMPLE OUTPUT:**
52
+ Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும். Shout angrily: உடனே இங்கிருந்து போ!
53
+ """
54
+
55
+ # Step 6: Function to upload files to Gemini
56
def upload_to_gemini(path, mime_type=None):
    """Upload a local file through ``genai.upload_file`` and return its handle.

    Args:
        path: Filesystem path of the file to upload.
        mime_type: Optional MIME type forwarded to ``genai.upload_file``.

    Returns:
        The file object returned by ``genai.upload_file``.
    """
    print(f"Uploading file: {path}")
    uploaded = genai.upload_file(path, mime_type=mime_type)
    display_name, uri = uploaded.display_name, uploaded.uri
    print(f"Uploaded file '{display_name}' as: {uri}")
    return uploaded
62
+
63
+ # Step 7: Function to wait for files to be active
64
def wait_for_files_active(files):
    """Block until every given uploaded file has left the PROCESSING state.

    Each file is re-fetched by name every 10 seconds while its state is
    ``PROCESSING``.

    Args:
        files: Iterable of uploaded file objects exposing a ``name`` attribute.

    Raises:
        Exception: If a file ends in any state other than ``ACTIVE``.
    """
    print("Waiting for file processing...")
    for pending in files:
        file_name = pending.name
        while True:
            current = genai.get_file(file_name)
            if current.state.name != "PROCESSING":
                break
            # Still processing: emit a progress dot and poll again later.
            print(".", end="", flush=True)
            time.sleep(10)
        if current.state.name != "ACTIVE":
            raise Exception(f"File {current.name} failed to process")
    print("...all files ready")
    print()
77
+
78
+ # Step 8: Function to generate the single Tamil script
79
def generate_tamil_script(video_file_path):
    """Generate a single, continuous Tamil script from the video.

    The video is uploaded to Gemini, the model is prompted with
    ``GEMINI_PROMPT``, and the returned text is collapsed onto one line.

    Args:
        video_file_path: Path of the video file to transcribe.

    Returns:
        The script as a single-line string on success. On any failure, a
        message that always starts with ``"Error:"`` so callers can detect it
        with one prefix check.
    """
    video_file = None
    try:
        video_file = upload_to_gemini(video_file_path, mime_type="video/mp4")
        wait_for_files_active([video_file])

        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        print("Generating single narrator script...")
        response = model.generate_content(
            [GEMINI_PROMPT, video_file],
            request_options={"timeout": 1000},
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            },
        )

        # Strip before testing so a whitespace-only reply counts as "no script".
        script_text = (response.text or "").strip()
        if not script_text:
            return "Error: No valid script was generated by Gemini."
        return " ".join(script_text.splitlines())
    except Exception as e:
        # Keep the "Error:" prefix: the caller relies on it to tell a failure
        # message apart from a real script.
        return f"Error: Gemini generation failed: {e}"
    finally:
        # Remove the remote upload on every path, including failures.
        if video_file is not None:
            try:
                print(f"Deleting uploaded file from Gemini: {video_file.name}")
                genai.delete_file(video_file.name)
            except Exception as cleanup_error:
                print(f"Warning: could not delete uploaded file: {cleanup_error}")
107
+
108
+ # Step 9: Simplified function to generate a single audio file
109
def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path, timeout=(10, 600)):
    """Generate one continuous audio track for the entire script.

    Posts the script to the TTS endpoint (``TTS_API_URL``) and writes the
    response body to ``output_path``.

    Args:
        dialogue_text: Script text to synthesize.
        voice_name: Voice identifier sent as ``voice_name`` in the payload.
        is_cheerful: Flag sent as ``cheerful`` in the payload.
        output_path: Destination path for the returned audio bytes.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled TTS service cannot hang the app.

    Returns:
        True if audio was received and written, False otherwise. Errors are
        printed, never raised.
    """
    try:
        print(f"Generating single audio track with voice '{voice_name}' | Cheerful: {is_cheerful}")

        # Nothing to synthesize: skip the network call entirely.
        if not dialogue_text or not dialogue_text.strip():
            print("Error: no text was provided for audio generation.")
            return False

        payload = {
            "text": dialogue_text,
            "voice_name": voice_name,
            "cheerful": is_cheerful,
        }

        response = requests.post(TTS_API_URL, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"Error from TTS API: {response.status_code} - {response.text}")
            return False

        # A 200 with an empty body would produce an unusable zero-byte file.
        if not response.content:
            print("Error from TTS API: empty audio response")
            return False

        with open(output_path, "wb") as f:
            f.write(response.content)
        print(f"Audio track saved successfully to {output_path}")
        return True

    except Exception as e:
        print(f"An error occurred in generate_single_audio_track: {str(e)}")
        return False
134
+
135
+ # Step 10: Function to replace video audio
136
def replace_video_audio(video_path, new_audio_path, output_path):
    """Replace the audio of a video with a new audio file.

    Args:
        video_path: Path of the source video.
        new_audio_path: Path of the audio file to attach.
        output_path: Path where the resulting video is written
            (``libx264`` video, ``aac`` audio).

    Returns:
        ``output_path`` on success. On failure, a message that always starts
        with ``"Error:"`` so the caller's prefix check recognizes it.
    """
    video_clip = None
    audio_clip = None
    final_clip = None
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = AudioFileClip(new_audio_path)
        final_clip = video_clip.set_audio(audio_clip)
        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
        return output_path
    except Exception as e:
        return f"Error: replacing video audio failed: {e}"
    finally:
        # Close whatever was opened, even when an earlier step raised.
        for clip in (video_clip, audio_clip, final_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                print(f"Warning: failed to close clip: {close_error}")
149
+
150
+ # Step 11: Main processing function
151
def process_video_single_speaker(video_path, voice_choice, is_cheerful):
    """Process the video for single-speaker dubbing.

    Pipeline: generate the script, synthesize one audio track, then write a
    copy of the video with that track as its audio.

    Args:
        video_path: Path of the uploaded video; falsy means nothing uploaded.
        voice_choice: A key of ``VOICE_CHOICES``.
        is_cheerful: Forwarded to the TTS request.

    Returns:
        ``(output_video_path, script)`` on success, or ``(None, message)``
        when any step fails.
    """
    if not video_path:
        return None, "Please upload a video file first."

    temp_audio_path = None
    output_video_path = None
    succeeded = False
    try:
        print("-" * 50)
        print(f"Starting single-speaker processing for: {video_path}")

        # Validate the voice before the slow script-generation step.
        voice_name = VOICE_CHOICES.get(voice_choice)
        if voice_name is None:
            return None, f"Unknown voice choice: {voice_choice!r}"

        script = generate_tamil_script(video_path)
        # Script generation reports failures as strings; match every error
        # prefix it can produce so a failure is never narrated as a script.
        if script.startswith(("Error:", "Error in Gemini generation:")):
            return None, script
        print("\n--- Generated Script ---\n", script, "\n------------------------\n")

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name

        if not generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path):
            return None, "Failed to generate the audio track."

        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
            output_video_path = temp_video.name

        result = replace_video_audio(video_path, temp_audio_path, output_video_path)
        # On success the helper returns exactly the output path; anything else
        # is its error message, whatever its wording.
        if result != output_video_path:
            return None, result

        succeeded = True
        print("Processing complete!")
        print("-" * 50)
        return result, script

    except Exception as e:
        return None, f"An unexpected error occurred: {str(e)}"
    finally:
        # The audio is always temporary; the output video is kept only when it
        # is being returned to the caller.
        leftovers = [temp_audio_path]
        if not succeeded:
            leftovers.append(output_video_path)
        for leftover in leftovers:
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError as cleanup_error:
                    print(f"Warning: could not remove temporary file {leftover}: {cleanup_error}")
189
+
190
+ # Step 12: Gradio Interface
191
def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the dubbing tool.

    The left column holds the inputs (video, narrator voice, cheerful toggle,
    submit button); the right column shows the dubbed video and the script.
    Clicking the button calls ``process_video_single_speaker``.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    voice_labels = list(VOICE_CHOICES.keys())

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# AI Single-Speaker Video Dubbing")
        gr.Markdown("Upload a video and choose a voice. The AI will transcribe all speech into a single script and re-voice the entire video with the selected narrator.")

        with gr.Row():
            # Input controls.
            with gr.Column(scale=1):
                video_input = gr.Video(label="Upload Video File")
                voice_selector = gr.Radio(
                    voice_labels,
                    label="Select Narrator Voice",
                    value="Male (Charon)",
                )
                cheerful_checkbox = gr.Checkbox(label="Enable Cheerful Tone", value=False)
                process_button = gr.Button("Generate Dubbed Video", variant="primary")

            # Result widgets.
            with gr.Column(scale=1):
                video_output = gr.Video(label="Dubbed Video")
                dialogue_output = gr.Textbox(label="Generated Full Script", lines=15, interactive=False)

        click_inputs = [video_input, voice_selector, cheerful_checkbox]
        click_outputs = [video_output, dialogue_output]
        process_button.click(
            fn=process_video_single_speaker,
            inputs=click_inputs,
            outputs=click_outputs,
        )

    return demo
217
+
218
+ # Step 13: Launch the Gradio app
219
if __name__ == "__main__":
    # Only when run as a script: build the UI, then launch it with debug=True.
    gradio_app = create_gradio_interface()
    gradio_app.launch(debug=True)