File size: 4,911 Bytes
d24a2f3 8a409a5 21e6f34 a2554b6 d24a2f3 08c3547 a2554b6 d24a2f3 a2554b6 08c3547 8a409a5 a2554b6 21e6f34 a2554b6 21e6f34 a2554b6 21e6f34 a2554b6 893e301 a2554b6 893e301 21e6f34 d24a2f3 a2554b6 ab46005 8a814dc a2554b6 d24a2f3 a2554b6 352553f a2554b6 8a409a5 a2554b6 8a409a5 a2554b6 d6208ae a2554b6 893e301 a2554b6 893e301 a2554b6 893e301 a2554b6 a30e87b a2554b6 d6208ae a2554b6 893e301 a2554b6 d6208ae a2554b6 893e301 a2554b6 d6208ae a2554b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import asyncio
import os
import tempfile
import time
import uuid
from pathlib import Path

import google.generativeai as genai
import requests
from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from moviepy.editor import VideoFileClip, AudioFileClip
# Load environment variables (python-dotenv) before any os.getenv() call below.
load_dotenv()

app = FastAPI()

# Configure directories: uploads hold per-request temporary files, downloads
# hold the finished dubbed videos. Both are created at import time.
UPLOAD_DIR = "uploads"
DOWNLOAD_DIR = "downloads"
Path(UPLOAD_DIR).mkdir(exist_ok=True)
Path(DOWNLOAD_DIR).mkdir(exist_ok=True)

# Mount static files. The directory comes from DOWNLOAD_DIR so the mount and
# the code that writes finished videos cannot drift apart.
# NOTE(review): this mount is registered before the
# "/downloads/{file_name}" route defined later in this file, so it presumably
# takes precedence for those URLs -- confirm which handler should serve them.
app.mount("/downloads", StaticFiles(directory=DOWNLOAD_DIR), name="downloads")

# Configuration
# NOTE(review): neither value is checked for being unset here -- confirm the
# deployment always provides GEMINI_API_KEY and TTS_API_URL.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")
genai.configure(api_key=GEMINI_API_KEY)

# Maps the "voice" form value to the voice name sent to the TTS service.
VOICE_CHOICES = {
    "male": "Charon",
    "female": "Zephyr"
}

# Prompt sent to Gemini together with the uploaded video.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
**CRITICAL INSTRUCTIONS:**
1. Combine all dialogue into one continuous script.
2. NO timestamps or speaker labels.
3. Add performance cues (e.g., [laugh], [sigh]) and directions (e.g., "Say happily:").
"""
def _remove_if_present(path):
    """Delete *path*; do nothing if it is unset or was never created."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        # The pipeline failed before this file was written.
        pass


@app.post("/process")
async def process_video(
    file: UploadFile = File(...),
    voice: str = Form("male"),
    cheerful: bool = Form(False)
):
    """Dub an uploaded video and return the result's URL and script.

    Steps: save the upload under UPLOAD_DIR, generate a script with
    ``generate_script``, synthesize speech with ``generate_audio``, and
    write the dubbed video into DOWNLOAD_DIR with ``create_dubbed_video``.

    Args:
        file: The uploaded video.
        voice: Voice key forwarded to ``generate_audio``.
        cheerful: Flag forwarded to ``generate_audio``.

    Returns:
        A dict with ``video_url`` (path under ``/downloads``) and ``script``.

    Raises:
        HTTPException: Status 500 with the error text if any step fails.
    """
    # Tracked outside the try block so the finally clause can clean up
    # whatever was created, on success and on failure alike.
    file_path = None
    audio_path = None
    try:
        # Save uploaded file. filename may be missing, in which case the
        # stored file simply has no extension.
        file_ext = Path(file.filename or "").suffix
        file_name = f"{uuid.uuid4()}{file_ext}"
        file_path = os.path.join(UPLOAD_DIR, file_name)
        with open(file_path, "wb") as buffer:
            # Copy in chunks so a large video is never held fully in memory.
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                buffer.write(chunk)

        # Generate script using Gemini
        script = await generate_script(file_path)

        # Generate audio
        audio_path = os.path.join(UPLOAD_DIR, f"audio_{uuid.uuid4()}.wav")
        await generate_audio(script, voice, cheerful, audio_path)

        # Create dubbed video
        output_name = f"dubbed_{file_name}"
        output_path = os.path.join(DOWNLOAD_DIR, output_name)
        await create_dubbed_video(file_path, audio_path, output_path)

        return {
            "video_url": f"/downloads/{output_name}",
            "script": script
        }
    except HTTPException:
        # Already a proper HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Cleanup of the temporary upload and synthesized audio.
        _remove_if_present(file_path)
        _remove_if_present(audio_path)
def _generate_script_blocking(video_path, poll_interval=2.0):
    """Run the synchronous Gemini upload / wait / generate / delete sequence."""
    video_file = genai.upload_file(video_path, mime_type="video/mp4")
    try:
        # Wait between polls instead of re-querying in a tight loop.
        while video_file.state.name == "PROCESSING":
            time.sleep(poll_interval)
            video_file = genai.get_file(video_file.name)
        if video_file.state.name != "ACTIVE":
            raise RuntimeError("Gemini processing failed")
        model = genai.GenerativeModel("models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Remove the uploaded file whether or not generation succeeded.
        genai.delete_file(video_file.name)
    if hasattr(response, 'text'):
        # Collapse the response into a single line.
        return " ".join(response.text.strip().splitlines())
    raise RuntimeError("No script generated")


async def generate_script(video_path: str) -> str:
    """Generate a single-line script for the video at *video_path*.

    The video is uploaded to Gemini, the call waits until the upload is no
    longer in the PROCESSING state, and the model is asked to follow
    GEMINI_PROMPT. The uploaded file is deleted afterwards.

    The blocking SDK calls run in the default executor so the event loop
    stays free to serve other requests.

    Args:
        video_path: Path of the local video file to upload.

    Returns:
        The generated text with line breaks replaced by spaces.

    Raises:
        RuntimeError: Prefixed with "Script generation failed: " on any
            failure; the original error is chained as the cause.
    """
    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, _generate_script_blocking, video_path
        )
    except Exception as e:
        raise RuntimeError(f"Script generation failed: {e}") from e
async def generate_audio(text: str, voice: str, cheerful: bool, output_path: str):
    """Synthesize *text* through the TTS service and save it to *output_path*.

    Args:
        text: Script to speak.
        voice: Key into VOICE_CHOICES; unknown keys fall back to "Charon".
        cheerful: Flag passed through to the TTS service in the payload.
        output_path: File that receives the raw response body.

    Raises:
        RuntimeError: Prefixed with "Audio generation failed: " if the
            service is not configured, the request fails, or the service
            answers with a status other than 200. The original error is
            chained as the cause.
    """
    try:
        if not TTS_API_URL:
            # Fail with a clear message instead of an obscure URL error.
            raise RuntimeError("TTS_API_URL is not configured")
        voice_name = VOICE_CHOICES.get(voice, "Charon")
        payload = {
            "text": text,
            "voice_name": voice_name,
            "cheerful": cheerful
        }
        # requests is synchronous and may wait up to the 300 s timeout, so
        # run it in the default executor rather than on the event loop.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(TTS_API_URL, json=payload, timeout=300),
        )
        if response.status_code != 200:
            raise RuntimeError(f"TTS API error: {response.text}")
        with open(output_path, "wb") as f:
            f.write(response.content)
    except Exception as e:
        raise RuntimeError(f"Audio generation failed: {e}") from e
def _render_dubbed_video(video_path, audio_path, output_path):
    """Synchronously write *video_path* with *audio_path* as its soundtrack."""
    video = None
    audio = None
    try:
        video = VideoFileClip(video_path)
        audio = AudioFileClip(audio_path)
        # Ensure audio does not run past the end of the video.
        track = audio
        if track.duration > video.duration:
            track = track.subclip(0, video.duration)
        dubbed = video.set_audio(track)
        dubbed.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            threads=4,
            preset="fast"
        )
    finally:
        # Close every clip opened here, even when rendering fails.
        for clip in (video, audio):
            if clip is not None:
                clip.close()


async def create_dubbed_video(video_path: str, audio_path: str, output_path: str):
    """Replace the video's soundtrack with the given audio and save the result.

    Audio longer than the video is trimmed to the video's duration. The
    output is encoded with libx264 video and AAC audio.

    The encode is blocking, so it runs in the default executor to keep the
    event loop responsive.

    Args:
        video_path: Source video file.
        audio_path: Audio file to use as the new soundtrack.
        output_path: Destination of the rendered video.

    Raises:
        RuntimeError: Prefixed with "Video processing failed: " on any
            failure; the original error is chained as the cause.
    """
    try:
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None, _render_dubbed_video, video_path, audio_path, output_path
        )
    except Exception as e:
        raise RuntimeError(f"Video processing failed: {e}") from e
@app.get("/downloads/{file_name}")
async def download_file(file_name: str):
    """Serve a finished video from DOWNLOAD_DIR.

    Args:
        file_name: Bare name of a file inside DOWNLOAD_DIR.

    Returns:
        A FileResponse for the requested file.

    Raises:
        HTTPException: Status 404 if the name contains path components or
            does not refer to a regular file in DOWNLOAD_DIR.
    """
    # file_name is client-controlled: accept only a bare file name so the
    # joined path cannot point outside DOWNLOAD_DIR.
    if Path(file_name).name != file_name:
        raise HTTPException(status_code=404, detail="File not found")
    file_path = os.path.join(DOWNLOAD_DIR, file_name)
    # isfile (not exists) so directories such as ".." are rejected as well.
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(file_path)