import os
import requests
import json
import time
import subprocess
import gradio as gr
import uuid
from dotenv import load_dotenv

# Populate the process environment before the settings below are read
load_dotenv()

# Credentials: OPENAI_API_KEY is sent as the Bearer token to the OpenAI speech
# endpoint; B_KEY is sent as the "x-api-key" header to API_URL.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
B_KEY = os.environ.get("B_KEY")

# Service endpoints: API_URL is the lipsync job endpoint, UPLOAD_URL receives
# the generated audio upload. Each is None when the variable is unset.
API_URL = os.environ.get("API_URL")
UPLOAD_URL = os.environ.get("UPLOAD_URL")

def get_voices():
    """Return the selectable OpenAI TTS voices as ``(label, voice_id)`` pairs.

    The label and the voice id are the same string for every entry. A new
    list is built on each call.
    """
    voice_names = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
    return [(name, name) for name in voice_names]

def text_to_speech(voice, text, timeout=60):
    """Synthesize speech for ``text`` via the OpenAI speech endpoint.

    Args:
        voice: Name of the OpenAI voice to use.
        text: Text to convert to speech.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The raw response body (audio bytes) when the API answers with HTTP
        200, otherwise ``None`` (empty text, network failure, timeout, or any
        non-200 status).
    """
    # Nothing to synthesize; skip the request entirely.
    if not text or not text.strip():
        return None

    url = "https://api.openai.com/v1/audio/speech"

    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }

    data = {
        "model": "tts-1",
        "input": text,
        "voice": voice
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Keep the "None means failure" contract for network-level errors too.
        return None

    if response.status_code != 200:
        return None

    return response.content

def upload_file(file_content, file_name, timeout=60):
    """Upload a file to ``UPLOAD_URL`` and return the text the service replies with.

    The file is sent as the multipart field ``fileToUpload`` together with the
    form field ``reqtype=fileupload``.

    Args:
        file_content: Bytes (or file-like object) to upload.
        file_name: File name to report to the upload service.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The stripped response body on HTTP 200 (the caller uses it as the
        file's URL), or ``None`` when the upload URL is not configured, the
        request fails, the status is not 200, or the body is empty.
    """
    if not UPLOAD_URL:
        return None

    files = {'fileToUpload': (file_name, file_content)}
    data = {'reqtype': 'fileupload'}

    try:
        response = requests.post(UPLOAD_URL, files=files, data=data, timeout=timeout)
    except requests.RequestException:
        # Network failures are reported the same way as a rejected upload.
        return None

    if response.status_code != 200:
        return None

    # An empty body carries no usable URL; normalise it to None.
    return response.text.strip() or None

def lipsync_api_call(video_url, audio_url, timeout=30):
    """Submit a lipsync job to ``API_URL`` and return the decoded JSON reply.

    Args:
        video_url: URL of the source video.
        audio_url: URL of the audio track to sync the video to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON body of the response, whatever its HTTP status.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": B_KEY
    }

    data = {
        "audioUrl": audio_url,
        "videoUrl": video_url,
        "maxCredits": 1000,
        "model": "sync-1.7.1-beta",
        "synergize": True,
        "pads": [0, 5, 0, 0],
        "synergizerStrength": 1
    }

    # A timeout keeps a stalled API from blocking the caller indefinitely.
    response = requests.post(API_URL, headers=headers, json=data, timeout=timeout)
    return response.json()

def check_job_status(job_id, max_attempts=30, poll_interval=10, timeout=30):
    """Poll the lipsync API until the job finishes or the attempts run out.

    Args:
        job_id: Identifier of the job, appended to ``API_URL``.
        max_attempts: Maximum number of status requests to make.
        poll_interval: Seconds to sleep between two status requests.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The ``videoUrl`` value of the reply once the status is ``COMPLETED``;
        ``None`` when the status is ``FAILED`` or when no terminal status was
        seen within ``max_attempts`` requests.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If a response body is not valid JSON.
    """
    headers = {"x-api-key": B_KEY}

    for attempt in range(max_attempts):
        response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=timeout)
        data = response.json()

        # .get() so a reply without "status" (e.g. an error payload) is
        # treated as "not finished yet" instead of raising KeyError.
        status = data.get("status")
        if status == "COMPLETED":
            return data.get("videoUrl")
        if status == "FAILED":
            return None

        # No point sleeping after the final attempt; we are about to give up.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)

    return None

def get_media_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        The container duration as a float.

    Raises:
        ValueError: If ffprobe exits with an error, prints nothing, or prints
            something that is not a number. The message includes ffprobe's
            stderr when available.
        FileNotFoundError: If the ``ffprobe`` executable cannot be found.
    """
    cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    raw_duration = result.stdout.decode(errors="replace").strip()
    if result.returncode != 0 or not raw_duration:
        # Surface ffprobe's own diagnostics instead of an opaque float() error.
        detail = result.stderr.decode(errors="replace").strip() or "no output"
        raise ValueError(f"ffprobe could not read the duration of {file_path!r}: {detail}")

    return float(raw_duration)

def combine_audio_video(video_path, audio_content, output_path):
    """Mux ``audio_content`` onto the video at ``video_path`` using ffmpeg.

    The output lasts as long as the audio: a longer video is trimmed, a
    shorter one is looped. The video stream is copied and the audio is
    encoded as AAC.

    Args:
        video_path: Path of the source video file.
        audio_content: Audio bytes; written to a temporary ``.mp3`` file in
            the current working directory.
        output_path: Path of the file to create (overwritten if it exists).

    Raises:
        ValueError: If a duration cannot be determined or the video duration
            is not positive.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    temp_audio_path = f'temp_audio_{uuid.uuid4()}.mp3'

    try:
        # Save audio content to a temporary file
        with open(temp_audio_path, 'wb') as audio_file:
            audio_file.write(audio_content)

        # Get durations of both video and audio
        video_duration = get_media_duration(video_path)
        audio_duration = get_media_duration(temp_audio_path)

        # A zero-length video cannot be looped (and would divide by zero below).
        if video_duration <= 0:
            raise ValueError(f"Video {video_path!r} has no usable duration")

        if video_duration >= audio_duration:
            # Trim video to match the audio length
            cmd = [
                'ffmpeg', '-i', video_path, '-i', temp_audio_path,
                '-t', str(audio_duration),  # Trim video to audio duration
                '-map', '0:v', '-map', '1:a',
                '-c:v', 'copy', '-c:a', 'aac',
                '-y', output_path
            ]
        else:
            # Loop video if it's shorter than audio
            loop_count = int(audio_duration // video_duration) + 1  # Calculate how many times to loop
            cmd = [
                'ffmpeg', '-stream_loop', str(loop_count), '-i', video_path, '-i', temp_audio_path,
                '-t', str(audio_duration),  # Match the duration of the final video with the audio
                '-map', '0:v', '-map', '1:a',
                '-c:v', 'copy', '-c:a', 'aac',
                '-shortest', '-y', output_path
            ]

        subprocess.run(cmd, check=True)
    finally:
        # Clean up the temporary audio file even when ffprobe/ffmpeg fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

def process_video(voice, video_url, text, progress=gr.Progress()):
    """Generate a talking video: TTS audio, then lipsync, with an ffmpeg fallback.

    Steps: synthesize ``text`` with ``voice``, upload the audio, submit a
    lipsync job for ``video_url`` and download its result. If anything in the
    lipsync stage fails, the original video is downloaded and the audio is
    simply muxed onto it instead.

    Args:
        voice: Voice id passed to ``text_to_speech``.
        video_url: URL of the source video.
        text: Text to be spoken.
        progress: Progress callback, called with a fraction and a ``desc``
            label at each stage.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is ``None``
        when no video could be produced.
    """
    # Reject empty input up front rather than spending API calls on it.
    if not text or not text.strip():
        return None, "Please enter some text."
    if not video_url or not video_url.strip():
        return None, "Please enter a video URL."
    video_url = video_url.strip()

    session_id = str(uuid.uuid4())  # Unique id keeps concurrent runs' files apart
    temp_video_path = f"temp_video_{session_id}.mp4"
    output_path = f"output_{session_id}.mp4"
    download_timeout = 120  # seconds; bounds each media download

    progress(0, desc="Generating speech...")
    audio_content = text_to_speech(voice, text)
    if not audio_content:
        return None, "Failed to generate speech audio."

    progress(0.2, desc="Uploading audio...")
    audio_url = upload_file(audio_content, f"audio_{session_id}.mp3")
    if not audio_url:
        return None, "Failed to upload audio file."

    try:
        progress(0.4, desc="Initiating lipsync...")
        job_data = lipsync_api_call(video_url, audio_url)

        if "error" in job_data or "message" in job_data:
            raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))

        job_id = job_data.get("id")
        if not job_id:
            raise Exception("Lipsync API response did not include a job id")

        progress(0.5, desc="Processing lipsync...")
        result_url = check_job_status(job_id)
        if not result_url:
            raise Exception("Lipsync processing failed or timed out")

        progress(0.9, desc="Downloading result...")
        response = requests.get(result_url, timeout=download_timeout)
        # Do not save an HTTP error page as if it were the video.
        response.raise_for_status()
        with open(output_path, "wb") as f:
            f.write(response.content)
        progress(1.0, desc="Complete!")
        return output_path, "Lipsync completed successfully!"

    except Exception as e:
        # Any lipsync-stage failure falls through to the plain ffmpeg mux.
        progress(0.8, desc="Falling back to simple combination...")
        try:
            # Download the video from the URL
            video_response = requests.get(video_url, timeout=download_timeout)
            video_response.raise_for_status()
            with open(temp_video_path, "wb") as f:
                f.write(video_response.content)

            combine_audio_video(temp_video_path, audio_content, output_path)
            progress(1.0, desc="Complete!")
            return output_path, f"Used fallback method. Original error: {str(e)}"
        except Exception as fallback_error:
            return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Cleanup
        if os.path.exists(temp_video_path):
            os.remove(temp_video_path)

def create_interface():
    """Build and return the Gradio Blocks app for the lipsync generator.

    The left column holds the inputs (voice, video URL, text, button); the
    right column shows the generated video and a status message. Clicking the
    button resolves the selected voice label to its id and runs
    ``process_video``.
    """
    voices = get_voices()
    voice_labels = [entry[0] for entry in voices]
    default_voice = voices[0][0] if voices else None

    def on_generate(voice_name, video_url, text):
        # Resolve the dropdown label to the voice id of the first matching entry.
        voice_id = None
        for entry in voices:
            if entry[0] == voice_name:
                voice_id = entry[1]
                break

        if voice_id:
            return process_video(voice_id, video_url, text)
        return None, "Invalid voice selected."

    with gr.Blocks() as app:
        gr.Markdown("# Lipsync Video Generator")

        with gr.Row():
            # Input controls
            with gr.Column():
                voice_dropdown = gr.Dropdown(
                    label="Select Voice",
                    choices=voice_labels,
                    value=default_voice,
                )
                video_url_input = gr.Textbox(label="Enter Video URL")
                text_input = gr.Textbox(label="Enter text", lines=3)
                generate_btn = gr.Button("Generate Video")

            # Result display
            with gr.Column():
                video_output = gr.Video(label="Generated Video")
                status_output = gr.Textbox(label="Status", interactive=False)

        generate_btn.click(
            fn=on_generate,
            inputs=[voice_dropdown, video_url_input, text_input],
            outputs=[video_output, status_output],
        )

    return app

if __name__ == "__main__":
    # Run as a script: build the Gradio UI and launch it.
    app = create_interface()
    app.launch()