import gradio as gr
import whisper
import torch
from transformers import pipeline
import tempfile
import os
import subprocess
import logging
from typing import Optional, Tuple
import re
import warnings

warnings.filterwarnings("ignore")

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SubtitleTranslator:
    def __init__(self):
        # Models are loaded lazily in load_models(); the small "base" Whisper model keeps processing fast
        self.whisper_model = None
        self.translator = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")

    def load_models(self):
        """Load models lazily to save memory"""
        if self.whisper_model is None:
            logger.info("Loading Whisper model...")
            self.whisper_model = whisper.load_model("base", device=self.device)

        if self.translator is None:
            logger.info("Loading translation model...")
            # Use a lightweight translation model
            try:
                self.translator = pipeline(
                    "translation",
                    model="Helsinki-NLP/opus-mt-mul-en",
                    device=0 if self.device == "cuda" else -1
                )
            except Exception as e:
                logger.warning(f"Failed to load Helsinki model, using Facebook model: {e}")
                self.translator = pipeline(
                    "translation",
                    model="facebook/m2m100_418M",
                    device=0 if self.device == "cuda" else -1
                )

    def extract_audio(self, video_path: str) -> str:
        """Extract audio from video file"""
        audio_path = tempfile.mktemp(suffix=".wav")
        try:
            # Use ffmpeg to extract audio - works with any video format/size
            cmd = [
                "ffmpeg", "-i", video_path,
                "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1",
                audio_path, "-y"
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Audio extracted to: {audio_path}")
            return audio_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Audio extraction failed: {e}")
            raise Exception("Failed to extract audio from video")

    def transcribe_audio(self, audio_path: str) -> dict:
        """Transcribe audio using Whisper"""
        try:
            logger.info("Starting transcription...")
            result = self.whisper_model.transcribe(
                audio_path,
                task="transcribe",
                fp16=(self.device == "cuda")
            )
            logger.info("Transcription completed")
            return result
        except Exception as e:
            logger.error(f"Transcription failed: {e}")
            raise Exception("Failed to transcribe audio")

    def translate_text(self, text: str, source_lang: str = None) -> str:
        """Translate text to English"""
        if not text.strip():
            return ""
        try:
            # If already in English, return as is
            if source_lang == "en":
                return text

            # M2M100 needs the target language forced via its tokenizer's get_lang_id();
            # the Helsinki (Marian) model translates to English directly.
            if hasattr(self.translator.tokenizer, "get_lang_id"):
                result = self.translator(
                    text,
                    forced_bos_token_id=self.translator.tokenizer.get_lang_id("en")
                )
            else:
                result = self.translator(text)
            return result[0]['translation_text'] if result else text
        except Exception as e:
            logger.error(f"Translation failed: {e}")
            return text  # Return original if translation fails

    def format_time(self, seconds: float) -> str:
        """Format time for SRT subtitle format"""
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        secs = seconds % 60
        return f"{hours:02d}:{minutes:02d}:{secs:06.3f}".replace('.', ',')
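
    # Quick worked example of the SRT timestamp format produced by format_time
    # (values are illustrative, not part of the original script):
    #   self.format_time(83.5)   -> "00:01:23,500"
    #   self.format_time(3725.0) -> "01:02:05,000"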

    def create_srt(self, segments: list, translated: bool = False) -> str:
        """Create SRT subtitle format"""
        srt_content = ""
        for i, segment in enumerate(segments, 1):
            start_time = self.format_time(segment['start'])
            end_time = self.format_time(segment['end'])
            text = segment.get('translated_text', segment['text']) if translated else segment['text']
            srt_content += f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
        return srt_content

    def process_video(self, video_path: str, translate: bool = True) -> Tuple[str, Optional[str], str]:
        """Main processing function"""
        try:
            # Load models
            self.load_models()

            # Extract audio
            audio_path = self.extract_audio(video_path)

            try:
                # Transcribe
                result = self.transcribe_audio(audio_path)
                detected_language = result.get('language', 'unknown')

                # Process segments
                segments = result['segments']

                if translate and detected_language != 'en':
                    logger.info(f"Translating from {detected_language} to English...")
                    for segment in segments:
                        segment['translated_text'] = self.translate_text(
                            segment['text'], detected_language
                        )

                # Create subtitle files
                original_srt = self.create_srt(segments, translated=False)
                translated_srt = self.create_srt(segments, translated=True) if translate else ""

                # Save to temporary files
                original_file = tempfile.mktemp(suffix=".srt")
                with open(original_file, 'w', encoding='utf-8') as f:
                    f.write(original_srt)

                translated_file = None
                if translate and detected_language != 'en':
                    translated_file = tempfile.mktemp(suffix=".srt")
                    with open(translated_file, 'w', encoding='utf-8') as f:
                        f.write(translated_srt)

                return original_file, translated_file, f"Detected language: {detected_language}"
            finally:
                # Clean up audio file
                if os.path.exists(audio_path):
                    os.unlink(audio_path)
        except Exception as e:
            logger.error(f"Processing failed: {e}")
            raise gr.Error(f"Processing failed: {str(e)}")


# Initialize the translator
translator = SubtitleTranslator()


def process_video_interface(video_file, translate_option):
    """Gradio interface function"""
    if video_file is None:
        raise gr.Error("Please upload a video file")

    translate = translate_option == "Yes"

    try:
        original_srt, translated_srt, info = translator.process_video(video_file, translate)
        # Values are returned in the order wired to [original_output, info_output, translated_output]
        return original_srt, info, translated_srt if translated_srt else None
    except Exception as e:
        raise gr.Error(f"Error processing video: {str(e)}")
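
# A minimal sketch of driving the pipeline without the Gradio UI, assuming this file
# is saved as app.py (the module name is an assumption) and that ffmpeg plus the
# Whisper and translation models above are available. Kept as comments so the
# script's behaviour is unchanged:
#
#   from app import SubtitleTranslator
#
#   subtitle_translator = SubtitleTranslator()
#   original_srt, translated_srt, info = subtitle_translator.process_video("sample.mp4", translate=True)
#   print(info)           # e.g. "Detected language: fr"
#   print(original_srt)   # path to the generated .srt file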

# Create Gradio interface
def create_interface():
    with gr.Blocks(
        title="Video Subtitle Translator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {max-width: 1000px; margin: auto;}
        .subtitle-info {background: #f0f8ff; padding: 15px; border-radius: 10px; margin: 10px 0;}
        """
    ) as demo:
        gr.HTML("""
        <div style="text-align: center;">
            <h1>🎬 Video Subtitle Translator</h1>
            <p>Generate and translate subtitles for any video - No size or duration limits!</p>
            <p>Supports all video formats • Automatic language detection • Fast processing</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                video_input = gr.File(
                    label="Upload Video File",
                    file_types=["video"],
                    type="filepath"
                )

                translate_option = gr.Radio(
                    choices=["Yes", "No"],
                    value="Yes",
                    label="Translate to English?",
                    info="Choose 'No' if you only want transcription in original language"
                )

                process_btn = gr.Button(
                    "🚀 Generate Subtitles",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=3):
                info_output = gr.Textbox(
                    label="Processing Info",
                    interactive=False,
                    elem_classes=["subtitle-info"]
                )

                original_output = gr.File(
                    label="📝 Original Subtitles (.srt)",
                    interactive=False
                )

                translated_output = gr.File(
                    label="🌍 English Translated Subtitles (.srt)",
                    interactive=False,
                    visible=True
                )

        gr.HTML("""

        <div class="subtitle-info">
            <h3>📋 Instructions:</h3>
            <ol>
                <li>Upload any video file - MP4, AVI, MOV, MKV, etc.</li>
                <li>Choose translation option - Yes for English translation, No for original language only</li>
                <li>Click "Generate Subtitles" - Processing time depends on video length</li>
                <li>Download your subtitle files - Use them with any video player</li>
            </ol>
            <h3>✨ Features:</h3>
        </div>
        """)

        # Set up the processing
        process_btn.click(
            fn=process_video_interface,
            inputs=[video_input, translate_option],
            outputs=[original_output, info_output, translated_output]
        )

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)