# -*- coding: utf-8 -*-
"""
Script that downloads media from a YouTube URL with yt-dlp and provides a helper to transcribe an audio file to plain text with the OpenAI Whisper API.
"""

import os
import subprocess
import cv2
import base64
import time
import json
import re 
import openai

# --- Configuration (replace the placeholders) ---
VIDEO_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"  # Replace with the URL of the YouTube video
OUTPUT_DIR = "./audio_analysis_output" # Directory that holds both the downloaded file and the transcript
# Base name of the downloaded file; note that it carries no extension.
AUDIO_FILENAME = "downloaded_audio"
TRANSCRIPT_FILENAME = "transcript.txt"
# Full paths derived from OUTPUT_DIR and the two file names above.
AUDIO_PATH = os.path.join(OUTPUT_DIR, AUDIO_FILENAME)
TRANSCRIPT_PATH = os.path.join(OUTPUT_DIR, TRANSCRIPT_FILENAME)

# Warn when VIDEO_URL is still the untouched placeholder value
if VIDEO_URL == "URL_DO_SEU_VIDEO_AQUI":
    print("AVISO: A URL do vídeo não foi definida. Por favor, edite o script e insira a URL desejada.")
    # The exit call below is left commented out, so the script keeps running after the warning.
    # exit(1)

# --- Funções ---

def create_output_directory():
    """Create OUTPUT_DIR (with any missing parents) unless the path already exists.

    Prints a confirmation message only when the directory was actually created.
    """
    if os.path.exists(OUTPUT_DIR):
        return
    os.makedirs(OUTPUT_DIR)
    print(f"Diretório criado: {OUTPUT_DIR}")
        
def retirar_sufixo_codec_arquivo(directory) -> None:
    """Strip a ``.fNNN`` marker (``f`` plus exactly three digits) from entry names.

    Every name returned by ``os.listdir(directory)`` is inspected. The marker is
    removed only when it sits immediately before the final extension, e.g.
    ``clip.f137.mp4`` becomes ``clip.mp4``. Each rename is reported on stdout.

    Args:
        directory: Path of the directory whose entries are renamed in place.
    """
    codec_marker = re.compile(r'\.f\d{3}(?=\.\w+$)')
    for entry in os.listdir(directory):
        cleaned, hits = codec_marker.subn('', entry)
        if not hits:
            # Nothing was stripped, so the name is already clean.
            continue
        os.rename(os.path.join(directory, entry), os.path.join(directory, cleaned))
        print(f"Renomeado: {entry} → {cleaned}")
            

def download_audio(url, output_path):
    """Download only the audio track of a video with yt-dlp and convert it to MP3.

    The previous command requested a merged video (``bestvideo+bestaudio``),
    which contradicted this function's purpose and never produced the ``.mp3``
    file the rest of the script looks for.

    Args:
        url: Address of the video to fetch.
        output_path: Destination path WITHOUT extension. The finished file is
            written to ``output_path + ".mp3"``.

    Returns:
        True when yt-dlp exits successfully, False when yt-dlp reports an
        error or the ``yt-dlp`` executable cannot be found.
    """
    print(f"Baixando áudio de {url} para {output_path}...")

    # Escape literal '%' so yt-dlp does not treat it as an output-template
    # field, then let yt-dlp append the real extension ("mp3" after conversion).
    output_template = os.fspath(output_path).replace('%', '%%') + '.%(ext)s'
    command = [
        'yt-dlp',
        '-f', 'bestaudio/best',
        '-x', '--audio-format', 'mp3',  # extract the audio and re-encode it as mp3
        '-o', output_template,
        '--',  # end of options: a URL starting with '-' is never parsed as a flag
        url,
    ]

    # Keep the try body limited to the subprocess call so that the
    # "yt-dlp not found" message can only be triggered by the launch itself.
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Erro ao baixar o áudio: {e}")
        print(f"Saída do erro: {e.stderr}")
        return False
    except FileNotFoundError:
        print("Erro: O comando 'yt-dlp' não foi encontrado. Certifique-se de que ele está instalado e no PATH do sistema.")
        return False

    # Best-effort cleanup of ".fNNN" markers in the directory that actually
    # received the download (not necessarily the module-level OUTPUT_DIR).
    target_dir = os.path.dirname(os.fspath(output_path)) or '.'
    try:
        retirar_sufixo_codec_arquivo(target_dir)
    except OSError as e:
        print(f"Aviso: falha ao renomear arquivos em {target_dir}: {e}")

    print("Download de áudio concluído com sucesso.")
    return True

def extract_text_from_audio(audio_path, output_txt_path=None) -> str:
    """Transcribe an audio file with OpenAI's ``whisper-1`` model into plain text.

    The transcription is requested in SRT format. Cue index lines and timestamp
    lines are then removed, while the line breaks of the subtitle text
    (including the blank lines between cues) are kept.

    Args:
        audio_path: Path of the audio file to upload.
        output_txt_path: Optional path; when given, the cleaned text is also
            written there as UTF-8.

    Returns:
        The cleaned transcript, or ``""`` if any step fails (the error is
        printed instead of being raised).
    """
    try:
        # Only override the SDK's key when the variable is set, so a key that
        # was configured elsewhere is not replaced by None.
        api_key = os.getenv("OPENAI_API_KEY")
        if api_key:
            openai.api_key = api_key
        print(f"Iniciando transcrição (formato SRT simplificado): {audio_path}")

        # NOTE(review): openai.Audio.transcribe looks like the pre-1.0 SDK
        # interface — confirm against the installed openai version.
        with open(audio_path, "rb") as audio_file:
            srt_result = openai.Audio.transcribe(
                model="whisper-1",
                file=audio_file,
                response_format="srt"
            )

        lines = srt_result.splitlines()
        only_text = []
        for position, line in enumerate(lines):
            if "-->" in line:
                # Timestamp line ("00:00:01,000 --> 00:00:04,000").
                continue
            following = lines[position + 1] if position + 1 < len(lines) else ""
            # A digit-only line is a cue index only when a timestamp line comes
            # right after it; otherwise it is spoken text (e.g. "2024") and
            # must be kept.
            if re.fullmatch(r"\d+", line.strip()) and "-->" in following:
                continue
            only_text.append(line.strip())
        formatted_text = "\n".join(only_text)

        # Persist the transcript when a destination was requested.
        if output_txt_path:
            with open(output_txt_path, "w", encoding="utf-8") as f:
                f.write(formatted_text)
            print(f"Transcrição salva em: {output_txt_path}")

        return formatted_text
    except Exception as e:
        # Deliberate catch-all: callers rely on "" as the failure signal.
        print(f"Erro ao transcrever áudio: {e}")
        return ""


# --- Main block ---
# NOTE: a leftover instruction here asked to add OpenAI client initialization and an analysis loop; neither is implemented below.
if __name__ == "__main__":
    create_output_directory()

    # Step 1: obtain the media, either by downloading it or by reusing a local file.
    if VIDEO_URL == "URL_DO_SEU_VIDEO_AQUI":
        # Placeholder URL: fall back to whatever already exists at AUDIO_PATH.
        video_downloaded_or_exists = os.path.exists(AUDIO_PATH)
        if video_downloaded_or_exists:
            print(f"URL não fornecida, mas vídeo encontrado em {AUDIO_PATH}. Tentando processar.")
        else:
            print("URL do vídeo não fornecida e vídeo local não encontrado. Pulando download e extração.")
    elif download_audio(VIDEO_URL, AUDIO_PATH):
        print(f"Vídeo salvo em: {AUDIO_PATH}")
        video_downloaded_or_exists = True
    else:
        print("Falha no download do vídeo. Pulando etapas dependentes.")
        video_downloaded_or_exists = False

    # Step 2: transcription. The constant-false guard keeps this step switched
    # off, so nothing below runs until the guard is changed.
    if False:
        if not video_downloaded_or_exists:
            print("Pulando extração de frames pois o vídeo não está disponível.")
        else:
            extract_text_from_audio(AUDIO_PATH + '.mp3', TRANSCRIPT_PATH)