Final_Assignment_Template

Sleeping

App Files Community

gdms committed on May 8

Commit

75ce402

1 Parent(s): 4147b5e

tool de transcrição ok

Browse files

Files changed (1) hide show

tool_audio_extractor.py +20 -28

tool_audio_extractor.py CHANGED Viewed

@@ -10,15 +10,13 @@ import base64
 import time
 import json
 import re
-import openai
 # --- Configurações (Substitua os placeholders) ---
 VIDEO_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"  # Substitua pela URL do vídeo do YouTube
 OUTPUT_DIR = "./audio_analysis_output" # Diretório para salvar o áudio
 AUDIO_FILENAME = "downloaded_audio"
 TRANSCRIPT_FILENAME = "transcript.txt"
-AUDIO_PATH = os.path.join(OUTPUT_DIR, AUDIO_FILENAME)
-TRANSCRIPT_PATH = os.path.join(OUTPUT_DIR, TRANSCRIPT_FILENAME)
 # Verifica se a URL foi definida
 if VIDEO_URL == "URL_DO_SEU_VIDEO_AQUI":
@@ -44,8 +42,11 @@ def retirar_sufixo_codec_arquivo(directory) -> None:
             print(f"Renomeado: {filename} → {new_filename}")
-def download_audio(url, output_path):
     """Baixa apenas o áudio do YouTube usando yt-dlp."""
     print(f"Baixando áudio de {url} para {output_path}...")
     try:
         # Comando yt-dlp para baixar o melhor áudio disponível e convertê-lo para mp3
@@ -53,7 +54,7 @@ def download_audio(url, output_path):
         command = [
             'yt-dlp',
-            '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
             '-o', output_path,
             url
         ]
@@ -71,32 +72,34 @@ def download_audio(url, output_path):
         print("Erro: O comando 'yt-dlp' não foi encontrado. Certifique-se de que ele está instalado e no PATH do sistema.")
         return False
-def extract_text_from_audio(audio_path, output_txt_path=None) -> str:
     """
     Usa a API Whisper da OpenAI para transcrever o áudio em texto com quebras de linha naturais,
     removendo timestamps e IDs. Salva em arquivo .txt se o caminho for fornecido.
     """
     try:
-        openai.api_key = os.getenv("OPENAI_API_KEY")
         print(f"Iniciando transcrição (formato SRT simplificado): {audio_path}")
         with open(audio_path, "rb") as audio_file:
-            srt_result = openai.Audio.transcribe(
                 model="whisper-1",
                 file=audio_file,
                 response_format="srt"
             )
         # Remove linhas com números e timestamps
-        lines = srt_result.splitlines()
         only_text = [line.strip() for line in lines if not re.match(r"^\d+$", line) and "-->" not in line]
         formatted_text = "\n".join(only_text)
         # Salva em .txt se desejado
-        if output_txt_path:
-            with open(output_txt_path, "w", encoding="utf-8") as f:
-                f.write(formatted_text)
-            print(f"Transcrição salva em: {output_txt_path}")
         return formatted_text
     except Exception as e:
@@ -112,24 +115,13 @@ if __name__ == "__main__":
     # Etapa 1: Baixar o vídeo
     video_downloaded_or_exists = False
     if VIDEO_URL != "URL_DO_SEU_VIDEO_AQUI":
-        if download_audio(VIDEO_URL, AUDIO_PATH):
-            print(f"Vídeo salvo em: {AUDIO_PATH}")
             video_downloaded_or_exists = True
         else:
             print("Falha no download do vídeo. Pulando etapas dependentes.")
-    elif os.path.exists(AUDIO_PATH):
-         print(f"URL não fornecida, mas vídeo encontrado em {AUDIO_PATH}. Tentando processar.")
-         video_downloaded_or_exists = True
     else:
-        print("URL do vídeo não fornecida e vídeo local não encontrado. Pulando download e extração.")
-    if False:
-        # Etapa 2: Extrair frames
-        if video_downloaded_or_exists:
-            extract_text_from_audio(AUDIO_PATH + '.mp3', TRANSCRIPT_PATH)
-        else:
-            print("Pulando extração de frames pois o vídeo não está disponível.")

 import time
 import json
 import re
+from openai import OpenAI
 # --- Configurações (Substitua os placeholders) ---
 VIDEO_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"  # Substitua pela URL do vídeo do YouTube
 OUTPUT_DIR = "./audio_analysis_output" # Diretório para salvar o áudio
 AUDIO_FILENAME = "downloaded_audio"
 TRANSCRIPT_FILENAME = "transcript.txt"
 # Verifica se a URL foi definida
 if VIDEO_URL == "URL_DO_SEU_VIDEO_AQUI":
             print(f"Renomeado: {filename} → {new_filename}")
+def download_audio(url):
     """Baixa apenas o áudio do YouTube usando yt-dlp."""
+    output_path = f'{OUTPUT_DIR}/{AUDIO_FILENAME}.%(ext)s'
     print(f"Baixando áudio de {url} para {output_path}...")
     try:
         # Comando yt-dlp para baixar o melhor áudio disponível e convertê-lo para mp3
         command = [
             'yt-dlp',
+            '-f', 'bestaudio[ext=m4a]',
             '-o', output_path,
             url
         ]
         print("Erro: O comando 'yt-dlp' não foi encontrado. Certifique-se de que ele está instalado e no PATH do sistema.")
         return False
+def extract_text_from_audio() -> str:
     """
     Usa a API Whisper da OpenAI para transcrever o áudio em texto com quebras de linha naturais,
     removendo timestamps e IDs. Salva em arquivo .txt se o caminho for fornecido.
     """
+    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
     try:
+        audio_path = f"{OUTPUT_DIR}/{AUDIO_FILENAME}.m4a"
         print(f"Iniciando transcrição (formato SRT simplificado): {audio_path}")
         with open(audio_path, "rb") as audio_file:
+            transcription = client.audio.transcriptions.create(
                 model="whisper-1",
                 file=audio_file,
                 response_format="srt"
             )
         # Remove linhas com números e timestamps
+        lines = transcription.splitlines()
         only_text = [line.strip() for line in lines if not re.match(r"^\d+$", line) and "-->" not in line]
         formatted_text = "\n".join(only_text)
         # Salva em .txt se desejado
+        output_txt_path = f"{OUTPUT_DIR}/{TRANSCRIPT_FILENAME}"
+        with open(output_txt_path, "w", encoding="utf-8") as f:
+            f.write(formatted_text)
+        print(f"Transcrição salva em: {output_txt_path}")
         return formatted_text
     except Exception as e:
     # Etapa 1: Baixar o vídeo
     video_downloaded_or_exists = False
     if VIDEO_URL != "URL_DO_SEU_VIDEO_AQUI":
+        if download_audio(VIDEO_URL):
+            print(f"AUDIO salvo em: {OUTPUT_DIR}")
             video_downloaded_or_exists = True
         else:
             print("Falha no download do vídeo. Pulando etapas dependentes.")
     else:
+        print("Vídeo não informado")
+    extract_text_from_audio()