video_splitter / app.py
BahadirGLCK's picture
Change application flow.
6621c82
raw
history blame
6.52 kB
import os
import datetime
import hashlib
import requests
import numpy as np
import gradio as gr
import whisper
import srt
import torch
# Display name -> two-letter language code. The keys populate the language
# dropdown in the UI (in this order) and the values are forwarded to Whisper
# as the ``language`` argument.
# NOTE(review): confirm every code is accepted by the installed Whisper
# release -- "ga" (Irish) and "or" (Odia) in particular look unsupported.
LANGUAGE_OPTIONS = {
    "Afrikaans": "af",
    "Arabic": "ar",
    "Azerbaijani": "az",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Catalan": "ca",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Persian": "fa",
    "Finnish": "fi",
    "French": "fr",
    "Irish": "ga",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Indonesian": "id",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Maltese": "mt",
    "Nepali": "ne",
    "Dutch": "nl",
    "Norwegian": "no",
    "Odia": "or",
    "Punjabi": "pa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Vietnamese": "vi",
    "Chinese": "zh",
}
def transcribe_audio(audio_file_path, model_size='base', language="en"):
    """Transcribe an audio file with Whisper and render the result as SRT.

    Args:
        audio_file_path: Path of the audio file passed to ``model.transcribe``.
        model_size: Whisper model name passed to ``whisper.load_model``.
        language: Language code forwarded to Whisper.

    Returns:
        A ``(transcription, srt_text, segments)`` tuple, where
        ``transcription`` is ``result["text"]``, ``segments`` is
        ``result["segments"]`` and ``srt_text`` is the SRT rendering of those
        segments.

    Raises:
        Whatever ``whisper.load_model`` or ``model.transcribe`` raise; the
        caller is expected to handle those.
    """
    # Load the weights directly onto the CPU instead of loading them on the
    # default device (possibly a GPU) and moving them afterwards.
    model = whisper.load_model(model_size, device="cpu")
    # Inference runs on the CPU, where half precision is not used; asking for
    # fp32 explicitly avoids Whisper's per-call fp16 fallback warning.
    result = model.transcribe(audio_file_path, language=language, fp16=False)
    transcription = result["text"]
    segments = result["segments"]
    try:
        # Deliberate best-effort: use Whisper's own SRT formatter when the
        # installed release provides one (NOTE(review): ``format_srt`` may not
        # exist in current releases -- confirm), otherwise fall back below.
        from whisper.utils import format_srt
        srt_text = format_srt(segments)
    except Exception:
        srt_text = generate_srt(segments)
    return transcription, srt_text, segments
def generate_srt(segments):
    """Build SRT-formatted text from transcription segments.

    Args:
        segments: Iterable of mappings, each providing ``"start"`` and
            ``"end"`` (seconds) and ``"text"`` (the subtitle content).

    Returns:
        The string produced by ``srt.compose`` for one subtitle per segment,
        numbered from 1 in input order.
    """
    # Function-scope imports keep this helper usable on its own.
    import datetime
    import srt

    subtitles = [
        srt.Subtitle(
            index=position,
            start=datetime.timedelta(seconds=seg["start"]),
            end=datetime.timedelta(seconds=seg["end"]),
            # Segment text typically carries leading whitespace; strip it so
            # subtitle lines do not start with a stray space.
            content=seg["text"].strip(),
        )
        for position, seg in enumerate(segments, start=1)
    ]
    return srt.compose(subtitles)
def prepare_chapter_prompt(srt_text):
    """Assemble the chapter-generation prompt for an SRT transcript.

    The result is the fixed instruction text, a blank line, and then a request
    that embeds ``srt_text`` inside a fenced block.

    Args:
        srt_text: Transcript in SRT format to embed in the prompt.

    Returns:
        The complete prompt as a single string.
    """
    instruction_parts = [
        "You are a highly skilled video content segmentation and optimization expert. ",
        "Your task is to analyze a transcript of a YouTube video provided in SRT format and produce engaging and concise chapter headers. ",
        "Each chapter header must be on its own line in the exact format: 'mm:ss Chapter Title'.\n\n",
        "- 'mm:ss' represents the starting time of the chapter (minutes and seconds).\n",
        "- 'Chapter Title' must be a catchy, audience-friendly title that summarizes the key idea or transition at that point in the video.\n\n",
        "IMPORTANT: Although these instructions are in English, please ensure that your output is in the same language as the provided SRT transcript.",
    ]
    request_parts = [
        "Below is the transcript of a YouTube video in SRT format:\n\n",
        "```\n",
        f"{srt_text}\n",
        "```\n\n",
        "Please generate only the chapter breakdown using the guidelines above. ",
        "Each chapter header should be formatted as:\n",
        "mm:ss Chapter Title",
    ]
    instructions = "".join(instruction_parts)
    request = "".join(request_parts)
    return "\n\n".join((instructions, request))
def format_prompt_html(prompt):
    """Render the prompt as an HTML snippet with a copy-to-clipboard button.

    The prompt is shown in a read-only textarea styled with Gradio's colour
    variables, followed by a blue 'Copy Prompt' button and a 'Prompt Copied!'
    message that is shown for two seconds after a click.

    Args:
        prompt: Prompt text to display. It is HTML-escaped before being
            embedded, so transcript content such as ``</textarea>``, ``<`` or
            ``&`` cannot break out of the textarea or inject markup.

    Returns:
        The HTML snippet as a string.
    """
    # Function-scope import: the name is only needed here.
    from html import escape

    # The browser decodes the character references inside the textarea again,
    # so the displayed (and copied) value is the original prompt text.
    safe_prompt = escape(str(prompt), quote=False)
    html_content = f"""
    <div style="display: flex; flex-direction: column; gap: 10px; margin-top: 10px;">
    <textarea id="prompt_text" rows="10"
    style="width: 100%; resize: vertical;
    background-color: var(--block-background-fill);
    color: var(--block-text-color);
    border: 1px solid var(--block-border-color);
    border-radius: 4px;"
    readonly>{safe_prompt}</textarea>
    <button
    style="width: 150px; padding: 8px;
    background-color: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;"
    onclick="
    navigator.clipboard.writeText(document.getElementById('prompt_text').value);
    const copiedMsg = document.getElementById('copied_msg');
    copiedMsg.style.display = 'inline';
    setTimeout(() => copiedMsg.style.display = 'none', 2000);
    ">
    Copy Prompt
    </button>
    <span id="copied_msg" style="display: none; color: var(--primary-text-color); font-weight: bold;">Prompt Copied!</span>
    </div>
    """
    return html_content
def process_audio(audio, language_name):
    """Gradio callback: transcribe the audio and build the chapter prompt.

    Args:
        audio: Path of the uploaded audio file; empty or ``None`` when nothing
            was uploaded.
        language_name: Display name of the audio language. Names missing from
            ``LANGUAGE_OPTIONS`` fall back to ``"en"``.

    Returns:
        A ``(transcription, srt_text, prompt_html)`` tuple. On failure the
        first element is an error message and the other two are empty
        strings, so the UI always receives three values.
    """
    # Guard first: without a file there is nothing to transcribe, and loading
    # the model only to fail afterwards is wasted work.
    if not audio:
        return "Error during transcription: no audio file was provided.", "", ""

    lang_code = LANGUAGE_OPTIONS.get(language_name, "en")
    try:
        transcription, srt_text, _segments = transcribe_audio(
            audio, model_size='base', language=lang_code
        )
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"Error during transcription: {e}", "", ""

    chapter_prompt = prepare_chapter_prompt(srt_text)
    prompt_html = format_prompt_html(chapter_prompt)
    return transcription, srt_text, prompt_html
# Gradio UI wiring: one audio upload plus a language dropdown feed
# process_audio, whose three return values fill the transcription box, the
# SRT box and the HTML prompt panel, in that order.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=list(LANGUAGE_OPTIONS),
            label="Audio Language",
            value="English",
        ),
    ],
    outputs=[
        gr.Textbox(label="Full Transcription", lines=10),
        gr.Textbox(label="SRT File Content", lines=10),
        gr.HTML(label="Prepared Chapter Prompt (Copy & Paste into ChatGPT)"),
    ],
    title="Video Chapter Splitter from Audio (MP3)",
    description=(
        "Upload an audio file (e.g., MP3) of your YouTube video and select the audio language. "
        "The app will transcribe the audio using Whisper, generate subtitles in SRT format, "
        "and prepare a single, complete prompt that instructs ChatGPT/GPT-4 to generate a chapter breakdown in the format 'mm:ss Chapter Title'.\n\n"
        "Click the 'Copy Prompt' button to copy the entire prompt, and a brief 'Prompt Copied!' message will appear."
    ),
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()