Spaces:

NeuralFalcon
/

Kokoro-TTS-Subtitle

Running

App Files Files Community

NeuralFalcon committed on Jun 24

Commit

7d04582

verified ·

1 Parent(s): 5d51aed

Update app.py

Browse files

Files changed (1) hide show

app.py +346 -48

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 # Initialize a pipeline
 from kokoro import KPipeline
 # from IPython.display import display, Audio
@@ -6,24 +5,38 @@ from kokoro import KPipeline
 import os
 from huggingface_hub import list_repo_files
 import uuid
-import re
 import gradio as gr
-#translate langauge
 from deep_translator import GoogleTranslator
-def bulk_translate(text, target_language, chunk_size=500):
-    language_map_local = {
-    "American English": "en",
-    "British English": "en",
-    "Hindi": "hi",
-    "Spanish": "es",
-    "French": "fr",
-    "Italian": "it",
-    "Brazilian Portuguese": "pt",
-    "Japanese": "ja",
-    "Mandarin Chinese": "zh-CN"
-    }
     # lang_code = GoogleTranslator().get_supported_languages(as_dict=True).get(target_language.lower())
     lang_code=language_map_local[target_language]
     sentences = re.split(r'(?<=[.!?])\s+', text)  # Split text into sentences
@@ -43,7 +56,7 @@ def bulk_translate(text, target_language, chunk_size=500):
     translated_chunks = [GoogleTranslator(target=lang_code).translate(chunk) for chunk in chunks]
     result=" ".join(translated_chunks)
     return result.strip()
 # Language mapping dictionary
 language_map = {
     "American English": "a",
@@ -67,7 +80,7 @@ def update_pipeline(Language):
     # Only update if the language is different
     if new_lang != last_used_language:
         pipeline = KPipeline(lang_code=new_lang)
-        last_used_language = new_lang
         try:
             pipeline = KPipeline(lang_code=new_lang)
             last_used_language = new_lang  # Update last used language
@@ -125,7 +138,7 @@ def clean_text(text):
         r'[\U00002702-\U000027B0]|'  # Dingbats
         r'[\U0001F1E0-\U0001F1FF]'   # Flags (iOS)
         r'', flags=re.UNICODE)
     text = emoji_pattern.sub(r'', text)
     # Remove multiple spaces and extra line breaks
@@ -139,13 +152,13 @@ def tts_file_name(text,language):
     text = re.sub(r'[^a-zA-Z\s]', '', text)  # Retain only alphabets and spaces
     text = text.lower().strip()             # Convert to lowercase and strip leading/trailing spaces
     text = text.replace(" ", "_")           # Replace spaces with underscores
-    language=language.replace(" ", "_").strip()
     # Truncate or handle empty text
     truncated_text = text[:20] if len(text) > 20 else text if len(text) > 0 else language
     # Generate a random string for uniqueness
     random_string = uuid.uuid4().hex[:8].upper()
     # Construct the file name
     file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
     return file_name
@@ -166,7 +179,7 @@ def remove_silence_function(file_path,minimum_silence=50):
     audio_chunks = split_on_silence(sound,
                                     min_silence_len=100,
                                     silence_thresh=-45,
-                                    keep_silence=minimum_silence)
     # Putting the file back together
     combined = AudioSegment.empty()
     for chunk in audio_chunks:
@@ -205,7 +218,7 @@ def generate_and_save_audio(text, Language="American English",voice="af_bella",
           duration_sec = len(audio_np) / 24000
           timestamps[i]["duration"] = duration_sec
           wav_file.writeframes(audio_bytes)
-    if remove_silence:
       keep_silence = int(keep_silence_up_to * 1000)
       new_wave_file=remove_silence_function(save_path,minimum_silence=keep_silence)
       return new_wave_file,timestamps
@@ -257,7 +270,7 @@ def write_word_srt(word_level_timestamps, output_file="word.srt", skip_punctuati
         for entry in word_level_timestamps:
             word = entry["word"]
             # Skip punctuation if enabled
             if skip_punctuation and all(char in string.punctuation for char in word):
                 continue
@@ -320,13 +333,13 @@ def write_sentence_srt(word_level_timestamps, output_file="subtitles.srt", max_w
         # Skip selected punctuation from remove_punctuation list
         if word in remove_punctuation:
-            continue
         # Attach punctuation to the previous word
         if word in string.punctuation:
             if subtitle_words:
                 subtitle_words[-1] = (subtitle_words[-1][0] + word, subtitle_words[-1][1])
-            continue
         # Start a new subtitle block if needed
         if start_time is None:
@@ -383,16 +396,16 @@ import re
 def fix_punctuation(text):
     # Remove spaces before punctuation marks (., ?, !, ,)
     text = re.sub(r'\s([.,?!])', r'\1', text)
     # Handle quotation marks: remove spaces before and after them
     text = text.replace('" ', '"')
     text = text.replace(' "', '"')
     text = text.replace('" ', '"')
     # Track quotation marks to add space after closing quotes
     track = 0
     result = []
     for index, char in enumerate(text):
         if char == '"':
             track += 1
@@ -495,10 +508,9 @@ def save_current_data():
     if os.path.exists("./last"):
         shutil.rmtree("./last")
     os.makedirs("./last",exist_ok=True)
 def KOKORO_TTS_API(text, Language="American English",voice="af_bella", speed=1,translate_text=False,remove_silence=False,keep_silence_up_to=0.05):
-    if translate_text:
         text=bulk_translate(text, Language, chunk_size=500)
     save_path,timestamps=generate_and_save_audio(text=text, Language=Language,voice=voice, speed=speed,remove_silence=remove_silence,keep_silence_up_to=keep_silence_up_to)
     if remove_silence==False:
@@ -516,16 +528,15 @@ def KOKORO_TTS_API(text, Language="American English",voice="af_bella", speed=1,t
             shutil.copy(normal_srt, "./last/")
             shutil.copy(json_file, "./last/")
             return save_path,save_path,word_level_srt,normal_srt,json_file
-    return save_path,save_path,None,None,None
-def ui():
-    def toggle_autoplay(autoplay):
         return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
     # Define examples in the format you mentioned
     dummy_examples = [
         ["Hey, y'all, let’s grab some coffee and catch up!", "American English", "af_bella"],
@@ -538,17 +549,16 @@ def ui():
         ["こんにちは、お元気ですか？", "Japanese", "jf_nezumi"],
         ["你好，你怎么样?", "Mandarin Chinese", "zf_xiaoni"]
     ]
     with gr.Blocks() as demo:
         # gr.Markdown("<center><h1 style='font-size: 40px;'>KOKORO TTS</h1></center>")  # Larger title with CSS
-        gr.Markdown("[Install on Your Local System](https://github.com/NeuralFalconYT/kokoro_v1)")
-        lang_list = ['American English', 'British English', 'Hindi', 'Spanish', 'French', 'Italian', 'Brazilian Portuguese', 'Japanese', 'Mandarin Chinese']
-        voice_names = get_voice_names("hexgrad/Kokoro-82M")
         with gr.Row():
             with gr.Column():
                 text = gr.Textbox(label='📝 Enter Text', lines=3)
                 with gr.Row():
                     language_name = gr.Dropdown(lang_list, label="🌍 Select Language", value=lang_list[0])
@@ -588,7 +598,7 @@ def tutorial():
     # Markdown explanation for language code
     explanation = """
     ## Language Code Explanation:
-    Example: `'af_bella'`
     - **'a'** stands for **American English**.
     - **'f_'** stands for **Female** (If it were 'm_', it would mean Male).
     - **'bella'** refers to the specific voice.
@@ -609,11 +619,298 @@ def tutorial():
     - **"m_"**: Male
     """
     with gr.Blocks() as demo2:
-        gr.Markdown("[Install on Your Local System](https://github.com/NeuralFalconYT/kokoro_v1)")
         gr.Markdown(explanation)  # Display the explanation
     return demo2
 import click
 @click.command()
@@ -622,8 +919,9 @@ import click
 def main(debug, share):
 # def main(debug=True, share=True):
     demo1 = ui()
-    demo2 = tutorial()
-    demo = gr.TabbedInterface([demo1, demo2],["Multilingual TTS","VoicePack Explanation"],title="Kokoro TTS")#,theme='JohnSmith9982/small_and_pretty')
     demo.queue().launch(debug=debug, share=share)
     # demo.queue().launch(debug=debug, share=share,server_port=9000)
     #Run on local network
@@ -638,4 +936,4 @@ last_used_language = "a"
 pipeline = KPipeline(lang_code=last_used_language)
 temp_folder = create_audio_dir()
 if __name__ == "__main__":
-    main()

 # Initialize a pipeline
 from kokoro import KPipeline
 # from IPython.display import display, Audio
 import os
 from huggingface_hub import list_repo_files
 import uuid
+import re
 import gradio as gr
+# Translate language
 from deep_translator import GoogleTranslator
+language_map_local = {
+"American English": "en",
+"British English": "en",
+"Hindi": "hi",
+"Spanish": "es",
+"French": "fr",
+"Italian": "it",
+"Brazilian Portuguese": "pt",
+"Japanese": "ja",
+"Mandarin Chinese": "zh-CN"
+}
+def bulk_translate(text, target_language, chunk_size=500,MAX_ALLOWED_CHARACTERS = 10000):
+    if len(text)>=MAX_ALLOWED_CHARACTERS:
+      gr.Warning("[WARNING] Text too long — skipping translation to prevent Google Translate abuse.")
+      return text
+    # language_map_local = {
+    # "American English": "en",
+    # "British English": "en",
+    # "Hindi": "hi",
+    # "Spanish": "es",
+    # "French": "fr",
+    # "Italian": "it",
+    # "Brazilian Portuguese": "pt",
+    # "Japanese": "ja",
+    # "Mandarin Chinese": "zh-CN"
+    # }
     # lang_code = GoogleTranslator().get_supported_languages(as_dict=True).get(target_language.lower())
     lang_code=language_map_local[target_language]
     sentences = re.split(r'(?<=[.!?])\s+', text)  # Split text into sentences
     translated_chunks = [GoogleTranslator(target=lang_code).translate(chunk) for chunk in chunks]
     result=" ".join(translated_chunks)
     return result.strip()
 # Language mapping dictionary
 language_map = {
     "American English": "a",
     # Only update if the language is different
     if new_lang != last_used_language:
         pipeline = KPipeline(lang_code=new_lang)
+        last_used_language = new_lang
         try:
             pipeline = KPipeline(lang_code=new_lang)
             last_used_language = new_lang  # Update last used language
         r'[\U00002702-\U000027B0]|'  # Dingbats
         r'[\U0001F1E0-\U0001F1FF]'   # Flags (iOS)
         r'', flags=re.UNICODE)
     text = emoji_pattern.sub(r'', text)
     # Remove multiple spaces and extra line breaks
     text = re.sub(r'[^a-zA-Z\s]', '', text)  # Retain only alphabets and spaces
     text = text.lower().strip()             # Convert to lowercase and strip leading/trailing spaces
     text = text.replace(" ", "_")           # Replace spaces with underscores
+    language=language.replace(" ", "_").strip()
     # Truncate or handle empty text
     truncated_text = text[:20] if len(text) > 20 else text if len(text) > 0 else language
     # Generate a random string for uniqueness
     random_string = uuid.uuid4().hex[:8].upper()
     # Construct the file name
     file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
     return file_name
     audio_chunks = split_on_silence(sound,
                                     min_silence_len=100,
                                     silence_thresh=-45,
+                                    keep_silence=minimum_silence)
     # Putting the file back together
     combined = AudioSegment.empty()
     for chunk in audio_chunks:
           duration_sec = len(audio_np) / 24000
           timestamps[i]["duration"] = duration_sec
           wav_file.writeframes(audio_bytes)
+    if remove_silence:
       keep_silence = int(keep_silence_up_to * 1000)
       new_wave_file=remove_silence_function(save_path,minimum_silence=keep_silence)
       return new_wave_file,timestamps
         for entry in word_level_timestamps:
             word = entry["word"]
             # Skip punctuation if enabled
             if skip_punctuation and all(char in string.punctuation for char in word):
                 continue
         # Skip selected punctuation from remove_punctuation list
         if word in remove_punctuation:
+            continue
         # Attach punctuation to the previous word
         if word in string.punctuation:
             if subtitle_words:
                 subtitle_words[-1] = (subtitle_words[-1][0] + word, subtitle_words[-1][1])
+            continue
         # Start a new subtitle block if needed
         if start_time is None:
 def fix_punctuation(text):
     # Remove spaces before punctuation marks (., ?, !, ,)
     text = re.sub(r'\s([.,?!])', r'\1', text)
     # Handle quotation marks: remove spaces before and after them
     text = text.replace('" ', '"')
     text = text.replace(' "', '"')
     text = text.replace('" ', '"')
     # Track quotation marks to add space after closing quotes
     track = 0
     result = []
     for index, char in enumerate(text):
         if char == '"':
             track += 1
     if os.path.exists("./last"):
         shutil.rmtree("./last")
     os.makedirs("./last",exist_ok=True)
 def KOKORO_TTS_API(text, Language="American English",voice="af_bella", speed=1,translate_text=False,remove_silence=False,keep_silence_up_to=0.05):
+    if translate_text:
         text=bulk_translate(text, Language, chunk_size=500)
     save_path,timestamps=generate_and_save_audio(text=text, Language=Language,voice=voice, speed=speed,remove_silence=remove_silence,keep_silence_up_to=keep_silence_up_to)
     if remove_silence==False:
             shutil.copy(normal_srt, "./last/")
             shutil.copy(json_file, "./last/")
             return save_path,save_path,word_level_srt,normal_srt,json_file
+    return save_path,save_path,None,None,None
+def toggle_autoplay(autoplay):
         return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
+lang_list = ['American English', 'British English', 'Hindi', 'Spanish', 'French', 'Italian', 'Brazilian Portuguese', 'Japanese', 'Mandarin Chinese']
+voice_names = get_voice_names("hexgrad/Kokoro-82M")
+def ui():
     # Define examples in the format you mentioned
     dummy_examples = [
         ["Hey, y'all, let’s grab some coffee and catch up!", "American English", "af_bella"],
         ["こんにちは、お元気ですか？", "Japanese", "jf_nezumi"],
         ["你好，你怎么样?", "Mandarin Chinese", "zf_xiaoni"]
     ]
     with gr.Blocks() as demo:
         # gr.Markdown("<center><h1 style='font-size: 40px;'>KOKORO TTS</h1></center>")  # Larger title with CSS
+        # gr.Markdown("[Install on Your Local System](https://github.com/NeuralFalconYT/kokoro_v1)")
         with gr.Row():
             with gr.Column():
                 text = gr.Textbox(label='📝 Enter Text', lines=3)
                 with gr.Row():
                     language_name = gr.Dropdown(lang_list, label="🌍 Select Language", value=lang_list[0])
     # Markdown explanation for language code
     explanation = """
     ## Language Code Explanation:
+    Example: `'af_bella'`
     - **'a'** stands for **American English**.
     - **'f_'** stands for **Female** (If it were 'm_', it would mean Male).
     - **'bella'** refers to the specific voice.
     - **"m_"**: Male
     """
     with gr.Blocks() as demo2:
+        # gr.Markdown("[Install on Your Local System](https://github.com/NeuralFalconYT/kokoro_v1)")
         gr.Markdown(explanation)  # Display the explanation
     return demo2
+#@title subtitle
+import os
+import re
+import uuid
+import shutil
+import platform
+import datetime
+import subprocess
+import pysrt
+import librosa
+import soundfile as sf
+from tqdm.auto import tqdm
+from pydub import AudioSegment
+from deep_translator import GoogleTranslator
+# ---------------------- Utility Functions ----------------------
+def get_current_time():
+    return datetime.datetime.now().strftime("%I_%M_%p")
+def get_subtitle_Dub_path(srt_file_path, Language):
+    file_name = os.path.splitext(os.path.basename(srt_file_path))[0]
+    full_base_path = os.path.join(os.getcwd(), "TTS_DUB")
+    os.makedirs(full_base_path, exist_ok=True)
+    random_string = str(uuid.uuid4())[:6]
+    lang = language_map_local.get(Language, Language.replace(" ", "_"))
+    new_path = os.path.join(full_base_path, f"{file_name}_{lang}_{random_string}.wav")
+    return new_path.replace("__", "_")
+def clean_srt(input_path):
+    def clean_srt_line(text):
+        for bad in ["[", "]", "♫"]:
+            text = text.replace(bad, "")
+        return text.strip()
+    subs = pysrt.open(input_path, encoding='utf-8')
+    output_path = input_path.lower().replace(".srt", "") + "_.srt"
+    with open(output_path, "w", encoding='utf-8') as file:
+        for sub in subs:
+            file.write(f"{sub.index}\n{sub.start} --> {sub.end}\n{clean_srt_line(sub.text)}\n\n")
+    return output_path
+def translate_srt(input_path, target_language="Hindi", max_segments=500, chunk_size=4000):
+    output_path = input_path.replace(".srt", f"{target_language}.srt")
+    subs = pysrt.open(input_path, encoding='utf-8')
+    if len(subs) > max_segments:
+        gr.Warning(f"Too many segments: {len(subs)} > {max_segments}. Skipping translation.")
+        return input_path
+    original = [f"<#{i}>{s.text}" for i, s in enumerate(subs)]
+    full_text = "\n".join(original)
+    chunks, start = [], 0
+    while start < len(full_text):
+        end = start + chunk_size
+        split_point = full_text.rfind("<#", start, end) if end < len(full_text) else len(full_text)
+        chunks.append(full_text[start:split_point])
+        start = split_point
+    lang_code = language_map_local.get(target_language, "en")
+    translated_chunks = [GoogleTranslator(target=lang_code).translate(chunk) for chunk in chunks]
+    translated_text = "\n".join(translated_chunks)
+    pattern = re.compile(r"<#(\d+)>(.*?)(?=<#\d+>|$)", re.DOTALL)
+    translated_dict = {int(i): txt.strip() for i, txt in pattern.findall(translated_text)}
+    for i, sub in enumerate(subs):
+        sub.text = translated_dict.get(i, sub.text)
+    subs.save(output_path, encoding='utf-8')
+    return output_path
+def prepare_srt(srt_path, target_language, translate=False):
+    path = clean_srt(srt_path)
+    return translate_srt(path, target_language) if translate else path
+def is_ffmpeg_installed():
+    ffmpeg_exe = "ffmpeg.exe" if platform.system() == "Windows" else "ffmpeg"
+    try:
+        subprocess.run([ffmpeg_exe, "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+        return True, ffmpeg_exe
+    except Exception:
+        gr.Warning("FFmpeg not found. Falling back to librosa for audio speedup.", duration=20)
+        return False, ffmpeg_exe
+def speedup_audio_librosa(input_file, output_file, speedup_factor):
+    try:
+        y, sr = librosa.load(input_file, sr=None)
+        y_stretched = librosa.effects.time_stretch(y, rate=speedup_factor)
+        sf.write(output_file, y_stretched, sr)
+    except Exception as e:
+        gr.Warning(f"Librosa speedup failed: {e}")
+        shutil.copy(input_file, output_file)
+def change_speed(input_file, output_file, speedup_factor, use_ffmpeg, ffmpeg_path):
+    if use_ffmpeg:
+        try:
+            subprocess.run([ffmpeg_path, "-i", input_file, "-filter:a", f"atempo={speedup_factor}", output_file, "-y"], check=True)
+        except Exception as e:
+            gr.Error(f"FFmpeg speedup error: {e}")
+            speedup_audio_librosa(input_file, output_file, speedup_factor)
+    else:
+        speedup_audio_librosa(input_file, output_file, speedup_factor)
+def remove_edge_silence(input_path, output_path):
+    y, sr = librosa.load(input_path, sr=None)
+    trimmed_audio, _ = librosa.effects.trim(y, top_db=30)
+    sf.write(output_path, trimmed_audio, sr)
+    return output_path
+# ---------------------- Main Class ----------------------
+class SRTDubbing:
+    def __init__(self, use_ffmpeg=True, ffmpeg_path="ffmpeg"):
+        self.use_ffmpeg = use_ffmpeg
+        self.ffmpeg_path = ffmpeg_path
+        self.cache_dir = "./cache"
+        os.makedirs("./dummy", exist_ok=True)
+        os.makedirs(self.cache_dir, exist_ok=True)
+    @staticmethod
+    def convert_to_millisecond(t):
+        return t.hours * 3600000 + t.minutes * 60000 + t.seconds * 1000 + int(t.milliseconds)
+    @staticmethod
+    def read_srt_file(file_path):
+        subs = pysrt.open(file_path, encoding='utf-8')
+        entries = []
+        prev_end = 0
+        for idx, sub in enumerate(subs, 1):
+            start, end = SRTDubbing.convert_to_millisecond(sub.start), SRTDubbing.convert_to_millisecond(sub.end)
+            pause = start - prev_end if idx > 1 else start
+            entries.append({
+                'entry_number': idx,
+                'start_time': start,
+                'end_time': end,
+                'text': sub.text.strip(),
+                'pause_time': pause,
+                'audio_name': f"{idx}.wav",
+                'previous_pause': f"{idx}_before_pause.wav",
+            })
+            prev_end = end
+        return entries
+    def text_to_speech_srt(self, text, audio_path, language, voice, actual_duration):
+        temp = "./cache/temp.wav"
+        # Step 1: Generate initial audio
+        path, _ = generate_and_save_audio(text, Language=language, voice=voice, speed=1, remove_silence=False, keep_silence_up_to=0.05)
+        # ✂️ Remove leading and trailing silence to make timing tight without trimming actual speech.
+        remove_edge_silence(path, temp)
+        # 📏 Load the trimmed audio and get its duration in milliseconds.
+        audio = AudioSegment.from_file(temp)
+        # ⏱️ If no duration is specified (edge case), use the TTS as-is without speed/timing adjustments.
+        if actual_duration == 0:
+            shutil.move(temp, audio_path)
+            return
+        # Step 2: If TTS audio is longer, retry with remove_silence=True
+        if len(audio) > actual_duration:
+            path, _ = generate_and_save_audio(text, Language=language, voice=voice, speed=1, remove_silence=True, keep_silence_up_to=0.05)
+            remove_edge_silence(path, temp)
+            audio = AudioSegment.from_file(temp)
+        # Step 3: If still longer → speed up
+        if len(audio) > actual_duration:
+            factor = len(audio) / actual_duration
+            path, _ = generate_and_save_audio(text, Language=language, voice=voice, speed=factor, remove_silence=True, keep_silence_up_to=0.05)
+            remove_edge_silence(path, temp)
+            audio = AudioSegment.from_file(temp)
+        # Final Adjustment: Speed up via FFmpeg or librosa
+        if len(audio) > actual_duration:
+            factor = len(audio) / actual_duration
+            final_temp = "./cache/speedup_temp.wav"
+            change_speed(temp, final_temp, factor, self.use_ffmpeg, self.ffmpeg_path)
+            shutil.move(final_temp, audio_path)
+        # Add silence if too short
+        elif len(audio) < actual_duration:
+            silence = AudioSegment.silent(duration=actual_duration - len(audio))
+            (audio + silence).export(audio_path, format="wav")
+        # ➡️ Fallback: If TTS already perfectly matches subtitle duration, save as-is.
+        else:
+            shutil.move(temp, audio_path) #bad code
+    @staticmethod
+    def make_silence(duration, path):
+        AudioSegment.silent(duration=duration).export(path, format="wav")
+    @staticmethod
+    def create_folder_for_srt(srt_file_path):
+        base = os.path.splitext(os.path.basename(srt_file_path))[0]
+        folder = f"./dummy/{base}_{str(uuid.uuid4())[:4]}"
+        os.makedirs(folder, exist_ok=True)
+        return folder
+    @staticmethod
+    def concatenate_audio_files(paths, output):
+        audio = sum([AudioSegment.from_file(p) for p in paths], AudioSegment.silent(duration=0))
+        audio.export(output, format="wav")
+    def srt_to_dub(self, srt_path, output_path, language, voice):
+        entries = self.read_srt_file(srt_path)
+        folder = self.create_folder_for_srt(srt_path)
+        all_audio = []
+        for entry in tqdm(entries):
+            self.make_silence(entry['pause_time'], os.path.join(folder, entry['previous_pause']))
+            all_audio.append(os.path.join(folder, entry['previous_pause']))
+            tts_path = os.path.join(folder, entry['audio_name'])
+            self.text_to_speech_srt(entry['text'], tts_path, language, voice, entry['end_time'] - entry['start_time'])
+            all_audio.append(tts_path)
+        self.concatenate_audio_files(all_audio, output_path)
+# ---------------------- Entrypoint ----------------------
+def srt_process(srt_path, Language="American English", voice_name="af_bella", translate=False):
+    if not srt_path.endswith(".srt"):
+        gr.Error("Please upload a valid .srt file", duration=5)
+        return None
+    use_ffmpeg, ffmpeg_path = is_ffmpeg_installed()
+    processed_srt = prepare_srt(srt_path, Language, translate)
+    output_path = get_subtitle_Dub_path(srt_path, Language)
+    SRTDubbing(use_ffmpeg, ffmpeg_path).srt_to_dub(processed_srt, output_path, Language, voice_name)
+    return output_path
+def subtitle_ui():
+  with gr.Blocks() as demo:
+      gr.Markdown(
+          """
+          # Generate Audio File From Subtitle [Upload Only .srt file]
+          To generate subtitles, you can use the [Whisper Turbo Subtitle](https://github.com/NeuralFalconYT/Whisper-Turbo-Subtitle)
+          """
+      )
+      with gr.Row():
+          with gr.Column():
+              srt_file = gr.File(label='Upload .srt Subtitle File Only')
+              with gr.Row():
+                      language_name = gr.Dropdown(lang_list, label="🌍 Select Language", value=lang_list[0])
+              with gr.Row():
+                  voice = gr.Dropdown(
+                      voice_names,
+                      value='af_bella',
+                      allow_custom_value=False,
+                      label='🎙️ Choose VoicePack',
+                  )
+              with gr.Row():
+                  generate_btn_ = gr.Button('Generate', variant='primary')
+              with gr.Accordion('Other Settings', open=False):
+                  translate_text = gr.Checkbox(value=False, label='🌐 Translate Text to Selected Language')
+          with gr.Column():
+              audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
+              with gr.Accordion('Enable Autoplay', open=False):
+                  autoplay = gr.Checkbox(value=True, label='Autoplay')
+                  autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
+      # srt_file.submit(
+      #     srt_process,
+      #     inputs=[srt_file, voice],
+      #     outputs=[audio]
+      # )
+      generate_btn_.click(
+          srt_process,
+          inputs=[srt_file,language_name,voice,translate_text],
+          outputs=[audio]
+      )
+      return demo
+# Example usage:
+# srt_file_path = "/content/me.srt"
+# dub_audio_path = srt_process(srt_file_path, Language="American English", voice_name="af_bella", translate=False)
+# print(f"Audio file saved at: {dub_audio_path}")
 import click
 @click.command()
 def main(debug, share):
 # def main(debug=True, share=True):
     demo1 = ui()
+    demo2 = subtitle_ui()
+    demo3 = tutorial()
+    demo = gr.TabbedInterface([demo1, demo2,demo3],["Multilingual TTS","SRT Dubbing","VoicePack Explanation"],title="Kokoro TTS")#,theme='JohnSmith9982/small_and_pretty')
     demo.queue().launch(debug=debug, share=share)
     # demo.queue().launch(debug=debug, share=share,server_port=9000)
     #Run on local network
 pipeline = KPipeline(lang_code=last_used_language)
 temp_folder = create_audio_dir()
 if __name__ == "__main__":
+    main()