Spaces:

peteralexandercharles
/

Auto-Subtitled-Video-Generator

Runtime error

App Files Files Community

peteralexandercharles commited on Jan 2, 2023

Commit

a590feb

1 Parent(s): 56e52b4

Upload 7 files

Browse files

Files changed (3) hide show

01_🎥_Input_YouTube_Link.py +84 -82
LICENSE.txt +21 -0
README.md +11 -21

01_🎥_Input_YouTube_Link.py CHANGED Viewed

@@ -11,39 +11,17 @@ from io import StringIO
 from utils import write_vtt, write_srt
 import ffmpeg
 from languages import LANGUAGES
-import torch
-from zipfile import ZipFile
-from io import BytesIO
-import base64
-import pathlib
-import re
 st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
-torch.cuda.is_available()
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Model options: tiny, base, small, medium, large
-loaded_model = whisper.load_model("small", device=DEVICE)
 # Define a function that we can use to load lottie files from a link.
-@st.cache(allow_output_mutation=True)
 def load_lottieurl(url: str):
     r = requests.get(url)
     if r.status_code != 200:
         return None
     return r.json()
-APP_DIR = pathlib.Path(__file__).parent.absolute()
-LOCAL_DIR = APP_DIR / "local_youtube"
-LOCAL_DIR.mkdir(exist_ok=True)
-save_dir = LOCAL_DIR / "output"
-save_dir.mkdir(exist_ok=True)
 col1, col2 = st.columns([1, 3])
 with col1:
     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
@@ -58,6 +36,7 @@ with col2:
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
 def populate_metadata(link):
     yt = YouTube(link)
     author = yt.author
@@ -69,6 +48,7 @@ def populate_metadata(link):
     return author, title, description, thumbnail, length, views
 def download_video(link):
     yt = YouTube(link)
     video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
@@ -79,9 +59,23 @@ def convert(seconds):
     return time.strftime("%H:%M:%S", time.gmtime(seconds))
 def inference(link, loaded_model, task):
     yt = YouTube(link)
-    path = yt.streams.filter(only_audio=True)[0].download(filename=f"{save_dir}/audio.mp3")
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
@@ -100,6 +94,7 @@ def inference(link, loaded_model, task):
         raise ValueError("Task not supported")
 def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()
@@ -125,34 +120,35 @@ def get_language_code(language):
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
-    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("youtube_sub.mp4").run(quiet=True, overwrite_output=True)
-    video_with_subs = open("youtube_sub.mp4", "rb")
     return video_with_subs
 def main():
-    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)", placeholder="Input YouTube link and press enter")
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
-            with st.spinner("Transcribing the video..."):
-                results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
             with col3:
                 st.video(video)
-            # Split result["text"]  on !,? and . , but save the punctuation
-            sentences = re.split("([!?.])", results[0])
-            # Join the punctuation back to the sentences
-            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-            text = "\n\n".join(sentences)
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(text)
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
@@ -168,48 +164,50 @@ def main():
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
-            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-            zipObj.write("transcript.txt")
-            zipObj.write("transcript.vtt")
-            zipObj.write("transcript.srt")
-            zipObj.write("youtube_sub.mp4")
-            zipObj.close()
-            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-            with open(ZipfileDotZip, "rb") as f:
-                datazip = f.read()
-                b64 = base64.b64encode(datazip).decode()
-                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-        Download Transcripts and Video\
-    </a>"
-            st.markdown(href, unsafe_allow_html=True)
     elif task == "Translate":
         if st.button("Translate to English"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
-            with st.spinner("Translating to English..."):
-                results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
             with col3:
                 st.video(video)
-            # Split result["text"]  on !,? and . , but save the punctuation
-            sentences = re.split("([!?.])", results[0])
-            # Join the punctuation back to the sentences
-            sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-            text = "\n\n".join(sentences)
             with open("transcript.txt", "w+", encoding='utf8') as f:
-                f.writelines(text)
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
@@ -225,32 +223,36 @@ def main():
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
-            zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-            zipObj.write("transcript.txt")
-            zipObj.write("transcript.vtt")
-            zipObj.write("transcript.srt")
-            zipObj.write("youtube_sub.mp4")
-            zipObj.close()
-            ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-            with open(ZipfileDotZip, "rb") as f:
-                datazip = f.read()
-                b64 = base64.b64encode(datazip).decode()
-                href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-        Download Transcripts and Video\
-    </a>"
-            st.markdown(href, unsafe_allow_html=True)
     else:
-        st.info("Please select a task.")
 if __name__ == "__main__":
     main()

 from utils import write_vtt, write_srt
 import ffmpeg
 from languages import LANGUAGES
 st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
 # Define a function that we can use to load lottie files from a link.
+@st.cache()
 def load_lottieurl(url: str):
     r = requests.get(url)
     if r.status_code != 200:
         return None
     return r.json()
 col1, col2 = st.columns([1, 3])
 with col1:
     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
+@st.cache(allow_output_mutation=True)
 def populate_metadata(link):
     yt = YouTube(link)
     author = yt.author
     return author, title, description, thumbnail, length, views
+@st.cache(allow_output_mutation=True)
 def download_video(link):
     yt = YouTube(link)
     video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
     return time.strftime("%H:%M:%S", time.gmtime(seconds))
+loaded_model = whisper.load_model("base")
+current_size = "None"
+@st.cache(allow_output_mutation=True)
+def change_model(current_size, size):
+    if current_size != size:
+        loaded_model = whisper.load_model(size)
+        return loaded_model
+    else:
+        raise Exception("Model size is the same as the current size.")
+@st.cache(allow_output_mutation=True)
 def inference(link, loaded_model, task):
     yt = YouTube(link)
+    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
         raise ValueError("Task not supported")
+@st.cache(allow_output_mutation=True)
 def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
+    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
+    video_with_subs = open("final.mp4", "rb")
     return video_with_subs
 def main():
+    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
+    loaded_model = change_model(current_size, size)
+    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
+        f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
+            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)
+            # Write the results to a .txt file and download it.
             with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                data=datatxt,
+                                file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                    data=datavtt,
+                                    file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                    data=datasrt,
+                                    file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
             with col4:
                 with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
+            with col8:
+                st.download_button(label="Download Subtitled Video",
+                                    data=video_with_subs,
+                                    file_name=f"{title} with subtitles.mp4")
     elif task == "Translate":
         if st.button("Translate to English"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
+            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)
             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)
+            # Write the results to a .txt file and download it.
             with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
                 f.close()
             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                 datasrt = f.read()
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                data=datatxt,
+                                file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                    data=datavtt,
+                                    file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                    data=datasrt,
+                                    file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
             with col4:
                 with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
+            with col8:
+                st.download_button(label="Download Subtitled Video",
+                                    data=video_with_subs,
+                                    file_name=f"{title} with subtitles.mp4")
     else:
+        st.error("Please select a task.")
 if __name__ == "__main__":
     main()
+    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")

LICENSE.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Batuhan Yılmaz
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,22 +1,12 @@
-## Auto-Subtitled-Video-Generator
-![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
-![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=for-the-badge&logo=Streamlit&logoColor=white)
-![OpenAI](https://camo.githubusercontent.com/ea872adb9aba9cf6b4e976262f6d4b83b97972d0d5a7abccfde68eb2ae55325f/68747470733a2f2f696d672e736869656c64732e696f2f7374617469632f76313f7374796c653d666f722d7468652d6261646765266d6573736167653d4f70656e414926636f6c6f723d343132393931266c6f676f3d4f70656e4149266c6f676f436f6c6f723d464646464646266c6162656c3d)
-#### About this project
-- This project is an automatic speech recognition application that takes a YouTube video link or a video file as input to generate a video with subtitles.
-- You can also upload an audio file to generate a transcript as .txt, .vtt, .srt files.
-- The application performs 2 tasks:
-  - Detects the language, transcribes the input video in its original language.
-  - Detects the language, translates it into English and then transcribes.
-- Downloaded the video of the input link using [pytube](https://github.com/pytube/pytube).
-- Generated a transcription of the video using the [OpenAI Whisper](https://openai.com/blog/whisper) model.
-- Saved the transcriptions as .txt, .vtt and .srt files.
-- Generated a subtitled version of the input video using [ffmpeg](https://github.com/FFmpeg).
-- Displayed the original video and the subtitled video side by side.
-- Built a multipage web app using [Streamlit](https://streamlit.io) and hosted on [HuggingFace Spaces](https://huggingface.co/spaces).
-- You can download the generated .txt, .vtt, .srt files and the subtitled video.
-- You can use the app via this [link](https://huggingface.co/spaces/BatuhanYilmaz/Auto-Subtitled-Video-Generator).
-![](auto-sub.gif)

+---
+title: Whisper-Auto-Subtitled-Video-Generator
+emoji: 🎥
+colorFrom: blue
+colorTo: purple
+sdk: streamlit
+sdk_version: 1.10.0
+app_file: 01_🎥_Input_YouTube_Link.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference