peteralexandercharles committed

Commit a590feb · Parent: 56e52b4

Upload 7 files

Files changed (3):
  1. 01_🎥_Input_YouTube_Link.py +84 -82
  2. LICENSE.txt +21 -0
  3. README.md +11 -21
01_🎥_Input_YouTube_Link.py CHANGED
@@ -11,39 +11,17 @@ from io import StringIO
 from utils import write_vtt, write_srt
 import ffmpeg
 from languages import LANGUAGES
-import torch
-from zipfile import ZipFile
-from io import BytesIO
-import base64
-import pathlib
-import re

 st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")

-torch.cuda.is_available()
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# Model options: tiny, base, small, medium, large
-loaded_model = whisper.load_model("small", device=DEVICE)
-
-
-
 # Define a function that we can use to load lottie files from a link.
-@st.cache(allow_output_mutation=True)
+@st.cache()
 def load_lottieurl(url: str):
     r = requests.get(url)
     if r.status_code != 200:
         return None
     return r.json()

-APP_DIR = pathlib.Path(__file__).parent.absolute()
-
-LOCAL_DIR = APP_DIR / "local_youtube"
-LOCAL_DIR.mkdir(exist_ok=True)
-save_dir = LOCAL_DIR / "output"
-save_dir.mkdir(exist_ok=True)
-
-
-
 col1, col2 = st.columns([1, 3])
 with col1:
     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
@@ -58,6 +36,7 @@ with col2:
     ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)


+@st.cache(allow_output_mutation=True)
 def populate_metadata(link):
     yt = YouTube(link)
     author = yt.author
@@ -69,6 +48,7 @@ def populate_metadata(link):
     return author, title, description, thumbnail, length, views


+@st.cache(allow_output_mutation=True)
 def download_video(link):
     yt = YouTube(link)
     video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
@@ -79,9 +59,23 @@ def convert(seconds):
     return time.strftime("%H:%M:%S", time.gmtime(seconds))


+loaded_model = whisper.load_model("base")
+current_size = "None"
+
+
+@st.cache(allow_output_mutation=True)
+def change_model(current_size, size):
+    if current_size != size:
+        loaded_model = whisper.load_model(size)
+        return loaded_model
+    else:
+        raise Exception("Model size is the same as the current size.")
+
+
+@st.cache(allow_output_mutation=True)
 def inference(link, loaded_model, task):
     yt = YouTube(link)
-    path = yt.streams.filter(only_audio=True)[0].download(filename=f"{save_dir}/audio.mp3")
+    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
@@ -100,6 +94,7 @@ def inference(link, loaded_model, task):
         raise ValueError("Task not supported")


+@st.cache(allow_output_mutation=True)
 def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()

@@ -125,34 +120,35 @@ def get_language_code(language):
 def generate_subtitled_video(video, audio, transcript):
     video_file = ffmpeg.input(video)
     audio_file = ffmpeg.input(audio)
-    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("youtube_sub.mp4").run(quiet=True, overwrite_output=True)
-    video_with_subs = open("youtube_sub.mp4", "rb")
+    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
+    video_with_subs = open("final.mp4", "rb")
     return video_with_subs


 def main():
-    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)", placeholder="Input YouTube link and press enter")
+    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
+    loaded_model = change_model(current_size, size)
+    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
+             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
     if task == "Transcribe":
         if st.button("Transcribe"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
-            with st.spinner("Transcribing the video..."):
-                results = inference(link, loaded_model, task)
+            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)

             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)
-
-                # Split result["text"] on !,? and . , but save the punctuation
-                sentences = re.split("([!?.])", results[0])
-                # Join the punctuation back to the sentences
-                sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-                text = "\n\n".join(sentences)
+
+                # Write the results to a .txt file and download it.
                 with open("transcript.txt", "w+", encoding='utf8') as f:
-                    f.writelines(text)
+                    f.writelines(results[0])
                     f.close()
                 with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                     datatxt = f.read()
@@ -168,48 +164,50 @@ def main():
                     f.close()
                 with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                     datasrt = f.read()
-
+
+                with col5:
+                    st.download_button(label="Download Transcript (.txt)",
+                                       data=datatxt,
+                                       file_name="transcript.txt")
+                with col6:
+                    st.download_button(label="Download Transcript (.vtt)",
+                                       data=datavtt,
+                                       file_name="transcript.vtt")
+                with col7:
+                    st.download_button(label="Download Transcript (.srt)",
+                                       data=datasrt,
+                                       file_name="transcript.srt")
+                with col9:
+                    st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+                with col10:
+                    st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                     st.video(video_with_subs)
                     st.balloons()
-
-                zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-                zipObj.write("transcript.txt")
-                zipObj.write("transcript.vtt")
-                zipObj.write("transcript.srt")
-                zipObj.write("youtube_sub.mp4")
-                zipObj.close()
-                ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-                with open(ZipfileDotZip, "rb") as f:
-                    datazip = f.read()
-                    b64 = base64.b64encode(datazip).decode()
-                    href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-                        Download Transcripts and Video\
-                    </a>"
-                st.markdown(href, unsafe_allow_html=True)
-
+                with col8:
+                    st.download_button(label="Download Subtitled Video",
+                                       data=video_with_subs,
+                                       file_name=f"{title} with subtitles.mp4")
     elif task == "Translate":
         if st.button("Translate to English"):
             author, title, description, thumbnail, length, views = populate_metadata(link)
-            with st.spinner("Translating to English..."):
-                results = inference(link, loaded_model, task)
+            results = inference(link, loaded_model, task)
             video = download_video(link)
             lang = results[3]
             detected_language = get_language_code(lang)

             col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
             with col3:
                 st.video(video)

-                # Split result["text"] on !,? and . , but save the punctuation
-                sentences = re.split("([!?.])", results[0])
-                # Join the punctuation back to the sentences
-                sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
-                text = "\n\n".join(sentences)
+                # Write the results to a .txt file and download it.
                 with open("transcript.txt", "w+", encoding='utf8') as f:
-                    f.writelines(text)
+                    f.writelines(results[0])
                     f.close()
                 with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                     datatxt = f.read()
@@ -225,32 +223,36 @@ def main():
                     f.close()
                 with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                     datasrt = f.read()
-
+                with col5:
+                    st.download_button(label="Download Transcript (.txt)",
+                                       data=datatxt,
+                                       file_name="transcript.txt")
+                with col6:
+                    st.download_button(label="Download Transcript (.vtt)",
+                                       data=datavtt,
+                                       file_name="transcript.vtt")
+                with col7:
+                    st.download_button(label="Download Transcript (.srt)",
+                                       data=datasrt,
+                                       file_name="transcript.srt")
+                with col9:
+                    st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+                with col10:
+                    st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                     st.video(video_with_subs)
                     st.balloons()
-
-                zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
-                zipObj.write("transcript.txt")
-                zipObj.write("transcript.vtt")
-                zipObj.write("transcript.srt")
-                zipObj.write("youtube_sub.mp4")
-                zipObj.close()
-                ZipfileDotZip = "YouTube_transcripts_and_video.zip"
-                with open(ZipfileDotZip, "rb") as f:
-                    datazip = f.read()
-                    b64 = base64.b64encode(datazip).decode()
-                    href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
-                        Download Transcripts and Video\
-                    </a>"
-                st.markdown(href, unsafe_allow_html=True)
-
+                with col8:
+                    st.download_button(label="Download Subtitled Video",
+                                       data=video_with_subs,
+                                       file_name=f"{title} with subtitles.mp4")
     else:
-        st.info("Please select a task.")
+        st.error("Please select a task.")


 if __name__ == "__main__":
     main()
-
+    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Batuhan Yılmaz
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md CHANGED
@@ -1,22 +1,12 @@
-## Auto-Subtitled-Video-Generator
+---
+title: Whisper-Auto-Subtitled-Video-Generator
+emoji: 🎥
+colorFrom: blue
+colorTo: purple
+sdk: streamlit
+sdk_version: 1.10.0
+app_file: 01_🎥_Input_YouTube_Link.py
+pinned: false
+---

-![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
-![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=for-the-badge&logo=Streamlit&logoColor=white)
-![OpenAI](https://camo.githubusercontent.com/ea872adb9aba9cf6b4e976262f6d4b83b97972d0d5a7abccfde68eb2ae55325f/68747470733a2f2f696d672e736869656c64732e696f2f7374617469632f76313f7374796c653d666f722d7468652d6261646765266d6573736167653d4f70656e414926636f6c6f723d343132393931266c6f676f3d4f70656e4149266c6f676f436f6c6f723d464646464646266c6162656c3d)
-
-#### About this project
-- This project is an automatic speech recognition application that takes a YouTube video link or a video file as input to generate a video with subtitles.
-- You can also upload an audio file to generate a transcript as .txt, .vtt, .srt files.
-- The application performs 2 tasks:
-    - Detects the language, transcribes the input video in its original language.
-    - Detects the language, translates it into English and then transcribes.
-- Downloaded the video of the input link using [pytube](https://github.com/pytube/pytube).
-- Generated a transcription of the video using the [OpenAI Whisper](https://openai.com/blog/whisper) model.
-- Saved the transcriptions as .txt, .vtt and .srt files.
-- Generated a subtitled version of the input video using [ffmpeg](https://github.com/FFmpeg).
-- Displayed the original video and the subtitled video side by side.
-- Built a multipage web app using [Streamlit](https://streamlit.io) and hosted on [HuggingFace Spaces](https://huggingface.co/spaces).
-- You can download the generated .txt, .vtt, .srt files and the subtitled video.
-- You can use the app via this [link](https://huggingface.co/spaces/BatuhanYilmaz/Auto-Subtitled-Video-Generator).
-
-![](auto-sub.gif)
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
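
For context, the block between the `---` markers in the new README is the Hugging Face Spaces configuration header: YAML that tells the Hub which SDK and version to use and which file to launch (`01_🎥_Input_YouTube_Link.py`). A small way to sanity-check those fields locally is to parse the front matter yourself; the sketch below assumes PyYAML is installed and is illustrative, not part of the commit.

```python
# Sketch: read the Spaces front matter out of README.md and inspect the fields.
# Assumes `pip install pyyaml` and that README.md starts with a `---` ... `---` block.
import yaml

with open("README.md", encoding="utf8") as f:
    text = f.read()

front_matter = text.split("---")[1]  # content between the first pair of --- markers
config = yaml.safe_load(front_matter)
print(config["sdk"], config["sdk_version"], config["app_file"])
```

The configuration reference linked above documents the accepted keys and values.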