peteralexandercharles committed
Commit 56e52b4 · 1 Parent(s): d6807e7

Upload 9 files

Files changed (10)
  1. .gitattributes +1 -0
  2. 01_🎥_Input_YouTube_Link.py +256 -0
  3. Dockerfile +20 -0
  4. LICENSE +21 -0
  5. README.md +21 -12
  6. auto-sub.gif +3 -0
  7. languages.py +101 -0
  8. packages.txt +1 -0
  9. requirements.txt +9 -0
  10. utils.py +96 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ auto-sub.gif filter=lfs diff=lfs merge=lfs -text
01_🎥_Input_YouTube_Link.py ADDED
@@ -0,0 +1,256 @@
+ import whisper
+ from pytube import YouTube
+ import requests
+ import time
+ import streamlit as st
+ from streamlit_lottie import st_lottie
+ import numpy as np
+ import os
+ from typing import Iterator
+ from io import StringIO
+ from utils import write_vtt, write_srt
+ import ffmpeg
+ from languages import LANGUAGES
+ import torch
+ from zipfile import ZipFile
+ from io import BytesIO
+ import base64
+ import pathlib
+ import re
+
+ st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
+
+ torch.cuda.is_available()
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ # Model options: tiny, base, small, medium, large
+ loaded_model = whisper.load_model("small", device=DEVICE)
+
+
+
+ # Define a function that we can use to load lottie files from a link.
+ @st.cache(allow_output_mutation=True)
+ def load_lottieurl(url: str):
+     r = requests.get(url)
+     if r.status_code != 200:
+         return None
+     return r.json()
+
+ APP_DIR = pathlib.Path(__file__).parent.absolute()
+
+ LOCAL_DIR = APP_DIR / "local_youtube"
+ LOCAL_DIR.mkdir(exist_ok=True)
+ save_dir = LOCAL_DIR / "output"
+ save_dir.mkdir(exist_ok=True)
+
+
+
+ col1, col2 = st.columns([1, 3])
+ with col1:
+     lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
+     st_lottie(lottie)
+
+ with col2:
+     st.write("""
+ ## Auto Subtitled Video Generator
+ ##### Input a YouTube video link and get a video with subtitles.
+ ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
+ ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
+ ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
+
+
+ def populate_metadata(link):
+     yt = YouTube(link)
+     author = yt.author
+     title = yt.title
+     description = yt.description
+     thumbnail = yt.thumbnail_url
+     length = yt.length
+     views = yt.views
+     return author, title, description, thumbnail, length, views
+
+
+ def download_video(link):
+     yt = YouTube(link)
+     video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
+     return video
+
+
+ def convert(seconds):
+     return time.strftime("%H:%M:%S", time.gmtime(seconds))
+
+
+ def inference(link, loaded_model, task):
+     yt = YouTube(link)
+     path = yt.streams.filter(only_audio=True)[0].download(filename=f"{save_dir}/audio.mp3")
+     if task == "Transcribe":
+         options = dict(task="transcribe", best_of=5)
+         results = loaded_model.transcribe(path, **options)
+         vtt = getSubs(results["segments"], "vtt", 80)
+         srt = getSubs(results["segments"], "srt", 80)
+         lang = results["language"]
+         return results["text"], vtt, srt, lang
+     elif task == "Translate":
+         options = dict(task="translate", best_of=5)
+         results = loaded_model.transcribe(path, **options)
+         vtt = getSubs(results["segments"], "vtt", 80)
+         srt = getSubs(results["segments"], "srt", 80)
+         lang = results["language"]
+         return results["text"], vtt, srt, lang
+     else:
+         raise ValueError("Task not supported")
+
+
+ def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
+     segmentStream = StringIO()
+
+     if format == 'vtt':
+         write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+     elif format == 'srt':
+         write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+     else:
+         raise Exception("Unknown format " + format)
+
+     segmentStream.seek(0)
+     return segmentStream.read()
+
+
+ def get_language_code(language):
+     if language in LANGUAGES.keys():
+         detected_language = LANGUAGES[language]
+         return detected_language
+     else:
+         raise ValueError("Language not supported")
+
+
+ def generate_subtitled_video(video, audio, transcript):
+     video_file = ffmpeg.input(video)
+     audio_file = ffmpeg.input(audio)
+     ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("youtube_sub.mp4").run(quiet=True, overwrite_output=True)
+     video_with_subs = open("youtube_sub.mp4", "rb")
+     return video_with_subs
+
+
+ def main():
+     link = st.text_input("YouTube Link (The longer the video, the longer the processing time)", placeholder="Input YouTube link and press enter")
+     task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
+     if task == "Transcribe":
+         if st.button("Transcribe"):
+             author, title, description, thumbnail, length, views = populate_metadata(link)
+             with st.spinner("Transcribing the video..."):
+                 results = inference(link, loaded_model, task)
+                 video = download_video(link)
+                 lang = results[3]
+                 detected_language = get_language_code(lang)
+
+             col3, col4 = st.columns(2)
+             with col3:
+                 st.video(video)
+
+             # Split result["text"] on !,? and . , but save the punctuation
+             sentences = re.split("([!?.])", results[0])
+             # Join the punctuation back to the sentences
+             sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
+             text = "\n\n".join(sentences)
+             with open("transcript.txt", "w+", encoding='utf8') as f:
+                 f.writelines(text)
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                 datatxt = f.read()
+
+             with open("transcript.vtt", "w+", encoding='utf8') as f:
+                 f.writelines(results[1])
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                 datavtt = f.read()
+
+             with open("transcript.srt", "w+", encoding='utf8') as f:
+                 f.writelines(results[2])
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                 datasrt = f.read()
+
+             with col4:
+                 with st.spinner("Generating Subtitled Video"):
+                     video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                     st.video(video_with_subs)
+                     st.balloons()
+
+             zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
+             zipObj.write("transcript.txt")
+             zipObj.write("transcript.vtt")
+             zipObj.write("transcript.srt")
+             zipObj.write("youtube_sub.mp4")
+             zipObj.close()
+             ZipfileDotZip = "YouTube_transcripts_and_video.zip"
+             with open(ZipfileDotZip, "rb") as f:
+                 datazip = f.read()
+                 b64 = base64.b64encode(datazip).decode()
+                 href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
+                     Download Transcripts and Video\
+                 </a>"
+             st.markdown(href, unsafe_allow_html=True)
+
+     elif task == "Translate":
+         if st.button("Translate to English"):
+             author, title, description, thumbnail, length, views = populate_metadata(link)
+             with st.spinner("Translating to English..."):
+                 results = inference(link, loaded_model, task)
+                 video = download_video(link)
+                 lang = results[3]
+                 detected_language = get_language_code(lang)
+
+             col3, col4 = st.columns(2)
+             with col3:
+                 st.video(video)
+
+             # Split result["text"] on !,? and . , but save the punctuation
+             sentences = re.split("([!?.])", results[0])
+             # Join the punctuation back to the sentences
+             sentences = ["".join(i) for i in zip(sentences[0::2], sentences[1::2])]
+             text = "\n\n".join(sentences)
+             with open("transcript.txt", "w+", encoding='utf8') as f:
+                 f.writelines(text)
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                 datatxt = f.read()
+
+             with open("transcript.vtt", "w+", encoding='utf8') as f:
+                 f.writelines(results[1])
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                 datavtt = f.read()
+
+             with open("transcript.srt", "w+", encoding='utf8') as f:
+                 f.writelines(results[2])
+                 f.close()
+             with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                 datasrt = f.read()
+
+             with col4:
+                 with st.spinner("Generating Subtitled Video"):
+                     video_with_subs = generate_subtitled_video(video, f"{save_dir}/audio.mp3", "transcript.srt")
+                     st.video(video_with_subs)
+                     st.balloons()
+
+             zipObj = ZipFile("YouTube_transcripts_and_video.zip", "w")
+             zipObj.write("transcript.txt")
+             zipObj.write("transcript.vtt")
+             zipObj.write("transcript.srt")
+             zipObj.write("youtube_sub.mp4")
+             zipObj.close()
+             ZipfileDotZip = "YouTube_transcripts_and_video.zip"
+             with open(ZipfileDotZip, "rb") as f:
+                 datazip = f.read()
+                 b64 = base64.b64encode(datazip).decode()
+                 href = f"<a href=\"data:file/zip;base64,{b64}\" download='{ZipfileDotZip}'>\
+                     Download Transcripts and Video\
+                 </a>"
+             st.markdown(href, unsafe_allow_html=True)
+
+     else:
+         st.info("Please select a task.")
+
+
+ if __name__ == "__main__":
+     main()
+
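A note on the data flowing through `getSubs()` above: Whisper's `transcribe()` returns segment dicts with `start`, `end`, and `text` keys, and `getSubs()` simply renders them through the `write_srt`/`write_vtt` helpers from `utils.py` (added later in this commit) into a string. A minimal sketch with made-up segment values, not taken from any real transcription:

```python
from io import StringIO

from utils import write_srt  # helper defined in utils.py in this commit

# Hypothetical segments, in the shape whisper's transcribe() returns them
segments = [
    {"start": 0.0, "end": 2.4, "text": " Hello and welcome."},
    {"start": 2.4, "end": 5.1, "text": " Today we generate subtitles."},
]

buffer = StringIO()
write_srt(segments, file=buffer, maxLineWidth=80)
print(buffer.getvalue())
# 1
# 00:00:00,000 --> 00:00:02,400
# Hello and welcome.
#
# 2
# 00:00:02,400 --> 00:00:05,100
# Today we generate subtitles.
```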
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.10
+
+ RUN pip install virtualenv
+ ENV VIRTUAL_ENV=/venv
+ RUN virtualenv venv -p python3
+ ENV PATH="VIRTUAL_ENV/bin:$PATH"
+
+ WORKDIR /app
+ ADD . /app
+
+ # Install dependencies
+ RUN pip install -r requirements.txt
+ RUN apt-get update
+ RUN apt-get install -y ffmpeg
+
+ # Expose port
+ ENV PORT 8501
+
+ # Run the application:
+ CMD ["streamlit","run","01_🎥_Input_YouTube_Link.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2022 Batuhan Yılmaz
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,22 @@
- ---
- title: Auto Subtitled Video Generator
- emoji: 🐢
- colorFrom: yellow
- colorTo: red
- sdk: streamlit
- sdk_version: 1.15.2
- app_file: app.py
- pinned: false
- license: other
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ## Auto-Subtitled-Video-Generator
+
+ ![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
+ ![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=for-the-badge&logo=Streamlit&logoColor=white)
+ ![OpenAI](https://camo.githubusercontent.com/ea872adb9aba9cf6b4e976262f6d4b83b97972d0d5a7abccfde68eb2ae55325f/68747470733a2f2f696d672e736869656c64732e696f2f7374617469632f76313f7374796c653d666f722d7468652d6261646765266d6573736167653d4f70656e414926636f6c6f723d343132393931266c6f676f3d4f70656e4149266c6f676f436f6c6f723d464646464646266c6162656c3d)
+
+ #### About this project
+ - This project is an automatic speech recognition application that takes a YouTube video link or a video file as input and generates a video with subtitles.
+ - You can also upload an audio file to generate a transcript as .txt, .vtt, and .srt files.
+ - The application performs two tasks:
+   - Detects the language and transcribes the input video in its original language.
+   - Detects the language, translates the speech into English, and then transcribes it.
+ - Downloads the video from the input link using [pytube](https://github.com/pytube/pytube).
+ - Generates a transcription of the video with the [OpenAI Whisper](https://openai.com/blog/whisper) model.
+ - Saves the transcriptions as .txt, .vtt, and .srt files.
+ - Generates a subtitled version of the input video using [ffmpeg](https://github.com/FFmpeg) (see the sketch after this diff).
+ - Displays the original video and the subtitled video side by side.
+ - Built as a multipage web app with [Streamlit](https://streamlit.io) and hosted on [HuggingFace Spaces](https://huggingface.co/spaces).
+ - You can download the generated .txt, .vtt, .srt files and the subtitled video.
+ - You can use the app via this [link](https://huggingface.co/spaces/BatuhanYilmaz/Auto-Subtitled-Video-Generator).
+
+ ![](auto-sub.gif)
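As referenced in the README bullet above, here is a rough sketch of the pytube → Whisper → ffmpeg pipeline outside of Streamlit. It is an illustration under assumptions, not code from the commit: the URL and output file names are placeholders, and it assumes the packages from requirements.txt are installed.

```python
import ffmpeg
import whisper
from pytube import YouTube

from utils import write_srt  # helper defined in utils.py in this commit

url = "https://www.youtube.com/watch?v=PLACEHOLDER"  # hypothetical link
yt = YouTube(url)
audio_path = yt.streams.filter(only_audio=True).first().download(filename="audio.mp4")
video_path = yt.streams.filter(progressive=True, file_extension="mp4").first().download(filename="video.mp4")

# Transcribe (or translate) with Whisper; the result holds "text", "segments", "language".
model = whisper.load_model("base")
result = model.transcribe(audio_path, task="transcribe")  # task="translate" for English subtitles

with open("transcript.srt", "w", encoding="utf-8") as srt:
    write_srt(result["segments"], file=srt, maxLineWidth=80)

# Burn the subtitles into the video, mirroring generate_subtitled_video()
# in 01_🎥_Input_YouTube_Link.py.
video_in = ffmpeg.input(video_path).filter("subtitles", "transcript.srt")
audio_in = ffmpeg.input(audio_path)
ffmpeg.concat(video_in, audio_in, v=1, a=1).output("subtitled.mp4").run(overwrite_output=True)
```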
auto-sub.gif ADDED

Git LFS Details

  • SHA256: b72017738c2b03d21c2eb513b06eba5f6deadfd628ff21f7d2c6613bd48534b3
  • Pointer size: 132 Bytes
  • Size of remote file: 4.55 MB
languages.py ADDED
@@ -0,0 +1,101 @@
+ LANGUAGES = {
+     "en": "eng",
+     "zh": "zho",
+     "de": "deu",
+     "es": "spa",
+     "ru": "rus",
+     "ko": "kor",
+     "fr": "fra",
+     "ja": "jpn",
+     "pt": "por",
+     "tr": "tur",
+     "pl": "pol",
+     "ca": "cat",
+     "nl": "nld",
+     "ar": "ara",
+     "sv": "swe",
+     "it": "ita",
+     "id": "ind",
+     "hi": "hin",
+     "fi": "fin",
+     "vi": "vie",
+     "iw": "heb",
+     "uk": "ukr",
+     "el": "ell",
+     "ms": "msa",
+     "cs": "ces",
+     "ro": "ron",
+     "da": "dan",
+     "hu": "hun",
+     "ta": "tam",
+     "no": "nor",
+     "th": "tha",
+     "ur": "urd",
+     "hr": "hrv",
+     "bg": "bul",
+     "lt": "lit",
+     "la": "lat",
+     "mi": "mri",
+     "ml": "mal",
+     "cy": "cym",
+     "sk": "slk",
+     "te": "tel",
+     "fa": "fas",
+     "lv": "lav",
+     "bn": "ben",
+     "sr": "srp",
+     "az": "aze",
+     "sl": "slv",
+     "kn": "kan",
+     "et": "est",
+     "mk": "mkd",
+     "br": "bre",
+     "eu": "eus",
+     "is": "isl",
+     "hy": "hye",
+     "ne": "nep",
+     "mn": "mon",
+     "bs": "bos",
+     "kk": "kaz",
+     "sq": "sqi",
+     "sw": "swa",
+     "gl": "glg",
+     "mr": "mar",
+     "pa": "pan",
+     "si": "sin",
+     "km": "khm",
+     "sn": "sna",
+     "yo": "yor",
+     "so": "som",
+     "af": "afr",
+     "oc": "oci",
+     "ka": "kat",
+     "be": "bel",
+     "tg": "tgk",
+     "sd": "snd",
+     "gu": "guj",
+     "am": "amh",
+     "yi": "yid",
+     "lo": "lao",
+     "uz": "uzb",
+     "fo": "fao",
+     "ht": "hat",
+     "ps": "pus",
+     "tk": "tuk",
+     "nn": "nno",
+     "mt": "mlt",
+     "sa": "san",
+     "lb": "ltz",
+     "my": "mya",
+     "bo": "bod",
+     "tl": "tgl",
+     "mg": "mlg",
+     "as": "asm",
+     "tt": "tat",
+     "haw": "haw",
+     "ln": "lin",
+     "ha": "hau",
+     "ba": "bak",
+     "jw": "jav",
+     "su": "sun",
+ }
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ git+https://github.com/openai/whisper.git
+ ffmpeg==1.4
+ ffmpeg_python==0.2.0
+ numpy==1.23.3
+ pytube==12.1.0
+ requests==2.28.1
+ streamlit==1.13.0
+ streamlit_lottie==0.0.3
+ whisper
utils.py ADDED
@@ -0,0 +1,96 @@
+ import textwrap
+ import zlib
+ from typing import Iterator, TextIO
+
+
+ def exact_div(x, y):
+     assert x % y == 0
+     return x // y
+
+
+ def str2bool(string):
+     str2val = {"True": True, "False": False}
+     if string in str2val:
+         return str2val[string]
+     else:
+         raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
+
+
+ def optional_int(string):
+     return None if string == "None" else int(string)
+
+
+ def optional_float(string):
+     return None if string == "None" else float(string)
+
+
+ def compression_ratio(text) -> float:
+     return len(text) / len(zlib.compress(text.encode("utf-8")))
+
+
+ def format_timestamp(seconds: float, always_include_hours: bool = False, fractionalSeperator: str = '.'):
+     assert seconds >= 0, "non-negative timestamp expected"
+     milliseconds = round(seconds * 1000.0)
+
+     hours = milliseconds // 3_600_000
+     milliseconds -= hours * 3_600_000
+
+     minutes = milliseconds // 60_000
+     milliseconds -= minutes * 60_000
+
+     seconds = milliseconds // 1_000
+     milliseconds -= seconds * 1_000
+
+     hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
+     return f"{hours_marker}{minutes:02d}:{seconds:02d}{fractionalSeperator}{milliseconds:03d}"
+
+
+ def write_txt(transcript: Iterator[dict], file: TextIO):
+     for segment in transcript:
+         print(segment['text'].strip(), file=file, flush=True)
+
+
+ def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
+     print("WEBVTT\n", file=file)
+     for segment in transcript:
+         text = processText(segment['text'], maxLineWidth).replace('-->', '->')
+
+         print(
+             f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
+             f"{text}\n",
+             file=file,
+             flush=True,
+         )
+
+
+ def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
+     """
+     Write a transcript to a file in SRT format.
+     Example usage:
+         from pathlib import Path
+         from whisper.utils import write_srt
+         result = transcribe(model, audio_path, temperature=temperature, **args)
+         # save SRT
+         audio_basename = Path(audio_path).stem
+         with open(Path(output_dir) / (audio_basename + ".srt"), "w", encoding="utf-8") as srt:
+             write_srt(result["segments"], file=srt)
+     """
+     for i, segment in enumerate(transcript, start=1):
+         text = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')
+
+         # write srt lines
+         print(
+             f"{i}\n"
+             f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
+             f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
+             f"{text}\n",
+             file=file,
+             flush=True,
+         )
+
+
+ def processText(text: str, maxLineWidth=None):
+     if (maxLineWidth is None or maxLineWidth < 0):
+         return text
+
+     lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
+     return '\n'.join(lines)
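To make the timestamp arithmetic in `format_timestamp` concrete, a small worked check (not part of the original file): 3725.5 seconds is 1 h 2 min 5 s 500 ms, so:

```python
from utils import format_timestamp

print(format_timestamp(3725.5))
# -> "01:02:05.500"   (hours are shown because hours > 0)
print(format_timestamp(3725.5, always_include_hours=True, fractionalSeperator=","))
# -> "01:02:05,500"   (SRT-style separator, as used by write_srt)
```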