Spaces:
Sleeping
Sleeping
Eason Lu
committed on
Commit
·
e75254e
1
Parent(s):
e3825f8
solve: maximum recursion
Browse files
Former-commit-id: e8ac2862b0a0745b3093a2129dda8abf2672f05e
- SRT.py +16 -3
- pipeline.py +6 -7
SRT.py
CHANGED
|
@@ -62,6 +62,16 @@ class SRT_segment(object):
|
|
| 62 |
def get_bilingual_str(self) -> str:
|
| 63 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
class SRT_script():
|
| 66 |
def __init__(self, segments) -> None:
|
| 67 |
self.segments = []
|
|
@@ -181,8 +191,12 @@ class SRT_script():
|
|
| 181 |
#print(lines[i])
|
| 182 |
pass
|
| 183 |
|
| 184 |
-
def split_seg(self, seg, threshold
|
| 185 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
source_text = seg.source_text
|
| 187 |
translation = seg.translation
|
| 188 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
|
@@ -333,7 +347,6 @@ class SRT_script():
|
|
| 333 |
|
| 334 |
def spell_check_term(self):
|
| 335 |
## known bug: I've will be replaced because i've is not in the dict
|
| 336 |
-
|
| 337 |
|
| 338 |
import enchant
|
| 339 |
dict = enchant.Dict('en_US')
|
|
|
|
| 62 |
def get_bilingual_str(self) -> str:
|
| 63 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
| 64 |
|
| 65 |
+
# def set_translation(self, trans):
|
| 66 |
+
# if trans[0] == ',':
|
| 67 |
+
# trans = trans[1:]
|
| 68 |
+
# self.translation = trans
|
| 69 |
+
|
| 70 |
+
# def set_src_text(self, src_text):
|
| 71 |
+
# if src_text[0] == ',':
|
| 72 |
+
# src_text = src_text[1:]
|
| 73 |
+
# self.source_text = src_text
|
| 74 |
+
|
| 75 |
class SRT_script():
|
| 76 |
def __init__(self, segments) -> None:
|
| 77 |
self.segments = []
|
|
|
|
| 191 |
#print(lines[i])
|
| 192 |
pass
|
| 193 |
|
| 194 |
+
def split_seg(self, seg, threshold):
|
| 195 |
+
# evenly split seg to 2 parts and add new seg into self.segments
|
| 196 |
+
if seg.source_text[0] == ',':
|
| 197 |
+
seg.source_text = seg.source_text[1:]
|
| 198 |
+
if seg.translation[0] == ',':
|
| 199 |
+
seg.translation = seg.translation[1:]
|
| 200 |
source_text = seg.source_text
|
| 201 |
translation = seg.translation
|
| 202 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
|
|
|
| 347 |
|
| 348 |
def spell_check_term(self):
|
| 349 |
## known bug: I've will be replaced because i've is not in the dict
|
|
|
|
| 350 |
|
| 351 |
import enchant
|
| 352 |
dict = enchant.Dict('en_US')
|
pipeline.py
CHANGED
|
@@ -144,7 +144,7 @@ else:
|
|
| 144 |
|
| 145 |
# srt class preprocess
|
| 146 |
srt.form_whole_sentence()
|
| 147 |
-
srt.spell_check_term()
|
| 148 |
srt.correct_with_force_term()
|
| 149 |
srt.write_srt_file_src(srt_file_en)
|
| 150 |
script_input = srt.get_source_only()
|
|
@@ -259,14 +259,13 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
| 259 |
time.sleep(30)
|
| 260 |
flag = True
|
| 261 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
| 262 |
-
print(translate)
|
| 263 |
srt.set_translation(translate, range, model_name)
|
| 264 |
-
add_length = srt.check_len_and_split_range(range)
|
| 265 |
-
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
| 266 |
-
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
# srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
|
| 271 |
|
| 272 |
if not args.only_srt:
|
|
|
|
| 144 |
|
| 145 |
# srt class preprocess
|
| 146 |
srt.form_whole_sentence()
|
| 147 |
+
# srt.spell_check_term()
|
| 148 |
srt.correct_with_force_term()
|
| 149 |
srt.write_srt_file_src(srt_file_en)
|
| 150 |
script_input = srt.get_source_only()
|
|
|
|
| 259 |
time.sleep(30)
|
| 260 |
flag = True
|
| 261 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
|
|
|
| 262 |
srt.set_translation(translate, range, model_name)
|
| 263 |
+
# add_length = srt.check_len_and_split_range(range)
|
| 264 |
+
# srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
| 265 |
+
# srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
| 266 |
|
| 267 |
+
srt.check_len_and_split()
|
| 268 |
+
srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
|
| 269 |
# srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
|
| 270 |
|
| 271 |
if not args.only_srt:
|