Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

Eason Lu committed on Mar 22, 2023

Commit

6113bd9

1 Parent(s): cf5f1c9

merge segments

Browse files

Former-commit-id: 3b73651a94d5dac62b0c7577f59b3d59509839f9

Files changed (2) hide show

SRT.py +60 -15
pipeline.py +13 -7

SRT.py CHANGED Viewed

@@ -8,26 +8,31 @@ class SRT_segment(object):
             segment = args[0]
             self.start_time_str = str(0)+str(timedelta(seconds=int(segment['start'])))+',000'
             self.end_time_str = str(0)+str(timedelta(seconds=int(segment['end'])))+',000'
-            self.segment_id = segment['id']+1
             self.source_text = segment['text']
             self.duration = f"{self.start_time_str} --> {self.end_time_str}"
             self.translation = ""
         elif isinstance(args[0], list):
-            self.segment_id = args[0][0]
             self.source_text = args[0][2]
             self.duration = args[0][1]
-            self.start_time_str = self.duration.split("-->")[0]
-            self.end_time_str = self.duration.split("-->")[1]
             self.translation = ""
     def __str__(self) -> str:
-        return  f'{self.segment_id}\n{self.duration}\n{self.source_text}\n\n'
     def get_trans_str(self) -> str:
-        return f'{self.segment_id}\n{self.duration}\n{self.translation}\n\n'
     def get_bilingual_str(self) -> str:
-        return f'{self.segment_id}\n{self.duration}\n{self.source_text}\n{self.translation}\n\n'
 class SRT_script():
     def __init__(self, segments) -> None:
@@ -48,42 +53,82 @@ class SRT_script():
         return cls(segments)
     def set_translation(self, translate:str, id_range:tuple):
         start_seg_id = id_range[0]
         end_seg_id = id_range[1]
         lines = translate.split('\n\n')
-        print(id_range)
-        print(translate)
-        # print(len(translate))
         for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
             seg.translation = lines[i]
         pass
     def get_source_only(self):
         # return a string with pure source text
         result = ""
-        for seg in self.segments:
-            result+=f'{seg.source_text}\n\n'
         return result
     def reform_src_str(self):
         result = ""
-        for seg in self.segments:
             result += str(seg)
         return result
     def reform_trans_str(self):
         result = ""
-        for seg in self.segments:
             result += seg.get_trans_str()
         return result
     def form_bilingual_str(self):
         result = ""
-        for seg in self.segments:
             result += seg.get_bilingual_str()
         return result

             segment = args[0]
             self.start_time_str = str(0)+str(timedelta(seconds=int(segment['start'])))+',000'
             self.end_time_str = str(0)+str(timedelta(seconds=int(segment['end'])))+',000'
             self.source_text = segment['text']
             self.duration = f"{self.start_time_str} --> {self.end_time_str}"
             self.translation = ""
         elif isinstance(args[0], list):
             self.source_text = args[0][2]
             self.duration = args[0][1]
+            self.start_time_str = self.duration.split(" --> ")[0]
+            self.end_time_str = self.duration.split(" --> ")[1]
             self.translation = ""
+    def merge_seg(self, seg):
+        self.source_text += seg.source_text
+        self.translation += seg.translation
+        self.end_time_str = seg.end_time_str
+        self.duration = f"{self.start_time_str} --> {self.end_time_str}"
+        pass
     def __str__(self) -> str:
+        return  f'{self.duration}\n{self.source_text}\n\n'
     def get_trans_str(self) -> str:
+        return f'{self.duration}\n{self.translation}\n\n'
     def get_bilingual_str(self) -> str:
+        return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
 class SRT_script():
     def __init__(self, segments) -> None:
         return cls(segments)
+    def merge_segs(self, idx_list) -> SRT_segment:
+        final_seg = self.segments[idx_list[0]]
+        if len(idx_list) == 1:
+            return final_seg
+        for idx in range(1, len(idx_list)):
+            final_seg.merge_seg(self.segments[idx_list[idx]])
+        return final_seg
+    def form_whole_sentence(self):
+        merge_list = [] # a list of index groups to merge, one group per sentence, e.g. [[0], [1, 2, 3], [4, 5], [6]]
+        sentence = []
+        for i, seg in enumerate(self.segments):
+            if seg.source_text[-1] == '.':
+                sentence.append(i)
+                merge_list.append(sentence)
+                sentence = []
+            else:
+                sentence.append(i)
+        segments = []
+        for idx_list in merge_list:
+            segments.append(self.merge_segs(idx_list))
+        self.segments = segments # need memory release?
     def set_translation(self, translate:str, id_range:tuple):
         start_seg_id = id_range[0]
         end_seg_id = id_range[1]
         lines = translate.split('\n\n')
+        if len(lines) != (end_seg_id - start_seg_id + 1):
+            print(id_range)
+            for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
+                print(seg.source_text)
+            print(translate)
         for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
             seg.translation = lines[i]
         pass
+    def split_seg(self, seg_id):
+        # TODO: split the segment evenly into two parts and add the new segment to self.segments
+        pass
+    def check_len_and_split(self, threshold):
+        # TODO: if the sentence length >= threshold, split this segment into two
+        pass
     def get_source_only(self):
         # return a string with pure source text
         result = ""
+        for i, seg in enumerate(self.segments):
+            result+=f'SENTENCE {i+1}: {seg.source_text}\n\n\n'
         return result
     def reform_src_str(self):
         result = ""
+        for i, seg in enumerate(self.segments):
+            result += f'{i+1}\n'
             result += str(seg)
         return result
     def reform_trans_str(self):
         result = ""
+        for i, seg in enumerate(self.segments):
+            result += f'{i+1}\n'
             result += seg.get_trans_str()
         return result
     def form_bilingual_str(self):
         result = ""
+        for i, seg in enumerate(self.segments):
+            result += f'{i+1}\n'
             result += seg.get_bilingual_str()
         return result

pipeline.py CHANGED Viewed

@@ -88,8 +88,6 @@ if not os.path.exists(f'{RESULT_PATH}/{VIDEO_NAME}'):
 srt_file_en = args.srt_file
 if srt_file_en is not None:
-    # with open(srt_file_en, 'r', encoding='utf-8') as f:
-    #     script_input = f.read()
     srt = SRT_script.parse_from_srt_file(srt_file_en)
     script_input = srt.get_source_only()
 else:
@@ -106,12 +104,20 @@ else:
         # use stable-whisper
         model = stable_whisper.load_model('base')
-        transcript = model.transcribe(audio_path)
-        transcript.to_srt_vtt(srt_file_en)
         transcript = transcript.to_dict()
         srt = SRT_script(transcript['segments']) # read segments to SRT class
         script_input = srt.get_source_only()
         #Write SRT file
         # from whisper.utils import WriteSRT
@@ -168,7 +174,7 @@ if not args.only_srt:
 # script_input_withForceTerm = re.sub('\n ', '\n', "".join(ready_words))
-srt.correct_with_force_term()
 # Split the video script by sentences and create chunks within the token limit
 def script_split(script_in, chunk_size = 1000):
@@ -199,8 +205,8 @@ script_arr, range_arr = script_split(script_input)
 # Translate and save
 for s, range in tqdm(zip(script_arr, range_arr)):
-    print(s)
     # using chatgpt model
     if model_name == "gpt-3.5-turbo":
         # print(s + "\n")
         response = openai.ChatCompletion.create(

 srt_file_en = args.srt_file
 if srt_file_en is not None:
     srt = SRT_script.parse_from_srt_file(srt_file_en)
     script_input = srt.get_source_only()
 else:
         # use stable-whisper
         model = stable_whisper.load_model('base')
+        transcript = model.transcribe(audio_path, regroup = False)
+        (
+            transcript
+            .split_by_punctuation(['.', '。', '?'])
+            .merge_by_gap(.15, max_words=3)
+            .merge_by_punctuation([' '])
+            .split_by_punctuation(['.', '。', '?'])
+        )
+        # transcript.to_srt_vtt(srt_file_en)
         transcript = transcript.to_dict()
         srt = SRT_script(transcript['segments']) # read segments to SRT class
+        srt.form_whole_sentence()
         script_input = srt.get_source_only()
+        srt.write_srt_file_src(srt_file_en)
         #Write SRT file
         # from whisper.utils import WriteSRT
 # script_input_withForceTerm = re.sub('\n ', '\n', "".join(ready_words))
+# srt.correct_with_force_term()
 # Split the video script by sentences and create chunks within the token limit
 def script_split(script_in, chunk_size = 1000):
 # Translate and save
 for s, range in tqdm(zip(script_arr, range_arr)):
     # using chatgpt model
+    print(f"now translating sentences {range}")
     if model_name == "gpt-3.5-turbo":
         # print(s + "\n")
         response = openai.ChatCompletion.create(