Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

DWizard committed on Mar 23, 2023

Commit

66e606c

1 Parent(s): cf5f1c9

rewrite forceTerm replacement

Browse files

Former-commit-id: e6472c129f985724e239c662cae9064e96883dde

Files changed (1) hide show

SRT.py +30 -2

SRT.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from datetime import timedelta
 import os
 import whisper
 class SRT_segment(object):
     def __init__(self, *args) -> None:
@@ -103,9 +105,35 @@ class SRT_script():
             f.write(self.form_bilingual_str())
         pass
-    def correct_with_force_term():
-        # force term correction
         pass

 from datetime import timedelta
 import os
 import whisper
+from csv import reader
+import re
 class SRT_segment(object):
     def __init__(self, *args) -> None:
             f.write(self.form_bilingual_str())
         pass
+    def correct_with_force_term(self):
+        ## force term correction
+        # TODO: shortcut translation i.e. VA, ob
+        # TODO: variety of translation
+        # load term dictionary
+        with open("finetune_data/dict.csv",'r', encoding='utf-8') as f:
+            csv_reader = reader(f)
+            term_dict = {rows[0]:rows[1] for rows in csv_reader}
+        # change term
+        for seg in self.segments:
+            ready_words = re.sub('\n', '\n ', seg.source_text).split(" ")
+            for i in range(len(ready_words)):
+                word = ready_words[i]
+                if word[-2:] == ".\n" :
+                    if word[:-2].lower() in term_dict :
+                        new_word = word.replace(word[:-2], term_dict.get(word[:-2].lower())) + ' '
+                        ready_words[i] = new_word
+                    else:
+                        ready_words[i] = word + ' '
+                elif word.lower() in term_dict :
+                    new_word = word.replace(word,term_dict.get(word.lower())) + ' '
+                    ready_words[i] = new_word
+                else :
+                    ready_words[i]= word + ' '
+            seg.source_text = re.sub('\n ', '\n', "".join(ready_words))
+        print(self)
         pass