Spaces:
Sleeping
Sleeping
DWizard
committed on
Commit
·
66e606c
1
Parent(s):
cf5f1c9
rewrite forceTerm replacement
Browse files
Former-commit-id: e6472c129f985724e239c662cae9064e96883dde
SRT.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
from datetime import timedelta
|
| 2 |
import os
|
| 3 |
import whisper
|
|
|
|
|
|
|
| 4 |
|
| 5 |
class SRT_segment(object):
|
| 6 |
def __init__(self, *args) -> None:
|
|
@@ -103,9 +105,35 @@ class SRT_script():
|
|
| 103 |
f.write(self.form_bilingual_str())
|
| 104 |
pass
|
| 105 |
|
| 106 |
-
def correct_with_force_term():
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
pass
|
| 110 |
|
| 111 |
|
|
|
|
| 1 |
from datetime import timedelta
|
| 2 |
import os
|
| 3 |
import whisper
|
| 4 |
+
from csv import reader
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
class SRT_segment(object):
|
| 8 |
def __init__(self, *args) -> None:
|
|
|
|
| 105 |
f.write(self.form_bilingual_str())
|
| 106 |
pass
|
| 107 |
|
| 108 |
+
def correct_with_force_term(self, dict_path="finetune_data/dict.csv"):
    """Replace dictionary terms in the source text of every segment.

    The term dictionary is a two-column CSV file: the first column is the
    term to look for, the second column is its replacement. Each segment's
    ``source_text`` is split on single spaces and every token is looked up
    in lower case; a token that ends a sentence at a line break (``".\\n"``)
    is looked up without that suffix and the suffix is put back afterwards.
    Segments are modified in place.

    Args:
        dict_path: Path of the CSV term dictionary. Defaults to the
            location the method originally had hard-coded.

    Raises:
        OSError: If the dictionary file cannot be opened.
    """
    # TODO: shortcut translation, e.g. VA, ob
    # TODO: variety of translation

    # Load the term dictionary. newline='' is what the csv module asks for;
    # rows with fewer than two columns (e.g. blank lines) are skipped rather
    # than crashing the whole correction pass with an IndexError.
    with open(dict_path, 'r', encoding='utf-8', newline='') as f:
        term_dict = {row[0]: row[1] for row in reader(f) if len(row) >= 2}

    for seg in self.segments:
        # Insert a space after every newline so the word that ends a line
        # keeps its "\n" and the first word of the next line becomes a
        # token of its own; the extra space is removed again below.
        words = seg.source_text.replace('\n', '\n ').split(' ')

        corrected = []
        for word in words:
            if word.endswith('.\n'):
                stem, tail = word[:-2], word[-2:]
            else:
                stem, tail = word, ''
            # Lookup is by lower-cased token; unknown tokens pass through.
            corrected.append(term_dict.get(stem.lower(), stem) + tail)

        # Join with single spaces (no trailing space, so repeated calls do
        # not keep growing the text) and undo the newline padding.
        seg.source_text = ' '.join(corrected).replace('\n ', '\n')

    # Kept from the original implementation: dumps the corrected script.
    print(self)
|
| 138 |
|
| 139 |
|