Fixed the issue that occurred after enabling the "Word Timestamps - Highlight Words" feature.

src/utils.py CHANGED (+10 -12)
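In the patched code, the currently spoken word is wrapped in `<u>...</u>` by a single `re.sub` that keeps any leading whitespace outside the tags. A minimal sketch of that expression, reusing the `text_words` and `idx` names from the diff; the sample values are invented:

```python
import re

# Sketch of the highlighting expression from the patch: underline the word
# at position `idx`, keeping leading whitespace outside the tags.
# `text_words` and `idx` mirror the diff; the sample data is made up.
text_words = ["Hello", " world", " again"]
idx = 1  # index of the word currently being spoken

highlighted = [
    re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word) if subidx == idx else word
    for subidx, word in enumerate(text_words)
]

print("".join(highlighted))  # Hello <u>world</u> again
```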
@@ -189,7 +189,7 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
         if highlight_words:
             last = subtitle_start
 
-            for i, this_word in enumerate(words):
+            for idx, this_word in enumerate(words):
                 start = this_word['start']
                 end = this_word['end']
 
@@ -207,15 +207,10 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
                     'end' : end,
                     'text' : __join_words(
                         [
-                            {
-                                "word": re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
-                                        if j == i else word,
-
-                                # The HTML tags <u> and </u> are not displayed,
-                                # so they should not be counted in the word length
-                                "length": len(word)
-                            } for j, word in enumerate(text_words)
-                        ], maxLineWidth)
+                            re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word) if subidx == idx else word
+                            for subidx, word in enumerate(text_words)
+                        ]
+                        , maxLineWidth)
                 }
                 last = end
 
@@ -238,9 +233,9 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
     result.update({'original': process_text(original_text, maxLineWidth)})
     yield result
 
-def __join_words(words: Iterator[
+def __join_words(words: Iterator[str], maxLineWidth: int = None):
     result = "".join(words)
-
+
     if maxLineWidth is None or maxLineWidth < 0:
         return result
 
@@ -273,6 +268,9 @@ def process_text(text: str, maxLineWidth=None):
             if currentLine:
                 currentLine += " "
                 wordWidth += 1
+            # The HTML tags <u> and </u> are not displayed,
+            # so they should not be counted in the word length
+            wordWidth -= 7 if "<u>" in word else 0
             for wordIdx, char in enumerate(word):
                 if unicodedata.east_asian_width(char) not in {'W', 'F'}:
                     wordWidth += 1