Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -313,13 +313,14 @@ def segment_audio_from_video(video_path):
|
|
313 |
]
|
314 |
|
315 |
return audio_path, transcript_with_speakers
|
316 |
-
|
317 |
def clean_transcribed_text(text: str) -> str:
|
318 |
-
"""
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
|
|
323 |
|
324 |
def transcribe_segments_with_scribe(full_audio_path, segments):
|
325 |
transcribed_segments = []
|
@@ -388,7 +389,7 @@ def transcribe_segments_with_scribe(full_audio_path, segments):
|
|
388 |
audio_clip.close()
|
389 |
|
390 |
return transcribed_segments, detected_language, error_message
|
391 |
-
|
392 |
# Function to get the appropriate translation model based on target language
|
393 |
def get_translation_model(source_language, target_language):
|
394 |
"""
|
|
|
313 |
]
|
314 |
|
315 |
return audio_path, transcript_with_speakers
|
|
|
316 |
def clean_transcribed_text(text: str) -> str:
    """Strip bracketed noise tags and punctuation runs from transcribed text.

    The cleanup is three regex substitutions applied in order:

    1. Delete the shortest span that opens with ``(``, ``[`` or ``{`` and
       closes with ``)``, ``]`` or ``}`` (e.g. ``(panting)``,
       ``[booming sound]``). The opening and closing bracket types are not
       required to match, and a span does not extend across a newline.
    2. Replace every run of one or more of the listed punctuation/symbol
       characters with a single space (a lone ``.`` or ``,`` is replaced too).
    3. Collapse each whitespace run to one space.

    Finally, leading and trailing whitespace is trimmed.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned text.
    """
    # (pattern, replacement) pairs; order matters because removing tags and
    # punctuation can leave extra whitespace for the last rule to collapse.
    substitutions = (
        (r"[\(\[\{].*?[\)\]\}]", ""),
        (r"[_,.~`^•·。!?!?,,\.\/\\\-–—=+]+", " "),
        (r"\s+", " "),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
|
324 |
|
325 |
def transcribe_segments_with_scribe(full_audio_path, segments):
|
326 |
transcribed_segments = []
|
|
|
389 |
audio_clip.close()
|
390 |
|
391 |
return transcribed_segments, detected_language, error_message
|
392 |
+
|
393 |
# Function to get the appropriate translation model based on target language
|
394 |
def get_translation_model(source_language, target_language):
|
395 |
"""
|