Commit
·
07cf4bd
1
Parent(s):
cf544a5
Update utils.py to fix openai-whisper compatibility and improve error handling
Browse files
utils.py
CHANGED
@@ -1,19 +1,32 @@
|
|
1 |
-
import openai_whisper as whisper
|
2 |
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
import os
|
4 |
|
5 |
# Load Whisper model
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def process_video(video_file, language):
|
|
|
|
|
|
|
9 |
# Save uploaded video locally
|
10 |
video_path = "/tmp/video.mp4"
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
try:
|
15 |
print("Transcribing video to English...")
|
16 |
result = model.transcribe(video_path, language="en")
|
|
|
17 |
|
18 |
segments = []
|
19 |
if language == "English":
|
@@ -42,7 +55,10 @@ def process_video(video_file, language):
|
|
42 |
"Arabic": "Helsinki-NLP/opus-mt-en-ar",
|
43 |
"Japanese": "Helsinki-NLP/opus-mt-en-jap"
|
44 |
}
|
45 |
-
model_name = model_map
|
|
|
|
|
|
|
46 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
47 |
translation_model = MarianMTModel.from_pretrained(model_name)
|
48 |
print(f"Translating to {language}...")
|
@@ -60,7 +76,8 @@ def process_video(video_file, language):
|
|
60 |
end = f"{segment['end']:.3f}".replace(".", ",")
|
61 |
text = segment["text"].strip()
|
62 |
f.write(f"{i}\n00:00:{start} --> 00:00:{end}\n{text}\n\n")
|
|
|
63 |
return srt_path
|
64 |
|
65 |
except Exception as e:
|
66 |
-
return f"Error: {str(e)}"
|
|
|
1 |
+
import openai_whisper as whisper # Use the correct import for openai-whisper
|
2 |
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
import os
|
4 |
|
5 |
# Load Whisper model
|
6 |
+
try:
|
7 |
+
print("Loading Whisper model...")
|
8 |
+
model = whisper.load_model("base")
|
9 |
+
print("Whisper model loaded successfully!")
|
10 |
+
except Exception as e:
|
11 |
+
raise ImportError(f"Failed to load Whisper model: {e}")
|
12 |
|
13 |
def process_video(video_file, language):
|
14 |
+
"""
|
15 |
+
Process the uploaded video and generate subtitles in the specified language.
|
16 |
+
"""
|
17 |
# Save uploaded video locally
|
18 |
video_path = "/tmp/video.mp4"
|
19 |
+
try:
|
20 |
+
with open(video_path, "wb") as f:
|
21 |
+
f.write(video_file.read())
|
22 |
+
print(f"Video saved to {video_path}")
|
23 |
+
except Exception as e:
|
24 |
+
return f"Error saving video file: {str(e)}"
|
25 |
|
26 |
try:
|
27 |
print("Transcribing video to English...")
|
28 |
result = model.transcribe(video_path, language="en")
|
29 |
+
print("Transcription completed!")
|
30 |
|
31 |
segments = []
|
32 |
if language == "English":
|
|
|
55 |
"Arabic": "Helsinki-NLP/opus-mt-en-ar",
|
56 |
"Japanese": "Helsinki-NLP/opus-mt-en-jap"
|
57 |
}
|
58 |
+
model_name = model_map.get(language)
|
59 |
+
if not model_name:
|
60 |
+
return f"Unsupported language: {language}"
|
61 |
+
|
62 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
63 |
translation_model = MarianMTModel.from_pretrained(model_name)
|
64 |
print(f"Translating to {language}...")
|
|
|
76 |
end = f"{segment['end']:.3f}".replace(".", ",")
|
77 |
text = segment["text"].strip()
|
78 |
f.write(f"{i}\n00:00:{start} --> 00:00:{end}\n{text}\n\n")
|
79 |
+
print(f"SRT file created at {srt_path}")
|
80 |
return srt_path
|
81 |
|
82 |
except Exception as e:
|
83 |
+
return f"Error processing video: {str(e)}"
|