import os
import tempfile
from typing import Dict

import torch
import whisper
from pydantic import BaseModel

from services.text_processor import process_text_to_insight


# Request model for text-only input; defined here rather than imported.
class TextRequest(BaseModel):
    text: str
# Determine the most efficient device available (CUDA if possible, otherwise CPU).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Whisper model once when the module is imported. Loading is a time-
# and resource-intensive operation, so it should not happen on every API call.
try:
    print(f"Loading Whisper model 'large' onto device '{DEVICE}'...")
    model = whisper.load_model("large", device=DEVICE)
    print("Whisper model loaded successfully.")
except Exception as e:
    print(f"Fatal: Error loading Whisper model: {e}")
    model = None
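
# Note: when DEVICE is "cpu", Whisper falls back to FP32 and logs a
# "FP16 is not supported on CPU; using FP32 instead" warning; this is
# expected behavior of the openai-whisper package, not an error.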
def process_audio_with_whisper(audio_bytes: bytes) -> Dict[str, str]:
    """
    Transcribes and translates the given audio bytes using the Whisper model.

    The audio bytes are saved to a temporary file, and the file path is passed
    to Whisper for processing. This is a robust way to handle file access and
    prevents permission errors with ffmpeg, especially on Windows.

    Args:
        audio_bytes: The raw bytes of the audio file (e.g., WAV, MP3).

    Returns:
        A dictionary containing the Tagalog transcription, the English
        translation, and insights derived from the transcription.
        Example: {"transcription": "...", "translation": "...", "insights": "..."}

    Raises:
        ValueError: If the Whisper model was not loaded successfully.
        Exception: If audio processing or model inference fails.
    """
    if model is None:
        raise ValueError("Whisper model is not available or failed to load.")

    # Create a temporary file to store the audio. Using delete=False is
    # crucial on Windows so other processes (ffmpeg) can open the file by its
    # path; the file is deleted manually in the 'finally' block.
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as temp_audio_file:
            temp_path = temp_audio_file.name
            # Write the uploaded audio bytes to the temporary file.
            temp_audio_file.write(audio_bytes)
        # The file is closed automatically when the 'with' block exits.
    except Exception as e:
        print(f"Error creating temporary file: {e}")
        raise
    try:
        # Transcribe the Tagalog audio from the temporary file path.
        transcription_result = model.transcribe(
            temp_path,
            language="tl",
            task="transcribe",
        )
        # Translate the same audio to English using the same file path.
        translation_result = model.transcribe(
            temp_path,
            language="tl",
            task="translate",
        )

        # Extract the transcribed text and derive insights from it.
        transcribed_text = transcription_result.get("text", "").strip()
        insights = process_text_to_insight(transcribed_text)

        return {
            "transcription": transcribed_text,
            "translation": translation_result.get("text", "").strip(),
            "insights": insights,
        }
    except Exception as e:
        # Log and re-raise any exceptions to be handled by the FastAPI endpoint.
        print(f"An error occurred during Whisper processing: {e}")
        raise
    finally:
        # Ensure the temporary file is deleted after processing, even on error.
        if "temp_path" in locals() and os.path.exists(temp_path):
            os.remove(temp_path)
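
# --- Hedged sketch: wiring this service into FastAPI ---
# The error-handling comment above refers to a FastAPI endpoint. The routes
# below are an illustrative sketch only: the paths, function names, and the
# existence of a separate router module are assumptions, not part of this
# service. Kept as comments so this module does not depend on FastAPI.
#
#   from fastapi import APIRouter, UploadFile
#
#   router = APIRouter()
#
#   @router.post("/audio")  # hypothetical path
#   async def audio_endpoint(file: UploadFile):
#       audio_bytes = await file.read()
#       return process_audio_with_whisper(audio_bytes)
#
#   @router.post("/text")  # hypothetical path
#   async def text_endpoint(request: TextRequest):
#       return {"insights": process_text_to_insight(request.text)}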
print("=== Debug Whisper Output ===")