teachingAssistant / src /application /dtos /processing_request_dto.py
Michael Hu
set parakeet model to default asr
f7aaf3b
"""Processing Request Data Transfer Object"""
from dataclasses import dataclass
from typing import Optional, Dict, Any
from .audio_upload_dto import AudioUploadDto
@dataclass
class ProcessingRequestDto:
    """DTO for pipeline input parameters.

    Contains all parameters needed to process audio through
    the STT -> Translation -> TTS pipeline. All fields are validated when
    the instance is created; an invalid value raises ``ValueError``.

    Attributes:
        audio: The uploaded audio; must be an ``AudioUploadDto`` instance.
        asr_model: Speech-recognition model name; one of
            ``_SUPPORTED_ASR_MODELS``.
        target_language: Language code of the output; one of
            ``_SUPPORTED_LANGUAGES``.
        voice: Voice identifier; one of ``_SUPPORTED_VOICES``.
        speed: Speed factor, between 0.5 and 2.0 inclusive.
        source_language: Optional language code of the input audio. When it
            is not given, translation is assumed to be required.
        additional_params: Optional extra parameters. ``None`` is replaced
            by an empty dict after validation.
    """

    audio: AudioUploadDto
    asr_model: str
    target_language: str
    voice: str
    speed: float = 1.0
    source_language: Optional[str] = None
    additional_params: Optional[Dict[str, Any]] = None

    # Allowed values. These are intentionally left un-annotated so that
    # @dataclass does not turn them into instance fields, and they are
    # tuples so they cannot be mutated by accident.
    _SUPPORTED_ASR_MODELS = (
        'parakeet', 'whisper-small', 'whisper-medium', 'whisper-large',
    )
    # Language codes (ISO 639-1)
    _SUPPORTED_LANGUAGES = (
        'en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh',
        'ar', 'hi', 'tr', 'pl', 'nl', 'sv', 'da', 'no', 'fi',
    )
    _SUPPORTED_VOICES = ('kokoro', 'dia', 'cosyvoice2', 'dummy')
    _MIN_SPEED = 0.5
    _MAX_SPEED = 2.0

    def __post_init__(self):
        """Validate the DTO, then normalise ``additional_params`` to a dict.

        Raises:
            ValueError: If any field fails validation.
        """
        self._validate()
        if self.additional_params is None:
            self.additional_params = {}

    def _validate(self):
        """Validate processing request parameters.

        Raises:
            ValueError: If ``audio`` is not an ``AudioUploadDto``, if a
                required string is empty or not in its supported set, if
                ``speed`` is non-numeric or outside the allowed range, or
                if ``additional_params`` is neither ``None`` nor a dict.
        """
        if not isinstance(self.audio, AudioUploadDto):
            raise ValueError("Audio must be an AudioUploadDto instance")

        if not self.asr_model:
            raise ValueError("ASR model cannot be empty")
        if self.asr_model not in self._SUPPORTED_ASR_MODELS:
            supported_asr_models = list(self._SUPPORTED_ASR_MODELS)
            raise ValueError(
                f"Unsupported ASR model: {self.asr_model}. "
                f"Supported: {supported_asr_models}"
            )

        supported_languages = list(self._SUPPORTED_LANGUAGES)
        if not self.target_language:
            raise ValueError("Target language cannot be empty")
        if self.target_language not in self._SUPPORTED_LANGUAGES:
            raise ValueError(
                f"Unsupported target language: {self.target_language}. "
                f"Supported: {supported_languages}"
            )
        # The source language is optional; only check it when provided.
        if (self.source_language
                and self.source_language not in self._SUPPORTED_LANGUAGES):
            raise ValueError(
                f"Unsupported source language: {self.source_language}. "
                f"Supported: {supported_languages}"
            )

        if not self.voice:
            raise ValueError("Voice cannot be empty")
        if self.voice not in self._SUPPORTED_VOICES:
            supported_voices = list(self._SUPPORTED_VOICES)
            raise ValueError(
                f"Unsupported voice: {self.voice}. "
                f"Supported: {supported_voices}"
            )

        # A value that cannot be ordered against a float (e.g. a str or
        # None) would raise TypeError here; report it as a validation
        # error like every other bad field instead.
        try:
            speed_ok = self._MIN_SPEED <= self.speed <= self._MAX_SPEED
        except TypeError:
            speed_ok = False
        if not speed_ok:
            raise ValueError(
                f"Speed must be between 0.5 and 2.0, got: {self.speed}"
            )

        # Compare against None rather than relying on truthiness, so that
        # falsy non-dict values such as [] or "" are rejected too.
        if (self.additional_params is not None
                and not isinstance(self.additional_params, dict)):
            raise ValueError("Additional params must be a dictionary")

    @property
    def requires_translation(self) -> bool:
        """Check if translation is required.

        Returns:
            ``True`` when no source language is specified (translation is
            assumed to be needed) or when the source and target languages
            differ; ``False`` when they are equal.
        """
        if not self.source_language:
            return True  # Assume translation needed if source not specified
        return self.source_language != self.target_language

    def to_dict(self) -> dict:
        """Convert to dictionary representation.

        Returns:
            A dict holding every field (``audio`` via its own ``to_dict()``)
            plus the derived ``requires_translation`` flag.
        """
        return {
            'audio': self.audio.to_dict(),
            'asr_model': self.asr_model,
            'target_language': self.target_language,
            'source_language': self.source_language,
            'voice': self.voice,
            'speed': self.speed,
            'requires_translation': self.requires_translation,
            'additional_params': self.additional_params or {},
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'ProcessingRequestDto':
        """Create instance from dictionary.

        Args:
            data: Mapping with the required keys ``asr_model``,
                ``target_language`` and ``voice``, and an ``audio`` entry
                that is either an ``AudioUploadDto`` or a dict with
                ``filename`` and ``content_type`` (``content`` defaults to
                ``b''``). ``speed``, ``source_language`` and
                ``additional_params`` are optional.

        Returns:
            A validated ``ProcessingRequestDto``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If the resulting values fail validation.
        """
        audio_data = data.get('audio', {})
        if isinstance(audio_data, dict):
            # Reconstruct AudioUploadDto if needed
            audio = AudioUploadDto(
                filename=audio_data['filename'],
                content=audio_data.get('content', b''),
                content_type=audio_data['content_type'],
            )
        else:
            # Passed through as-is; validation rejects anything that is
            # not an AudioUploadDto.
            audio = audio_data

        return cls(
            audio=audio,
            asr_model=data['asr_model'],
            target_language=data['target_language'],
            voice=data['voice'],
            speed=data.get('speed', 1.0),
            source_language=data.get('source_language'),
            additional_params=data.get('additional_params'),
        )