Spaces:
Sleeping
Sleeping
File size: 1,027 Bytes
5da9a16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import os
from dataclasses import dataclass
from typing import List, Optional
@dataclass
class ModelConfig:
    """Model identifiers and processing parameters, gathered in one place.

    The fields are grouped as ASR, speaker diarization, text processing,
    processing parameters and output formats. Every field has a default,
    so ``ModelConfig()`` is a valid configuration on its own.

    Besides the declared fields, each instance carries ``hf_token``: the
    value of the ``HF_TOKEN`` environment variable at construction time,
    or ``None`` when the variable is unset. It is assigned in
    ``__post_init__`` rather than declared as a dataclass field, so it is
    not a constructor argument and does not appear in the generated
    ``__repr__``/``__eq__`` or in ``dataclasses.asdict()``.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    # presumably a language code ("id" = Indonesian) -- confirm with the ASR caller
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    # NOTE(review): the unit of chunk_size / chunk_overlap (characters vs.
    # tokens) is not established in this file -- confirm with the consumer.
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # None is a sentinel meaning "use the default formats"; it is replaced
    # in __post_init__ so that every instance gets its own list object.
    output_formats: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and read the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Set HF token from environment (None when HF_TOKEN is not set).
        self.hf_token: Optional[str] = os.environ.get("HF_TOKEN")
# Global config instance, built from the defaults. Constructing it here
# means HF_TOKEN is read from the environment when this module is imported.
config = ModelConfig()