File size: 1,027 Bytes
5da9a16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class ModelConfig:
    """Model identifiers and processing parameters bundled in one object.

    Every field has a default, so ``ModelConfig()`` works without arguments
    and any individual field can be overridden by keyword.

    Attributes:
        hf_token: The ``HF_TOKEN`` environment variable as read when the
            instance is created, with surrounding whitespace removed, or
            ``None`` when the variable is unset or blank. It is assigned in
            ``__post_init__`` and is deliberately not a dataclass field, so
            it cannot be passed to the constructor and does not appear in
            ``repr()``, ``==`` comparisons or ``dataclasses.asdict()``.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    # NOTE(review): presumably a language code ("id" = Indonesian) - confirm
    # against the code that consumes it.
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    # NOTE(review): the unit of chunk_size / chunk_overlap (characters,
    # tokens, ...) is not visible in this file - confirm with the consumer.
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # ``None`` is a sentinel meaning "use the default formats"; it is replaced
    # by a fresh list per instance in ``__post_init__`` so instances never
    # share one mutable default.
    output_formats: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and capture the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Set HF token from environment. A blank or whitespace-only value is
        # what an "exported but empty" variable looks like; treat it as
        # "no token" instead of storing an empty string as if it were one.
        raw_token = os.environ.get("HF_TOKEN")
        cleaned_token = raw_token.strip() if raw_token is not None else ""
        self.hf_token: Optional[str] = cleaned_token or None

# Global config instance, built with all defaults when this module is
# imported; the HF_TOKEN environment variable is therefore read at import time.
config = ModelConfig()