Spaces:

UcsTurkey
/

flare

Paused

App Files Files Community

flare / stt_interface.py

ciyidogan

Create stt_interface.py

0da26ae verified 4 months ago

raw

history blame

2.03 kB

	"""
	STT (Speech-to-Text) Interface and Data Models
	"""

	from abc import ABC, abstractmethod
	from typing import Optional, Dict, Any, AsyncIterator, List
	from dataclasses import dataclass
	from enum import Enum
	import json

	class STTEngineType(Enum):
	    """Identifiers for the selectable STT engines.

	    Each member's value is the lowercase string form of its name, so a
	    member can be looked up from that string, e.g.
	    ``STTEngineType("google")``.
	    """

	    # Presumably selects "no speech-to-text at all" -- confirm against the
	    # code that consumes this enum.
	    NO_STT = "no_stt"
	    GOOGLE = "google"
	    AZURE = "azure"
	    AMAZON = "amazon"
	    FLICKER = "flicker"

	@dataclass
	class STTConfig:
	    """STT configuration parameters.

	    Every field has a default, so ``STTConfig()`` yields a usable
	    configuration. How each field is interpreted is up to the concrete
	    provider; the comments below describe only what this declaration shows.
	    """

	    # Language code for recognition; defaults to Turkish (Turkey).
	    language: str = "tr-TR"
	    # Audio sample rate (presumably in Hz -- confirm with the providers).
	    sample_rate: int = 16000
	    # Name of the audio encoding of the incoming chunks.
	    encoding: str = "WEBM_OPUS"
	    enable_punctuation: bool = True
	    enable_word_timestamps: bool = False
	    # Provider-specific recognition model name.
	    model: str = "latest_long"
	    use_enhanced: bool = True
	    single_utterance: bool = False
	    interim_results: bool = True

	    # Voice Activity Detection
	    vad_enabled: bool = True
	    # Timeout in milliseconds, per the field name.
	    speech_timeout_ms: int = 2000

	    # Noise reduction
	    noise_reduction_enabled: bool = True
	    # NOTE(review): the valid range of levels is not defined in this file.
	    noise_reduction_level: int = 2

	@dataclass
	class TranscriptionResult:
	    """Result from STT engine.

	    ``text``, ``is_final``, ``confidence`` and ``timestamp`` are required;
	    the remaining fields are optional and default to "not provided".
	    """

	    # Transcribed text.
	    text: str
	    # True for a final result, False otherwise (presumably an interim
	    # hypothesis -- confirm with the providers).
	    is_final: bool
	    # Confidence score reported for this result; the scale is not defined
	    # in this file.
	    confidence: float
	    # Time value attached to the result; epoch/units are not defined here.
	    timestamp: float
	    # Optional per-word timing entries; the dict schema is provider-defined.
	    word_timestamps: Optional[List[Dict]] = None
	    # Optional language code associated with the result.
	    language: Optional[str] = None
	    # NOTE(review): semantics of an "interrupt" result are not shown in this
	    # file -- confirm with the code that sets it.
	    is_interrupt: bool = False

	class STTInterface(ABC):
	    """Abstract base class for STT providers.

	    Every method is abstract, so a concrete provider must override all of
	    them before it can be instantiated.
	    """

	    @abstractmethod
	    async def start_streaming(self, config: STTConfig) -> None:
	        """Start streaming session.

	        Args:
	            config: Recognition settings for the session.
	        """

	    @abstractmethod
	    async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
	        """Stream audio chunk and get transcription results.

	        Args:
	            audio_chunk: Raw audio bytes to feed to the engine.

	        Returns:
	            An async iterator of transcription results.
	        """
	        # NOTE(review): this declaration is ``async def`` without ``yield``,
	        # so calling it directly produces a coroutine rather than an async
	        # iterator. Implementations are presumably async generators meant
	        # to be consumed with ``async for`` -- confirm against providers.

	    @abstractmethod
	    async def stop_streaming(self) -> Optional[TranscriptionResult]:
	        """Stop streaming and get final result.

	        Returns:
	            The final transcription result, or ``None`` if there is none.
	        """

	    @abstractmethod
	    def supports_realtime(self) -> bool:
	        """Check if provider supports real-time streaming."""

	    @abstractmethod
	    def get_supported_languages(self) -> List[str]:
	        """Get list of supported language codes."""