Spaces:

UcsTurkey
/

flare

Paused

App Files Files Community

flare / realtime_stt_manager.py

ciyidogan

Update realtime_stt_manager.py

19f1bba verified 4 months ago

raw

history blame

4.15 kB

	"""
	Real-time STT Manager for streaming transcription
	"""
	from typing import AsyncIterator, Optional, Dict, Any
	import asyncio
	from datetime import datetime
	import sys

	from stt_interface import STTInterface, STTConfig, TranscriptionResult
	from config_provider import ConfigProvider
	from logger import log_info, log_error, log_warning, log_debug

	class STTStreamManager:
	    """Drive a single real-time speech-to-text streaming session.

	    The manager wraps an ``STTInterface`` provider: it opens the stream,
	    forwards audio chunks, relays the provider's transcription results and
	    remembers the most recent final one.

	    Attributes:
	        stt_provider: Provider handed to ``initialize``; ``None`` before that.
	        is_streaming: ``True`` between a successful ``initialize`` and the
	            next ``stop_streaming`` call.
	        config: ``STTConfig`` built by ``initialize``; ``None`` before that.
	        accumulated_text: Text of the most recent final result.
	        last_final_result: The most recent final result object, or ``None``.
	    """

	    def __init__(self):
	        # Nothing is connected yet; ``initialize`` attaches the provider.
	        self.stt_provider: Optional[STTInterface] = None
	        self.config = None
	        self.is_streaming = False
	        self.last_final_result = None
	        self.accumulated_text = ""

	    async def initialize(self, stt_provider: STTInterface, config: Dict[str, Any]):
	        """Attach a provider, build its ``STTConfig`` and open the stream.

	        Args:
	            stt_provider: Provider that will receive the audio.
	            config: Option mapping; any key that is absent falls back to the
	                default listed in the table below.
	        """
	        self.stt_provider = stt_provider

	        # Recognised option names paired with their fallback values.
	        fallbacks = (
	            ("language", "tr-TR"),
	            ("sample_rate", 16000),
	            ("encoding", "WEBM_OPUS"),
	            ("enable_punctuation", True),
	            ("interim_results", True),
	            ("speech_timeout_ms", 2000),
	            ("vad_enabled", True),
	            ("noise_reduction_enabled", True),
	            ("noise_reduction_level", 2),
	            ("enable_word_timestamps", False),
	            ("model", "latest_long"),
	            ("use_enhanced", True),
	        )
	        settings = {name: config.get(name, default) for name, default in fallbacks}

	        # Build the STTConfig object. ``single_utterance`` is never taken
	        # from the caller: it is pinned to False for continuous listening.
	        self.config = STTConfig(**settings, single_utterance=False)

	        # Start the streaming session; the provider receives the STTConfig
	        # object itself rather than the raw option mapping.
	        await self.stt_provider.start_streaming(self.config)
	        self.is_streaming = True
	        log_info("✅ STT stream manager initialized")

	    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
	        """Send one audio chunk to the provider and relay what it yields.

	        Yields nothing when no stream is active. If anything raises while
	        streaming, the error is logged and one placeholder result (empty
	        text, ``is_final=False``, ``confidence=0.0``, ``is_interrupt=True``)
	        is yielded instead of propagating the exception.

	        Args:
	            audio_chunk: Raw audio bytes for the provider.

	        Yields:
	            Each result produced by the provider's ``stream_audio``.
	        """
	        provider = self.stt_provider
	        if not (self.is_streaming and provider):
	            # NOTE(review): this looks like a warning but is emitted through
	            # log_info; confirm whether log_warning was intended.
	            log_info("⚠️ STT not streaming or provider not initialized")
	            return

	        try:
	            async for transcript in provider.stream_audio(audio_chunk):
	                if transcript.is_final:
	                    # NOTE(review): a final result replaces the stored text,
	                    # it is not appended to it - confirm this matches what
	                    # callers of get_accumulated_text expect.
	                    self.accumulated_text = transcript.text
	                    self.last_final_result = transcript

	                yield transcript

	        except Exception as exc:
	            log_error("❌ STT processing error", exc)
	            # Emit a placeholder result so the consumer still receives an item.
	            failure_marker = TranscriptionResult(
	                text="",
	                is_final=False,
	                confidence=0.0,
	                timestamp=datetime.now().timestamp(),
	                is_interrupt=True,
	            )
	            yield failure_marker

	    async def stop_streaming(self) -> Optional[TranscriptionResult]:
	        """Close the active stream and return the provider's closing result.

	        Returns:
	            Whatever the provider's ``stop_streaming`` returns, or ``None``
	            when no stream is active or when stopping raised (the error is
	            logged, not propagated).
	        """
	        if not (self.is_streaming and self.stt_provider):
	            return None

	        # Flag the session as closed before awaiting the provider, so the
	        # manager reports "not streaming" even if the provider call fails.
	        self.is_streaming = False
	        try:
	            closing_result = await self.stt_provider.stop_streaming()

	            if closing_result:
	                self.accumulated_text = closing_result.text
	                self.last_final_result = closing_result

	            log_info("✅ STT streaming stopped")
	            return closing_result

	        except Exception as exc:
	            log_error("❌ Error stopping STT stream", exc)
	            return None

	    def reset(self):
	        """Forget the stored text and final result; the stream is untouched."""
	        self.accumulated_text, self.last_final_result = "", None
	        log_info("🔄 STT stream manager reset")

	    def get_accumulated_text(self) -> str:
	        """Return the text currently stored in ``accumulated_text``."""
	        return self.accumulated_text