Michael Hu committed on
Commit
ae641cf
·
1 Parent(s): b2b15db

replace dummy tts class

Browse files
Files changed (1) hide show
  1. utils/tts_dummy.py +50 -8
utils/tts_dummy.py CHANGED
@@ -1,11 +1,53 @@
1
def generate_speech(text: str, language: str = "zh") -> str:
    """Legacy public entry point for TTS generation

    Kept for backward compatibility; new code should use the factory
    pattern implementation directly.

    Args:
        text (str): Text handed to the dummy engine
        language (str): Accepted for compatibility but not forwarded; the
            call below always passes the "af_heart" voice and speed 1.0

    Returns:
        str: Whatever the dummy engine's ``generate_speech`` returns
    """
    # Imported at call time rather than at module import time
    from utils.tts_base import DummyTTSEngine

    # Build a throwaway dummy engine and delegate to it
    return DummyTTSEngine().generate_speech(text, "af_heart", 1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class DummyTTS(TTSBase):
    """Dummy TTS engine that generates sine wave audio

    This class is used as a fallback when no other TTS engine is available.
    It never synthesizes real speech: both entry points emit the same
    440 Hz tone, whose length is derived from the length of the input text.
    """

    @staticmethod
    def _dummy_tone(text: str) -> Tuple[int, np.ndarray]:
        """Build the placeholder tone shared by the file and stream paths

        Args:
            text (str): Input text; only its length is used

        Returns:
            tuple: (sample_rate, audio_data), where audio_data is a 440 Hz
                sine wave with amplitude 0.5
        """
        sample_rate = 24000
        # Rough approximation of speech duration: 20 characters per second,
        # capped at 10 seconds. Empty text gives a zero-length signal.
        duration = min(len(text) / 20, 10)
        t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
        audio = 0.5 * np.sin(2 * np.pi * 440 * t)  # 440 Hz sine wave
        return sample_rate, audio

    def generate_speech(self, text: str, voice: str = 'default', speed: float = 1.0) -> str:
        """Generate a dummy sine wave audio file

        Args:
            text (str): Input text; only its length is used, to set the
                duration of the tone
            voice (str): Voice ID (not used)
            speed (float): Speech speed multiplier (not used)

        Returns:
            str: Path to the generated audio file
        """
        logger.info(f"Generating dummy speech for text length: {len(text)}")

        # Generate a simple sine wave
        sample_rate, audio = self._dummy_tone(text)

        # Save to file; the output path comes from a helper that is not
        # defined in this class (presumably provided by TTSBase)
        output_path = self._generate_output_path(prefix="dummy")
        sf.write(output_path, audio, sample_rate)

        logger.info(f"Generated dummy audio: {output_path}")
        return output_path

    def generate_speech_stream(self, text: str, voice: str = 'default', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate a dummy sine wave audio stream

        Args:
            text (str): Input text; only its length is used, to set the
                duration of the tone
            voice (str): Voice ID (not used)
            speed (float): Speech speed multiplier (not used)

        Yields:
            tuple: (sample_rate, audio_data) pairs; exactly one pair holding
                the whole tone is yielded
        """
        logger.info(f"Generating dummy speech stream for text length: {len(text)}")

        # Generate a simple sine wave
        sample_rate, audio = self._dummy_tone(text)

        # Yield the audio data
        yield sample_rate, audio