cnph001 committed on
Commit
d3fce98
·
verified ·
1 Parent(s): 7987224

Parse text

Browse files

Parse the input text: recognize paragraphs separated by a double ENTER (one blank line)
and process one paragraph at a time.

Files changed (1) hide show
  1. app.py +41 -12
app.py CHANGED
@@ -10,13 +10,10 @@ async def get_voices():
10
  voices = await edge_tts.list_voices()
11
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
12
 
13
- # Text-to-speech function
14
- async def text_to_speech(text, voice, rate, pitch):
15
  if not text.strip():
16
- return None, gr.Warning("Please enter text to convert.")
17
- if not voice:
18
- return None, gr.Warning("Please select a voice.")
19
-
20
  voice_short_name = voice.split(" - ")[0]
21
  rate_str = f"{rate:+d}%"
22
  pitch_str = f"{pitch:+d}Hz"
@@ -24,7 +21,37 @@ async def text_to_speech(text, voice, rate, pitch):
24
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
25
  tmp_path = tmp_file.name
26
  await communicate.save(tmp_path)
27
- return tmp_path, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Gradio interface function
30
  @spaces.GPU
@@ -37,15 +64,17 @@ import gradio as gr
37
 
38
  async def create_demo():
39
  voices = await get_voices()
40
-
41
  description = """
42
  Experience the power of Voicecloning.be for text-to-speech conversion.
 
 
43
  """
44
-
45
  demo = gr.Interface(
46
  fn=tts_interface,
47
  inputs=[
48
- gr.Textbox(label="Input Text", lines=5),
49
  gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
50
  gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
51
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
@@ -54,9 +83,9 @@ async def create_demo():
54
  gr.Audio(label="Generated Audio", type="filepath"),
55
  gr.Markdown(label="Warning", visible=False)
56
  ],
57
- title="Voicecloning.be Text-to-Speech",
58
  description=description,
59
- article="Experience the power of Voicecloning.be for text-to-speech conversion.",
60
  analytics_enabled=False,
61
  allow_flagging=False
62
  )
 
10
  voices = await edge_tts.list_voices()
11
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
12
 
13
+ # Text-to-speech function for a single paragraph
14
+ async def paragraph_to_speech(text, voice, rate, pitch):
15
  if not text.strip():
16
+ return None
 
 
 
17
  voice_short_name = voice.split(" - ")[0]
18
  rate_str = f"{rate:+d}%"
19
  pitch_str = f"{pitch:+d}Hz"
 
21
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
22
  tmp_path = tmp_file.name
23
  await communicate.save(tmp_path)
24
+ return tmp_path
25
+
26
+ # Main text-to-speech function that processes paragraphs
27
+ async def text_to_speech(text, voice, rate, pitch):
28
+ if not text.strip():
29
+ return None, gr.Warning("Please enter text to convert.")
30
+ if not voice:
31
+ return None, gr.Warning("Please select a voice.")
32
+
33
+ paragraphs = [p for p in text.split("\n\n") if p.strip()]
34
+ audio_files = []
35
+ for paragraph in paragraphs:
36
+ audio_path = await paragraph_to_speech(paragraph, voice, rate, pitch)
37
+ if audio_path:
38
+ audio_files.append(audio_path)
39
+
40
+ if not audio_files:
41
+ return None, None # No audio generated
42
+
43
+ # Combine audio files if there are multiple paragraphs
44
+ if len(audio_files) == 1:
45
+ return audio_files[0], None
46
+ else:
47
+ # Simple concatenation for now - consider using a proper audio editing library for smoother transitions
48
+ combined_audio_path = tempfile.mktemp(suffix=".mp3")
49
+ with open(combined_audio_path, 'wb') as outfile:
50
+ for filename in audio_files:
51
+ with open(filename, 'rb') as infile:
52
+ outfile.write(infile.read())
53
+ os.remove(filename) # Clean up individual files
54
+ return combined_audio_path, None
55
 
56
  # Gradio interface function
57
  @spaces.GPU
 
64
 
65
  async def create_demo():
66
  voices = await get_voices()
67
+
68
  description = """
69
  Experience the power of Voicecloning.be for text-to-speech conversion.
70
+ Enter your text, select a voice, and adjust the speech rate and pitch.
71
+ The application will process your text paragraph by paragraph (separated by two blank lines).
72
  """
73
+
74
  demo = gr.Interface(
75
  fn=tts_interface,
76
  inputs=[
77
+ gr.Textbox(label="Input Text", lines=5, placeholder="Separate paragraphs with two blank lines."),
78
  gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
79
  gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
80
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
 
83
  gr.Audio(label="Generated Audio", type="filepath"),
84
  gr.Markdown(label="Warning", visible=False)
85
  ],
86
+ title="Voicecloning.be Text-to-Speech (Paragraph by Paragraph)",
87
  description=description,
88
+ article="Process text paragraph by paragraph for smoother output.",
89
  analytics_enabled=False,
90
  allow_flagging=False
91
  )