Spaces:
Running
Running
File size: 10,174 Bytes
1f6c376 6ba9626 1f6c376 6ba9626 1f6c376 6ba9626 1f6c376 6ba9626 1f6c376 6ba9626 1f6c376 6ba9626 1f6c376 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# app.py - Main Gradio application
import asyncio
import logging
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Generator, List, Optional, Tuple

import gradio as gr

# Import our custom modules
from audio_utils import AudioProcessor
from segmenter import TextSegmenter
# --- CHANGE START ---
from tts_engine import CPUMultiSpeakerTTS # Updated class name
# --- CHANGE END ---
# Configure logging
# basicConfig sets up the root logger at INFO level at import time (it does
# nothing if the root logger already has handlers). `logger` is the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class PodXplainApp:
    """Turn a text script into a multi-speaker podcast MP3.

    Wires together the text segmenter, the TTS engine and the audio
    processor, and owns the temporary working directory in which the
    per-segment audio and the final MP3 are written.
    """

    # Upper bound on accepted input length, in characters.
    MAX_TEXT_CHARS = 50_000
    # Rough speaking rate for the duration estimate: 1000 chars ≈ 1 minute.
    CHARS_PER_MINUTE = 1000

    def __init__(self):
        self.segmenter = TextSegmenter()
        # --- CHANGE START ---
        self.tts_engine = CPUMultiSpeakerTTS()  # Updated class instantiation
        # --- CHANGE END ---
        self.audio_processor = AudioProcessor()
        # Path of the current working directory, or None when none exists.
        self.temp_dir = None

    def create_temp_directory(self) -> str:
        """Create a fresh temporary directory for processing.

        Any directory left over from a previous call is removed first, which
        also discards the previous run's output file.

        Returns:
            Path of the newly created directory (also stored in ``temp_dir``).
        """
        self.cleanup_temp_directory()
        self.temp_dir = tempfile.mkdtemp(prefix="podxplain_")
        return self.temp_dir

    def cleanup_temp_directory(self):
        """Remove the current temporary directory, if any, and forget it."""
        if self.temp_dir and os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir, ignore_errors=True)
        self.temp_dir = None

    def generate_podcast(
        self,
        text: str,
        speaker_detection_mode: str = "auto",
        progress=gr.Progress()
    ) -> Tuple[Optional[str], str]:
        """
        Main function to convert text to podcast audio.

        Args:
            text: Input text (up to 50,000 characters)
            speaker_detection_mode: How to detect speaker changes
            progress: Gradio progress tracker

        Returns:
            Tuple of (audio_path, status_message). ``audio_path`` is None
            whenever generation fails; the message then describes the
            problem. Exceptions are caught and reported through the message
            rather than propagated.
        """
        audio_files: List[str] = []
        final_audio_path = None
        try:
            # Validate input
            if not text or not text.strip():
                return None, "β Please provide some text to convert."
            if len(text) > self.MAX_TEXT_CHARS:
                return None, (
                    f"β Text too long ({len(text)} chars). "
                    f"Maximum is {self.MAX_TEXT_CHARS:,} characters."
                )

            # Create temporary directory
            temp_dir = self.create_temp_directory()
            progress(0, desc="π Starting podcast generation...")

            # Step 1: Segment text and assign speakers
            progress(0.1, desc="π Analyzing text and assigning speakers...")
            segments = self.segmenter.segment_and_assign_speakers(
                text, mode=speaker_detection_mode
            )
            if not segments:
                return None, "β Could not process the text. Please check the input."
            logger.info("Generated %d segments", len(segments))

            # Step 2: Generate audio for each segment
            progress(0.2, desc="π€ Generating audio segments...")
            for i, (speaker, segment_text) in enumerate(segments):
                # Synthesis spans the 20%-90% range of the progress bar.
                progress(
                    0.2 + (0.7 * i / len(segments)),
                    desc=f"π΅ Processing segment {i+1}/{len(segments)} (Speaker {speaker})"
                )
                audio_path = self.tts_engine.synthesize_segment(
                    segment_text,
                    speaker,
                    os.path.join(temp_dir, f"segment_{i:03d}.wav")
                )
                if audio_path:
                    audio_files.append(audio_path)
                else:
                    # A failed segment is skipped, not fatal.
                    logger.warning("Failed to generate audio for segment %d", i)
            if not audio_files:
                return None, "β Failed to generate any audio segments."

            # Step 3: Merge audio files and convert to MP3
            progress(0.9, desc="π§ Merging segments and converting to MP3...")
            final_audio_path = self.audio_processor.merge_and_convert_to_mp3(
                audio_files,
                os.path.join(temp_dir, "podcast_output.mp3")
            )
            if not final_audio_path:
                return None, "β Failed to merge audio segments."

            progress(1.0, desc="✅ Podcast generated successfully!")
            return final_audio_path, self._build_status_message(text, segments)
        except Exception as e:
            # Top-level boundary for the UI callback: keep the traceback in
            # the log and show the user a short message instead of crashing.
            logger.exception("Error generating podcast: %s", e)
            return None, f"β Error: {str(e)}"
        finally:
            # Clean up temporary files (except the final output)
            # Note: We keep the final MP3 for download
            self._remove_intermediate_files(audio_files, keep=final_audio_path)

    @classmethod
    def _build_status_message(cls, text: str, segments) -> str:
        """Format the Markdown success summary shown next to the audio player."""
        total_segments = len(segments)
        speakers_used = len({speaker for speaker, _ in segments})
        # The estimate is in minutes, matching the label in the message.
        duration_estimate = len(text) / cls.CHARS_PER_MINUTE
        # Leading/trailing empty items give the message its surrounding newlines.
        return "\n".join([
            "",
            "✅ **Podcast Generated Successfully!**",
            "π **Statistics:**",
            f"- Total segments: {total_segments}",
            f"- Speakers used: {speakers_used}",
            f"- Estimated duration: {duration_estimate:.1f} minutes",
            f"- Character count: {len(text):,}",
            "π§ **Your podcast is ready for download!**",
            "",
        ])

    @staticmethod
    def _remove_intermediate_files(paths, keep=None) -> None:
        """Best-effort removal of per-segment audio files, sparing ``keep``.

        Assumes every entry is a filesystem path, as returned by the TTS
        engine. Removal failures are logged and never raised, so cleanup
        cannot replace the result being returned to the caller.
        """
        for path in paths:
            if path == keep:
                continue
            try:
                os.remove(path)
            except OSError as exc:
                logger.debug("Could not remove intermediate file %s: %s", path, exc)
def create_gradio_interface():
    """Create the Gradio interface.

    Builds a Blocks layout with a script input column (text box, live
    character counter, speaker-detection mode, generate button) and an
    output column (status text and audio player), and wires the button to
    ``PodXplainApp.generate_podcast``.

    Returns:
        The assembled ``gr.Blocks`` object, not yet launched.
    """
    app = PodXplainApp()

    def _format_char_count(text):
        """Return the counter label for the current text-box content."""
        return f"Characters: {len(text) if text else 0:,} / 50,000"

    # Custom CSS for better styling
    css = """
.main-container {
max-width: 1200px;
margin: 0 auto;
}
.header {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 10px;
margin-bottom: 20px;
}
.footer {
text-align: center;
padding: 20px;
color: #666;
font-size: 0.9em;
}
"""
    with gr.Blocks(css=css, title="PodXplainClone - CPU Podcast Generator") as interface:
        # Header. This is raw HTML, so emphasis must use <strong>; Markdown
        # asterisks would be displayed literally here.
        gr.HTML("""
<div class="header">
<h1>ποΈ PodXplainClone</h1>
<p><em>From script to story β voice it like never before, even on CPU.</em></p> <p style="font-size: 0.9em; margin-top: 10px;">
This space allows you to transform written dialogue into natural-sounding multi-speaker audio, optimized for CPU hardware.
It serves as a <strong>CPU-friendly alternative and development sandbox</strong> while the main PodXplain project awaits GPU resources for more advanced models.
</p>
</div>
""")
        with gr.Row():
            with gr.Column(scale=2):
                # Input section
                gr.Markdown("## π Input Your Script")
                text_input = gr.Textbox(
                    label="Podcast Script",
                    placeholder="Enter your podcast script here (up to 50,000 characters).\n\nTip: Use paragraph breaks to help with speaker detection.",
                    lines=15,
                    max_lines=20,
                    show_label=True
                )
                char_count = gr.HTML("Characters: 0 / 50,000")
                # Options
                speaker_mode = gr.Radio(
                    choices=["auto", "paragraph", "dialogue"],
                    value="auto",
                    label="Speaker Detection Mode",
                    info="How to detect when speakers change"
                )
                generate_btn = gr.Button(
                    "π€ Generate Podcast",
                    variant="primary",
                    size="lg"
                )
            with gr.Column(scale=1):
                # Output section
                gr.Markdown("## π§ Your Podcast")
                status_output = gr.Markdown("Ready to generate your podcast!")
                audio_output = gr.Audio(
                    label="Generated Podcast",
                    show_download_button=True,
                    interactive=False
                )
        # Footer with instructions
        gr.HTML("""
<div class="footer">
<h3>π How to Use PodXplainClone</h3>
<ol>
<li><strong>Write your script:</strong> Enter up to 50,000 characters of text</li>
<li><strong>Choose speaker mode:</strong> Auto-detect, paragraph-based, or dialogue-based</li>
<li><strong>Generate:</strong> Click the button and wait for processing</li>
<li><strong>Listen & Download:</strong> Your MP3 podcast will be ready!</li>
</ol>
<p><strong>π‘ Tips:</strong> Use clear paragraph breaks for better speaker detection.
Write naturally as if speaking to an audience.</p>
<p style="font-size: 0.8em; color: #999;">Powered by PodXplainClone • Developed by Nick021402</p>
<p style="font-size: 0.7em; color: #aaa;">This space runs on CPU hardware for accessibility. For the original project and GPU-powered advanced models, visit the main PodXplain space.</p>
</div>
""")
        # Character counter: a Python callback run on every text change
        text_input.change(
            fn=_format_char_count,
            inputs=[text_input],
            outputs=[char_count]
        )
        # Main generation function
        generate_btn.click(
            fn=app.generate_podcast,
            inputs=[text_input, speaker_mode],
            outputs=[audio_output, status_output],
            show_progress=True
        )
    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server with
    # the launch settings collected below.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    interface = create_gradio_interface()
    interface.launch(**launch_options)
|