Spaces:

Nick021402
/

PodXplainClone

Running

App Files Files Community

PodXplainClone / app.py

Nick021402

Update app.py

6ba9626 verified 3 months ago

raw

history blame contribute delete

10.2 kB

	# app.py - Main Gradio application
	import asyncio
	import logging
	import os
	import shutil
	import tempfile
	from datetime import datetime
	from pathlib import Path
	from typing import Generator, List, Optional, Tuple

	import gradio as gr

	# Import our custom modules
	from segmenter import TextSegmenter
	# --- CHANGE START ---
	from tts_engine import CPUMultiSpeakerTTS # Updated class name
	# --- CHANGE END ---
	from audio_utils import AudioProcessor

	# Configure logging: set up the root logger at INFO level and create the
	# module-level logger used by the code below.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	class PodXplainApp:
	    """Pipeline that turns a text script into a single podcast audio file.

	    The work is delegated to three collaborators created in ``__init__``:
	    ``self.segmenter`` yields ``(speaker, text)`` pairs, ``self.tts_engine``
	    renders one pair to an audio file, and ``self.audio_processor`` merges
	    the rendered files into one MP3.
	    """

	    # Longest accepted input, in characters.
	    MAX_TEXT_LENGTH = 50_000
	    # Rough speaking rate used for the duration estimate: 1000 chars ≈ 1 minute.
	    CHARS_PER_MINUTE = 1000

	    def __init__(self):
	        self.segmenter = TextSegmenter()
	        self.tts_engine = CPUMultiSpeakerTTS()
	        self.audio_processor = AudioProcessor()
	        # Working directory of the most recent run; None before the first
	        # run and after cleanup_temp_directory().
	        self.temp_dir = None

	    def create_temp_directory(self) -> str:
	        """Create a fresh temporary working directory and return its path.

	        Any directory left over from a previous call is removed first, which
	        also removes the previous run's output file.
	        """
	        # NOTE(review): self.temp_dir is shared instance state; if two
	        # generations ever run concurrently on the same instance they would
	        # delete each other's directory - confirm the app is not queued with
	        # concurrency > 1.
	        if self.temp_dir:
	            shutil.rmtree(self.temp_dir, ignore_errors=True)
	        self.temp_dir = tempfile.mkdtemp(prefix="podxplain_")
	        return self.temp_dir

	    def cleanup_temp_directory(self):
	        """Remove the current working directory (if any) and forget it."""
	        if self.temp_dir:
	            # ignore_errors=True already tolerates a missing directory.
	            shutil.rmtree(self.temp_dir, ignore_errors=True)
	        self.temp_dir = None

	    @staticmethod
	    def _remove_intermediate_files(paths, work_dir, keep=None):
	        """Best-effort removal of per-segment files once they are merged.

	        Only files located directly inside ``work_dir`` are touched, and the
	        file named by ``keep`` (the final output) is always preserved.
	        Failures are logged at debug level and never raised, because this
	        runs from a ``finally`` clause.
	        """
	        if not work_dir:
	            return
	        work_dir = os.path.abspath(work_dir)
	        keep_path = os.path.abspath(keep) if keep else None
	        for path in paths:
	            try:
	                candidate = os.path.abspath(path)
	                if candidate == keep_path:
	                    continue
	                if os.path.dirname(candidate) != work_dir:
	                    continue
	                os.remove(candidate)
	            except (OSError, TypeError, ValueError):
	                logger.debug("Could not remove intermediate file %r", path)

	    def generate_podcast(
	        self,
	        text: str,
	        speaker_detection_mode: str = "auto",
	        progress=gr.Progress()
	    ) -> Tuple[Optional[str], str]:
	        """
	        Main function to convert text to podcast audio.

	        Args:
	            text: Input text (up to ``MAX_TEXT_LENGTH`` characters)
	            speaker_detection_mode: How to detect speaker changes; passed
	                through to the segmenter as ``mode``
	            progress: Gradio progress tracker (must stay a ``gr.Progress()``
	                default so Gradio can inject it)

	        Returns:
	            Tuple of (audio_path, status_message). ``audio_path`` is ``None``
	            whenever validation or any processing step fails; the message
	            then describes the problem. Exceptions are caught, logged and
	            reported through the message rather than raised.
	        """
	        temp_dir = None
	        final_audio_path = None
	        audio_files: List[str] = []

	        try:
	            # Validate input
	            if not text or not text.strip():
	                return None, "❌ Please provide some text to convert."

	            if len(text) > self.MAX_TEXT_LENGTH:
	                return None, (
	                    f"❌ Text too long ({len(text)} chars). "
	                    f"Maximum is {self.MAX_TEXT_LENGTH:,} characters."
	                )

	            # Create temporary directory
	            temp_dir = self.create_temp_directory()
	            progress(0, desc="🚀 Starting podcast generation...")

	            # Step 1: Segment text and assign speakers
	            progress(0.1, desc="📝 Analyzing text and assigning speakers...")
	            segments = self.segmenter.segment_and_assign_speakers(
	                text, mode=speaker_detection_mode
	            )

	            if not segments:
	                return None, "❌ Could not process the text. Please check the input."

	            logger.info(f"Generated {len(segments)} segments")

	            # Step 2: Generate audio for each segment
	            progress(0.2, desc="🎤 Generating audio segments...")

	            for i, (speaker, segment_text) in enumerate(segments):
	                # Synthesis occupies the 20%..90% span of the progress bar.
	                progress(
	                    0.2 + (0.7 * i / len(segments)),
	                    desc=f"🎵 Processing segment {i+1}/{len(segments)} (Speaker {speaker})"
	                )

	                audio_path = self.tts_engine.synthesize_segment(
	                    segment_text,
	                    speaker,
	                    os.path.join(temp_dir, f"segment_{i:03d}.wav")
	                )

	                # A falsy result means this segment failed; skip it and
	                # carry on with the rest.
	                if audio_path:
	                    audio_files.append(audio_path)
	                else:
	                    logger.warning(f"Failed to generate audio for segment {i}")

	            if not audio_files:
	                return None, "❌ Failed to generate any audio segments."

	            # Step 3: Merge audio files and convert to MP3
	            progress(0.9, desc="🔧 Merging segments and converting to MP3...")
	            final_audio_path = self.audio_processor.merge_and_convert_to_mp3(
	                audio_files,
	                os.path.join(temp_dir, "podcast_output.mp3")
	            )

	            if not final_audio_path:
	                return None, "❌ Failed to merge audio segments."

	            progress(1.0, desc="✅ Podcast generated successfully!")

	            # Generate summary
	            total_segments = len(segments)
	            speakers_used = len({speaker for speaker, _ in segments})
	            # Value is in minutes (the label below says "minutes").
	            duration_minutes = len(text) / self.CHARS_PER_MINUTE

	            # Built line by line so that source indentation can never leak
	            # into the text: it is rendered by a Markdown component, where
	            # indented lines would turn into a code block.
	            status_message = "\n".join([
	                "",
	                "✅ Podcast Generated Successfully!",
	                "",
	                "📊 Statistics:",
	                f"- Total segments: {total_segments}",
	                f"- Speakers used: {speakers_used}",
	                f"- Estimated duration: {duration_minutes:.1f} minutes",
	                f"- Character count: {len(text):,}",
	                "",
	                "🎧 Your podcast is ready for download!",
	                "",
	            ])

	            return final_audio_path, status_message

	        except Exception as e:
	            # Top-level boundary for the UI callback: log with traceback and
	            # surface the message to the user instead of crashing.
	            logger.exception("Error generating podcast: %s", e)
	            return None, f"❌ Error: {e}"

	        finally:
	            # Clean up intermediate segment files; the final MP3 is kept
	            # for download and removed when the next run starts.
	            self._remove_intermediate_files(
	                audio_files, temp_dir, keep=final_audio_path
	            )

	def create_gradio_interface():
	    """Build the PodXplainClone Gradio UI and return it without launching.

	    A single ``PodXplainApp`` instance backs the page. The layout is a
	    header, a two-column row (script input and options on the left, status
	    and audio player on the right) and a footer with usage instructions.
	    Two events are wired: the character counter refreshes whenever the
	    script changes, and the button runs ``PodXplainApp.generate_podcast``.
	    """
	    podcast_app = PodXplainApp()

	    # Stylesheet handed to gr.Blocks; the header/footer classes are used by
	    # the HTML snippets below.
	    page_css = """
	.main-container {
	max-width: 1200px;
	margin: 0 auto;
	}
	.header {
	text-align: center;
	padding: 20px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	border-radius: 10px;
	margin-bottom: 20px;
	}
	.footer {
	text-align: center;
	padding: 20px;
	color: #666;
	font-size: 0.9em;
	}
	"""

	    # Banner shown at the top of the page.
	    header_html = """
	<div class="header">
	<h1>🎙️ PodXplainClone</h1>
	<p><em>From script to story — voice it like never before, even on CPU.</em></p> <p style="font-size: 0.9em; margin-top: 10px;">
	This space allows you to transform written dialogue into natural-sounding multi-speaker audio, optimized for CPU hardware.
	It serves as a CPU-friendly alternative and development sandbox while the main PodXplain project awaits GPU resources for more advanced models.
	</p>
	</div>
	"""

	    # Usage instructions shown below the two columns.
	    footer_html = """
	<div class="footer">
	<h3>📋 How to Use PodXplainClone</h3>
	<ol>
	<li><strong>Write your script:</strong> Enter up to 50,000 characters of text</li>
	<li><strong>Choose speaker mode:</strong> Auto-detect, paragraph-based, or dialogue-based</li>
	<li><strong>Generate:</strong> Click the button and wait for processing</li>
	<li><strong>Listen & Download:</strong> Your MP3 podcast will be ready!</li>
	</ol>
	<p><strong>💡 Tips:</strong> Use clear paragraph breaks for better speaker detection.
	Write naturally as if speaking to an audience.</p>
	<p style="font-size: 0.8em; color: #999;">Powered by PodXplainClone • Developed by Nick021402</p>
	<p style="font-size: 0.7em; color: #aaa;">This space runs on CPU hardware for accessibility. For the original project and GPU-powered advanced models, visit the main PodXplain space.</p>
	</div>
	"""

	    with gr.Blocks(
	        title="PodXplainClone - CPU Podcast Generator",
	        css=page_css,
	    ) as interface:
	        gr.HTML(header_html)

	        with gr.Row():
	            # Left column: script entry and generation options.
	            with gr.Column(scale=2):
	                gr.Markdown("## 📝 Input Your Script")

	                script_box = gr.Textbox(
	                    label="Podcast Script",
	                    show_label=True,
	                    placeholder="Enter your podcast script here (up to 50,000 characters).\n\nTip: Use paragraph breaks to help with speaker detection.",
	                    lines=15,
	                    max_lines=20,
	                )

	                counter_html = gr.HTML("Characters: 0 / 50,000")

	                mode_radio = gr.Radio(
	                    label="Speaker Detection Mode",
	                    info="How to detect when speakers change",
	                    choices=["auto", "paragraph", "dialogue"],
	                    value="auto",
	                )

	                run_button = gr.Button(
	                    "🎤 Generate Podcast",
	                    size="lg",
	                    variant="primary",
	                )

	            # Right column: status text and the resulting audio.
	            with gr.Column(scale=1):
	                gr.Markdown("## 🎧 Your Podcast")

	                status_md = gr.Markdown("Ready to generate your podcast!")

	                player = gr.Audio(
	                    label="Generated Podcast",
	                    interactive=False,
	                    show_download_button=True,
	                )

	        gr.HTML(footer_html)

	        # Character counter: a Python callback re-rendered on every change
	        # of the script box (an empty or None value counts as 0).
	        script_box.change(
	            fn=lambda text: f"Characters: {len(text) if text else 0:,} / 50,000",
	            inputs=[script_box],
	            outputs=[counter_html],
	        )

	        # Generation: outputs map to (audio path, status message).
	        run_button.click(
	            fn=podcast_app.generate_podcast,
	            inputs=[script_box, mode_radio],
	            outputs=[player, status_md],
	            show_progress=True,
	        )

	    return interface

	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it on port 7860 on all
	    # network interfaces, with a share link requested and errors shown in
	    # the browser.
	    launch_options = {
	        "share": True,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "show_error": True,
	    }
	    interface = create_gradio_interface()
	    interface.launch(**launch_options)