# EchoVerse Local
Transform Text into Expressive Audiobooks with Local AI
Powered by IBM Granite 3B - no internet required for the AI text processing! (Speech synthesis uses gTTS, which does need a connection.)
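Under the hood, the app is a three-step pipeline: load a Granite model locally with `transformers`, rewrite the input text in the chosen tone, then synthesize speech with gTTS. Here is a minimal sketch of that flow, reusing the functions defined in the full script below (the sample sentence is just illustrative input):

```python
# End-to-end sketch of the EchoVerse pipeline, calling the functions
# defined in the script below.
print(load_granite_model("granite-3b"))  # downloads the model on first run

rewritten = rewrite_text_with_granite(
    "The old house stood silent at the end of the lane.",  # sample input
    "Suspenseful",
)
mp3_path = generate_audio_gtts(rewritten)  # gTTS call; needs a connection
print(rewritten)
print(mp3_path)
```

The full application script: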
```python
import io
import json
import os
import tempfile

import gradio as gr
import torch
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Configuration (inlined, since there is no separate config.py)
MODEL_CONFIG = {
    "models": {
        "granite-3b": "ibm-granite/granite-3b-code-base",
        "granite-8b": "ibm-granite/granite-8b-code-base",
    },
    "generation_params": {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": None,  # filled in once the tokenizer is loaded
    },
}

TTS_CONFIG = {
    "engine": "gtts",
    "voice_speed": 150,
    "voice_volume": 0.9,
}

TONE_PROMPTS = {
    "Neutral": "Rewrite the following text in a clear, neutral tone suitable for audiobook narration:",
    "Suspenseful": "Rewrite the following text with suspenseful, engaging language that builds tension:",
    "Inspiring": "Rewrite the following text in an inspiring, motivational tone that uplifts the reader:",
}

# Global model state
model = None
tokenizer = None
model_loaded = False


def load_granite_model(model_name="granite-3b"):
    """Load an IBM Granite model locally."""
    global model, tokenizer, model_loaded
    model_id = MODEL_CONFIG["models"][model_name]
    try:
        # Granite tokenizers ship without a pad token; fall back to EOS
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # fp16 on GPU, fp32 on CPU
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True,
        )
        model_loaded = True
        return "✅ Model loaded successfully!"
    except Exception as e:
        model_loaded = False
        return f"❌ Error loading model: {e}"


def rewrite_text_with_granite(text, tone):
    """Rewrite text in the requested tone using the local Granite model."""
    if not model_loaded or model is None or tokenizer is None:
        return text

    try:
        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\nRewritten text:"

        # Tokenize and move the tensors to the model's device
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
        ).to(model.device)

        # Fill in the pad token id for generation
        generation_params = MODEL_CONFIG["generation_params"].copy()
        generation_params["pad_token_id"] = tokenizer.pad_token_id

        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                **generation_params,
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the rewritten part of the completion
        if "Rewritten text:" in generated_text:
            rewritten = generated_text.split("Rewritten text:")[-1].strip()
        else:
            rewritten = generated_text[len(prompt):].strip()

        return rewritten if rewritten else text
    except Exception as e:
        return f"Error rewriting text: {e}"


def generate_audio_gtts(text, language="en"):
    """Generate MP3 audio with Google Text-to-Speech (needs a connection)."""
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        # Save to a temporary file and return its path for the Audio component
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tts.save(tmp_file.name)
            return tmp_file.name
    except Exception:
        return None


def process_audiobook(input_text, uploaded_file, tone, model_choice):
    """Main pipeline: read input, rewrite with Granite, synthesize audio."""
    if not model_loaded:
        return (
            "❌ Please load the AI model first! Click 'Load Model' before processing.",
            None,
            None,
            None,
        )

    # Determine the input text; an uploaded file takes precedence
    text_to_process = ""
    if uploaded_file is not None:
        try:
            # Depending on the Gradio version, the upload arrives as a
            # file path or as a file-like object
            if isinstance(uploaded_file, str):
                with open(uploaded_file, "rb") as f:
                    content = f.read()
            else:
                content = uploaded_file.read()
            if isinstance(content, bytes):
                text_to_process = content.decode("utf-8")
            else:
                text_to_process = str(content)
        except Exception as e:
            return f"Error reading file: {e}", None, None, None
    elif input_text:
        text_to_process = input_text
    else:
        return "Please provide text input or upload a file.", None, None, None

    # Truncate overly long input
    if len(text_to_process) > 2000:
        text_to_process = text_to_process[:2000]
        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
    else:
        status_msg = f"✅ Processing {len(text_to_process)} characters."

    # Rewrite the text with the AI model
    try:
        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
    except Exception as e:
        return f"Error in text rewriting: {e}", None, None, None

    # Generate the audio
    try:
        audio_file_path = generate_audio_gtts(rewritten_text)
        if audio_file_path is None:
            return (
                f"{status_msg} ❌ Failed to generate audio.",
                text_to_process,
                rewritten_text,
                None,
            )
    except Exception as e:
        return f"Error generating audio: {e}", text_to_process, rewritten_text, None

    return status_msg, text_to_process, rewritten_text, audio_file_path


def get_model_status():
    """Report whether the model is loaded and on which device."""
    if model_loaded:
        device = "GPU" if torch.cuda.is_available() else "CPU"
        return f"✅ Model loaded on {device}"
    return "❌ Model not loaded"


# Build the Gradio interface
def create_interface():
    with gr.Blocks(
        title="EchoVerse - Local AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { font-family: 'Arial', sans-serif; }
        .main-header { text-align: center; color: #2E86AB; margin-bottom: 20px; }
        .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
        """,
    ) as demo:
        # Header; an f-string so the GPU check and TTS engine interpolate
        gr.HTML(f"""
            <div class="main-header">
                <h1>EchoVerse Local</h1>
                <h3>Transform Text into Expressive Audiobooks with Local AI</h3>
                <p>Powered by IBM Granite 3B - No internet required for AI processing!</p>
                <p>GPU Available: {'✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'}</p>
                <p>TTS Engine: {TTS_CONFIG['engine']}</p>
            </div>
        """)
```
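The listing above stops partway through `create_interface()`. Below is a minimal sketch of how the remaining layout, event wiring, and launch might look, continuing inside the `with gr.Blocks(...) as demo:` block; all component names here (`load_btn`, `generate_btn`, and so on) are assumptions, not part of the original source:

```python
        # --- Hypothetical continuation (assumed component names) ---
        with gr.Row():
            model_choice = gr.Dropdown(
                choices=list(MODEL_CONFIG["models"].keys()),
                value="granite-3b",
                label="Model",
            )
            load_btn = gr.Button("Load Model")
            model_status = gr.Textbox(value=get_model_status(), label="Model Status")

        with gr.Row():
            input_text = gr.Textbox(lines=8, label="Paste Text")
            uploaded_file = gr.File(label="...or upload a .txt file")

        tone = gr.Radio(choices=list(TONE_PROMPTS.keys()), value="Neutral", label="Tone")
        generate_btn = gr.Button("Create Audiobook")

        status = gr.Textbox(label="Status")
        original_out = gr.Textbox(label="Original Text")
        rewritten_out = gr.Textbox(label="Rewritten Text")
        audio_out = gr.Audio(label="Audiobook", type="filepath")

        # Wire the buttons to the callbacks defined earlier
        load_btn.click(fn=load_granite_model, inputs=model_choice, outputs=model_status)
        generate_btn.click(
            fn=process_audiobook,
            inputs=[input_text, uploaded_file, tone, model_choice],
            outputs=[status, original_out, rewritten_out, audio_out],
        )

    return demo


if __name__ == "__main__":
    create_interface().launch()
```

Note that `process_audiobook` returns a four-tuple of (status message, original text, rewritten text, audio file path), so the `outputs` list maps one component to each element; on any failure the audio slot is `None` so the `gr.Audio` component simply stays empty.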