import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import tempfile
# Inline configuration (stands in for the missing config.py)
MODEL_CONFIG = {
    "models": {
        # Note: these are base (completion) checkpoints; the instruct variants
        # (e.g. ibm-granite/granite-3b-code-instruct) may follow the rewrite
        # prompts below more reliably.
        "granite-3b": "ibm-granite/granite-3b-code-base",
        "granite-8b": "ibm-granite/granite-8b-code-base"
    },
    "generation_params": {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": None  # filled in after the tokenizer is loaded
    }
}

TTS_CONFIG = {
    "engine": "gtts",
    # voice_speed/voice_volume are not used by gTTS; they are kept for
    # offline engines (see the sketch after generate_audio_gtts below)
    "voice_speed": 150,
    "voice_volume": 0.9
}

TONE_PROMPTS = {
    "Neutral": "Rewrite the following text in a clear, neutral tone suitable for audiobook narration:",
    "Suspenseful": "Rewrite the following text with suspenseful, engaging language that builds tension:",
    "Inspiring": "Rewrite the following text in an inspiring, motivational tone that uplifts the reader:"
}
# Global variables to store model state
model = None
tokenizer = None
model_loaded = False


def load_granite_model(model_name="granite-3b"):
    """Load an IBM Granite model locally."""
    global model, tokenizer, model_loaded
    model_id = MODEL_CONFIG["models"][model_name]
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Load model (fp16 on GPU, fp32 on CPU)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True
        )
        model_loaded = True
        return "✅ Model loaded successfully!"
    except Exception as e:
        model_loaded = False
        return f"❌ Error loading model: {str(e)}"
def rewrite_text_with_granite(text, tone):
    """Rewrite text using the local Granite model."""
    global model, tokenizer, model_loaded
    if not model_loaded or model is None or tokenizer is None:
        return text
    try:
        # Build the tone-specific prompt
        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\nRewritten text:"
        # Tokenize and move tensors to the model's device
        # (required when device_map="auto" places the model on GPU)
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        ).to(model.device)
        # Set pad_token_id for generation
        generation_params = MODEL_CONFIG["generation_params"].copy()
        generation_params["pad_token_id"] = tokenizer.pad_token_id
        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                **generation_params
            )
        # Decode the full sequence (prompt + continuation)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the rewritten part
        if "Rewritten text:" in generated_text:
            rewritten = generated_text.split("Rewritten text:")[-1].strip()
        else:
            rewritten = generated_text[len(prompt):].strip()
        return rewritten if rewritten else text
    except Exception as e:
        return f"Error rewriting text: {str(e)}"
def generate_audio_gtts(text, language='en'):
    """Generate audio using Google Text-to-Speech (requires internet)."""
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        # Save to a temporary MP3 file and return its path
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
            tts.save(tmp_file.name)
            return tmp_file.name
    except Exception:
        return None
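# Hedged sketch: TTS_CONFIG's unused voice_speed/voice_volume suggest an
# offline engine was planned. Since gTTS needs internet, something like
# pyttsx3 (an assumption; not installed or used by this Space) could be
# swapped in. A minimal sketch, assuming pyttsx3 is available:
def generate_audio_offline(text):
    """Generate audio locally with pyttsx3 (pip install pyttsx3) -- hypothetical helper."""
    import pyttsx3  # assumption: not among this Space's dependencies
    engine = pyttsx3.init()
    engine.setProperty('rate', TTS_CONFIG["voice_speed"])      # words per minute
    engine.setProperty('volume', TTS_CONFIG["voice_volume"])   # 0.0 to 1.0
    # Output container depends on the platform driver (often WAV/AIFF)
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        tmp_path = tmp_file.name
    engine.save_to_file(text, tmp_path)
    engine.runAndWait()
    return tmp_path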
def process_audiobook(input_text, uploaded_file, tone, model_choice):
    """Main processing function."""
    global model_loaded
    # Check that the model is loaded
    if not model_loaded:
        return (
            "❌ Please load the AI model first (click 'Load Model').",
            None,
            None,
            None
        )
    # Determine the input text
    text_to_process = ""
    if uploaded_file is not None:
        try:
            # gr.File(type="binary") passes the raw file content as bytes,
            # not a file object, so decode it directly
            if isinstance(uploaded_file, bytes):
                text_to_process = uploaded_file.decode('utf-8')
            else:
                text_to_process = str(uploaded_file)
        except Exception as e:
            return f"Error reading file: {str(e)}", None, None, None
    elif input_text:
        text_to_process = input_text
    else:
        return "Please provide text input or upload a file.", None, None, None
    # Truncate overly long input
    if len(text_to_process) > 2000:
        text_to_process = text_to_process[:2000]
        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
    else:
        status_msg = f"✅ Processing {len(text_to_process)} characters."
    # Rewrite text with the model
    try:
        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
    except Exception as e:
        return f"Error in text rewriting: {str(e)}", None, None, None
    # Generate audio (error messages go to the status box, never to the
    # Audio component, which expects a filepath or None)
    try:
        audio_file_path = generate_audio_gtts(rewritten_text)
        if audio_file_path is None:
            return f"{status_msg} ❌ Failed to generate audio.", text_to_process, rewritten_text, None
    except Exception as e:
        return f"{status_msg} Error generating audio: {str(e)}", text_to_process, rewritten_text, None
    return status_msg, text_to_process, rewritten_text, audio_file_path
def get_model_status():
    """Report the current model status."""
    global model_loaded
    if model_loaded:
        device = "GPU" if torch.cuda.is_available() else "CPU"
        return f"✅ Model loaded on {device}"
    return "❌ Model not loaded"
# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="EchoVerse - Local AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Arial', sans-serif;
        }
        .main-header {
            text-align: center;
            color: #2E86AB;
            margin-bottom: 20px;
        }
        .status-box {
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
        """
    ) as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎧 EchoVerse Local</h1>
            <h3>Transform Text into Expressive Audiobooks with Local AI</h3>
            <p><i>Powered by IBM Granite 3B - No internet required for AI processing!</i></p>
        </div>
        """)
        # Model setup section
        with gr.Group():
            gr.HTML("<h2>🤖 AI Model Setup</h2>")
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=list(MODEL_CONFIG["models"].keys()),
                    value="granite-3b",
                    label="Choose Granite Model",
                    info="3B model is recommended for most computers. 8B requires more RAM."
                )
                load_btn = gr.Button("Load Model", variant="primary")
            model_status = gr.Textbox(
                label="Model Status",
                value="❌ Model not loaded",
                interactive=False
            )
        # Input section
        with gr.Group():
            gr.HTML("<h2>📝 Input Your Content</h2>")
            uploaded_file = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            input_text = gr.Textbox(
                label="Or paste your text here:",
                lines=8,
                placeholder="Enter the text you want to convert to an audiobook...",
                max_lines=15
            )
        # Configuration section
        with gr.Group():
            gr.HTML("<h2>⚙️ Audio Configuration</h2>")
            with gr.Row():
                tone = gr.Dropdown(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select Tone",
                    info="Choose how you want the text to be rewritten"
                )
        # Generate button
        generate_btn = gr.Button("🎵 Generate Audiobook", variant="primary", size="lg")
        # Results section
        with gr.Group():
            gr.HTML("<h2>📊 Results</h2>")
            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )
            with gr.Row():
                original_text = gr.Textbox(
                    label="Original Text",
                    lines=10,
                    interactive=False
                )
                rewritten_text = gr.Textbox(
                    label="Rewritten Text",
                    lines=10,
                    interactive=False
                )
            # Audio output
            gr.HTML("<h2>🔊 Your Audiobook</h2>")
            audio_output = gr.Audio(
                label="Generated Audiobook",
                type="filepath"
            )
        # System info
        with gr.Group():
            gr.HTML("<h2>💻 System Info</h2>")
            gr.HTML(f"""
            <div>
                <p><strong>GPU Available:</strong> {'✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'}</p>
                <p><strong>TTS Engine:</strong> {TTS_CONFIG['engine']}</p>
            </div>
            <h3>💡 Tips</h3>
            <ul>
                <li>The first model load takes time</li>
                <li>3B model: ~6GB RAM needed</li>
                <li>8B model: ~16GB RAM needed</li>
                <li>A GPU greatly speeds up processing</li>
                <li>gTTS requires an internet connection</li>
            </ul>
            """)
        # Event handlers
        load_btn.click(
            fn=load_granite_model,
            inputs=[model_choice],
            outputs=[model_status]
        )
        generate_btn.click(
            fn=process_audiobook,
            inputs=[input_text, uploaded_file, tone, model_choice],
            outputs=[status_output, original_text, rewritten_text, audio_output]
        )
        # Refresh the status box on page load (wires up get_model_status,
        # which was otherwise never called)
        demo.load(fn=get_model_status, outputs=[model_status])
    return demo
# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
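# A minimal requirements.txt for this Space would need at least the packages
# imported above (exact versions are an assumption and left unpinned here):
#
#   gradio
#   torch
#   transformers
#   gtts
#   accelerate   # needed by device_map="auto" when a GPU is available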