# File size: 10,965 Bytes
# d39bedc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import io
import tempfile
import os
import json

# Inline configuration (kept in this file because there is no separate config.py).
MODEL_CONFIG = {
    # Short name shown in the UI -> Hugging Face repository id.
    "models": {
        f"granite-{size}": f"ibm-granite/granite-{size}-code-base"
        for size in ("3b", "8b")
    },
    # Keyword arguments passed to model.generate(); pad_token_id is filled in
    # from the tokenizer right before generation.
    "generation_params": {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": None,
    },
}

# Text-to-speech settings; the UI displays the "engine" entry.
TTS_CONFIG = {"engine": "gtts", "voice_speed": 150, "voice_volume": 0.9}

# Instruction placed in front of the user's text for each selectable tone.
TONE_PROMPTS = {
    tone: f"Rewrite the following text {style}:"
    for tone, style in (
        ("Neutral", "in a clear, neutral tone suitable for audiobook narration"),
        ("Suspenseful", "with suspenseful, engaging language that builds tension"),
        ("Inspiring", "in an inspiring, motivational tone that uplifts the reader"),
    )
}

# Shared model state, populated by load_granite_model().
model = tokenizer = None
model_loaded = False

def load_granite_model(model_name="granite-3b"):
    """Load an IBM Granite model and its tokenizer into the module globals.

    Args:
        model_name: Key into ``MODEL_CONFIG["models"]`` selecting the checkpoint.

    Returns:
        A status string for display: a success message, or an error message
        describing why nothing was loaded. Load failures are reported through
        the return value rather than raised.

    Side effects:
        On success ``model``, ``tokenizer`` and ``model_loaded`` are replaced
        together. When a load is attempted and fails, ``model_loaded`` is left
        ``False`` and both globals are ``None``. An unknown ``model_name``
        leaves the current state untouched.
    """
    global model, tokenizer, model_loaded

    # Validate the name up front so a bad key yields a status message instead
    # of an unhandled KeyError.
    model_id = MODEL_CONFIG["models"].get(model_name)
    if model_id is None:
        available = ", ".join(MODEL_CONFIG["models"])
        return (
            f"❌ Error loading model: unknown model '{model_name}' "
            f"(available: {available})"
        )

    # Drop references to any previously loaded model before loading the next
    # one, so both sets of weights do not have to be held at the same time.
    model_loaded = False
    model = None
    tokenizer = None

    try:
        use_gpu = torch.cuda.is_available()

        new_tokenizer = AutoTokenizer.from_pretrained(model_id)
        if new_tokenizer.pad_token is None:
            # Fall back to EOS so padding/generation has a valid pad token.
            new_tokenizer.pad_token = new_tokenizer.eos_token

        # NOTE(review): trust_remote_code=True allows code shipped in the model
        # repository to run locally; confirm it is actually required.
        new_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if use_gpu else torch.float32,
            device_map="auto" if use_gpu else None,
            trust_remote_code=True,
        )
    except Exception as e:
        return f"❌ Error loading model: {e}"

    # Publish tokenizer and model together only after both loaded, so the
    # globals never hold a mismatched pair.
    tokenizer = new_tokenizer
    model = new_model
    model_loaded = True
    return "✅ Model loaded successfully!"

def rewrite_text_with_granite(text, tone):
    """Rewrite ``text`` in the requested tone using the loaded Granite model.

    Args:
        text: The text to rewrite.
        tone: Key into ``TONE_PROMPTS`` selecting the rewriting instruction.

    Returns:
        The rewritten text. The original ``text`` is returned unchanged when no
        model is loaded or when the model produced nothing. If generation
        fails, a string starting with ``"Error rewriting text: "`` is returned
        instead of raising.
    """
    global model, tokenizer, model_loaded

    if not model_loaded or model is None or tokenizer is None:
        return text

    marker = "Rewritten text:"

    try:
        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\n{marker}"

        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
        )
        # The model may live on a GPU (device_map="auto"); generation needs the
        # input tensors on the same device as the model.
        inputs = inputs.to(model.device)

        # Copy so the shared config is not mutated, then fill in the pad token.
        generation_params = MODEL_CONFIG["generation_params"].copy()
        generation_params["pad_token_id"] = tokenizer.pad_token_id

        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                **generation_params,
            )

        # Decode only the newly generated tokens. Slicing by token count stays
        # correct even when the prompt was truncated or does not decode back to
        # exactly the same string.
        prompt_token_count = inputs.input_ids.shape[1]
        continuation = tokenizer.decode(
            outputs[0][prompt_token_count:],
            skip_special_tokens=True,
        )

        # If the model repeats the marker, keep what follows its last occurrence.
        rewritten = continuation.split(marker)[-1].strip()

        return rewritten if rewritten else text

    except Exception as e:
        return f"Error rewriting text: {e}"

def generate_audio_gtts(text, language='en'):
    """Synthesize ``text`` to an MP3 file with Google Text-to-Speech.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        Path of the generated ``.mp3`` temporary file, or ``None`` when there
        is nothing to speak or synthesis fails. The caller owns the returned
        file; it is not deleted automatically.
    """
    import logging

    # Nothing to synthesize: report failure without calling gTTS at all.
    if not isinstance(text, str) or not text.strip():
        return None

    audio_path = None
    try:
        tts = gTTS(text=text, lang=language, slow=False)

        # Only reserve a unique path here and close the handle before gTTS
        # writes to it: a still-open NamedTemporaryFile cannot be reopened by
        # name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
            audio_path = tmp_file.name

        tts.save(audio_path)
        return audio_path

    except Exception:
        # Best effort: callers treat None as "audio unavailable". Log the cause
        # instead of discarding it silently.
        logging.getLogger(__name__).exception("gTTS audio generation failed")
        if audio_path is not None:
            # Do not leave an empty or partial temp file behind.
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None

def process_audiobook(input_text, uploaded_file, tone, model_choice):
    """Turn the user's text into rewritten text plus a narrated audio file.

    Args:
        input_text: Text typed into the UI; used only when no file is uploaded.
        uploaded_file: Optional upload. Raw bytes, a readable file-like object,
            or a filesystem path are accepted. Takes priority over
            ``input_text``.
        tone: Key into ``TONE_PROMPTS`` passed on to the rewriter.
        model_choice: Accepted for interface compatibility; not used here.

    Returns:
        A 4-tuple ``(status, original_text, rewritten_text, audio_path)``.
        ``audio_path`` is a file path on success and ``None`` whenever no audio
        was produced; all messages are reported through ``status``, because
        the fourth slot feeds an audio component expecting a path.
    """
    global model_loaded

    if not model_loaded:
        return (
            "❌ Please load the AI model first! "
            "Please click 'Load Model' button first.",
            None,
            None,
            None,
        )

    # Determine the input text; an uploaded file wins over the text box.
    if uploaded_file is not None:
        try:
            text_to_process = _read_uploaded_text(uploaded_file)
        except Exception as e:
            return f"Error reading file: {e}", None, None, None
    else:
        text_to_process = input_text or ""

    # Whitespace-only input would only fail later in speech synthesis.
    if not text_to_process.strip():
        return "Please provide text input or upload a file.", None, None, None

    # Truncate if too long
    if len(text_to_process) > 2000:
        text_to_process = text_to_process[:2000]
        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
    else:
        status_msg = f"✅ Processing {len(text_to_process)} characters."

    # Rewrite text with AI
    try:
        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
    except Exception as e:
        return f"Error in text rewriting: {e}", None, None, None

    # Generate audio
    try:
        audio_file_path = generate_audio_gtts(rewritten_text)
    except Exception as e:
        return (
            f"{status_msg} Error generating audio: {e}",
            text_to_process,
            rewritten_text,
            None,
        )

    if audio_file_path is None:
        return (
            f"{status_msg} ❌ Failed to generate audio.",
            text_to_process,
            rewritten_text,
            None,
        )

    return status_msg, text_to_process, rewritten_text, audio_file_path


def _read_uploaded_text(uploaded_file):
    """Return the content of an upload (bytes, file-like, or path) as text."""
    if isinstance(uploaded_file, (bytes, bytearray)):
        content = uploaded_file
    elif hasattr(uploaded_file, "read"):
        content = uploaded_file.read()
    else:
        # Anything else is treated as a filesystem path.
        with open(uploaded_file, "rb") as handle:
            content = handle.read()

    if isinstance(content, (bytes, bytearray)):
        # utf-8-sig decodes plain UTF-8 and also strips a leading BOM.
        return bytes(content).decode("utf-8-sig")
    return str(content)

def get_model_status():
    """Return a one-line status string describing the model load state.

    When a model is loaded, the message names "GPU" if CUDA is available on
    this machine and "CPU" otherwise.
    """
    if not model_loaded:
        return "❌ Model not loaded"

    if torch.cuda.is_available():
        device_label = "GPU"
    else:
        device_label = "CPU"
    return f"✅ Model loaded on {device_label}"

# Create Gradio interface
def create_interface():
    """Build the EchoVerse Gradio UI and return it without launching it.

    The layout is assembled top to bottom in creation order: header, model
    setup, text/file input, tone selection, the generate button, results
    (status, original and rewritten text, audio player) and a static system
    info panel. Two click handlers are registered: "Load Model" calls
    load_granite_model, and the generate button calls process_audiobook.

    Returns:
        The gr.Blocks application object.
    """
    # NOTE(review): the "���" sequences in several headings below look like
    # emoji lost to an encoding error; restore them from the original source.
    with gr.Blocks(
        title="EchoVerse - Local AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Arial', sans-serif;
        }
        .main-header {
            text-align: center;
            color: #2E86AB;
            margin-bottom: 20px;
        }
        .status-box {
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
        """
    ) as demo:

        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>��� EchoVerse Local</h1>
            <h3>Transform Text into Expressive Audiobooks with Local AI</h3>
            <p><i>Powered by IBM Granite 3B - No internet required for AI processing!</i></p>
        </div>
        """)

        # Model Setup Section
        with gr.Group():
            gr.HTML("<h2>��� AI Model Setup</h2>")

            with gr.Row():
                # Choices are the short model names defined in MODEL_CONFIG.
                model_choice = gr.Dropdown(
                    choices=list(MODEL_CONFIG["models"].keys()),
                    value="granite-3b",
                    label="Choose Granite Model",
                    info="3B model is recommended for most computers. 8B requires more RAM."
                )

                load_btn = gr.Button("Load Model", variant="primary")

            # Read-only box that shows the string returned by load_granite_model.
            model_status = gr.Textbox(
                label="Model Status",
                value="❌ Model not loaded",
                interactive=False
            )

        # Input Section
        with gr.Group():
            gr.HTML("<h2>��� Input Your Content</h2>")

            # NOTE(review): with type="binary" the Gradio File component is
            # documented to pass the upload to the handler as raw bytes;
            # verify the handler's file-reading code expects that.
            uploaded_file = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )

            input_text = gr.Textbox(
                label="Or paste your text here:",
                lines=8,
                placeholder="Enter the text you want to convert to an audiobook...",
                max_lines=15
            )

        # Configuration Section
        with gr.Group():
            gr.HTML("<h2>⚙️ Audio Configuration</h2>")

            with gr.Row():
                # These choices are used as keys into TONE_PROMPTS by the rewriter.
                tone = gr.Dropdown(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select Tone",
                    info="Choose how you want the text to be rewritten"
                )

        # Generate Button
        generate_btn = gr.Button("��� Generate Audiobook", variant="primary", size="lg")

        # Results Section
        with gr.Group():
            gr.HTML("<h2>��� Results</h2>")

            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )

            with gr.Row():
                original_text = gr.Textbox(
                    label="Original Text",
                    lines=10,
                    interactive=False
                )

                rewritten_text = gr.Textbox(
                    label="Rewritten Text",
                    lines=10,
                    interactive=False
                )

            # Audio Output
            gr.HTML("<h2>��� Your Audiobook</h2>")
            # type="filepath": the value supplied for this output is a file path.
            audio_output = gr.Audio(
                label="Generated Audiobook",
                type="filepath"
            )

        # System Info
        with gr.Group():
            gr.HTML("<h2>��� System Info</h2>")

            # The f-string is evaluated once, while the UI is being built, so
            # the GPU line reflects availability at build time.
            system_info = gr.HTML(f"""
            <div>
                <p><strong>GPU Available:</strong> {'✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'}</p>
                <p><strong>TTS Engine:</strong> {TTS_CONFIG['engine']}</p>
            </div>
            
            <h3>��� Tips</h3>
            <ul>
                <li>First model load takes time</li>
                <li>3B model: ~6GB RAM needed</li>
                <li>8B model: ~16GB RAM needed</li>
                <li>GPU greatly speeds up processing</li>
                <li>gTTS requires internet connection</li>
            </ul>
            """)

        # Event handlers
        # "Load Model": load the selected model and show the returned status.
        load_btn.click(
            fn=load_granite_model,
            inputs=[model_choice],
            outputs=[model_status]
        )

        # Generate: the four inputs map positionally onto process_audiobook's
        # parameters, and its 4-tuple result fills the four outputs in order.
        generate_btn.click(
            fn=process_audiobook,
            inputs=[input_text, uploaded_file, tone, model_choice],
            outputs=[status_output, original_text, rewritten_text, audio_output]
        )

    return demo

# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    # Bind to 0.0.0.0 on port 7860 without creating a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)