"""Multimedia & Advanced Processing page.

Builds a Gradio page with two tabs of tools:
  * Vision & Media Processing — captioning, OCR, tagging, mind maps,
    document scanning, QR codes, charts and flowcharts.
  * Voice & Audio Features — transcription, voice notes, task dictation,
    language detection, meeting transcription, TTS and audio reminders.

Each tool wires a small handler (wrapped in ``@handle_exceptions``) to a
button; handlers close over the shared application ``state`` dict.
"""

import gradio as gr
import datetime
from typing import Dict, List, Any, Union, Optional
import os
import numpy as np
from pathlib import Path
from PIL import Image
import io
import base64

# Import utilities
from utils.storage import load_data, save_data
from utils.state import generate_id, get_timestamp, record_activity
from utils.multimedia import (
    analyze_image, extract_text_from_image, generate_qr_code,
    scan_document, generate_mind_map, transcribe_speech,
    text_to_speech, detect_language, tag_image, create_diagram
)
from utils.logging import get_logger
from utils.error_handling import handle_exceptions, ValidationError

# Initialize logger
logger = get_logger(__name__)


def create_multimedia_page(state: Dict[str, Any]) -> None:
    """
    Create the Multimedia Processing page with various media processing tools

    Args:
        state: Application state (mutated in place: "multimedia", and lazily
            "notes", "tasks" and "reminders" lists as features are used)
    """
    # Initialize multimedia data if not present
    if "multimedia" not in state:
        state["multimedia"] = {
            "processed_items": [],
            "settings": {
                "default_language": "en",
                "image_quality": "medium",
                "save_processed": True
            }
        }

    # Create the multimedia page layout
    with gr.Column(elem_id="multimedia-page"):
        gr.Markdown("# 🖼️ Multimedia & Advanced Processing")
        gr.Markdown("*Process images, audio, and create visual content with AI-powered tools*")

        # Create tabs for different multimedia processing categories
        with gr.Tabs() as multimedia_tabs:
            # Vision & Media Processing Tab
            with gr.TabItem("🖼️ Vision & Media Processing"):
                # Image Captioning Section
                with gr.Box():
                    gr.Markdown("### 📷 Image Captioning")
                    gr.Markdown("*Generate descriptive captions for images using BLIP model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            caption_output = gr.Textbox(label="Generated Caption", lines=3)
                            caption_button = gr.Button("Generate Caption", variant="primary")

                    @handle_exceptions
                    def generate_image_caption(image):
                        """Return a generated caption for *image*, or a prompt if none given."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Generating image caption")
                        caption = analyze_image(image)
                        # Record activity
                        record_activity({
                            "type": "image_captioned",
                            "timestamp": get_timestamp()
                        })
                        return caption

                    caption_button.click(
                        generate_image_caption,
                        inputs=[image_input],
                        outputs=[caption_output]
                    )

                # OCR Text Extraction Section
                with gr.Box():
                    gr.Markdown("### 📝 OCR Text Extraction")
                    gr.Markdown("*Extract text from images using OCR technology*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            ocr_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            ocr_text_output = gr.Textbox(label="Extracted Text", lines=5)
                            ocr_button = gr.Button("Extract Text", variant="primary")

                    @handle_exceptions
                    def extract_text(image):
                        """Run OCR over *image* and return the extracted text."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Extracting text from image")
                        text = extract_text_from_image(image)
                        # Record activity
                        record_activity({
                            "type": "ocr_performed",
                            "timestamp": get_timestamp()
                        })
                        return text

                    ocr_button.click(
                        extract_text,
                        inputs=[ocr_image_input],
                        outputs=[ocr_text_output]
                    )

                # Image Tagging Section
                with gr.Box():
                    gr.Markdown("### 🏷️ Image Tagging")
                    gr.Markdown("*Automatically generate tags for images*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tag_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            tag_output = gr.Dataframe(
                                headers=["Tags"],
                                datatype=["str"],
                                label="Generated Tags"
                            )
                            tag_button = gr.Button("Generate Tags", variant="primary")

                    @handle_exceptions
                    def generate_image_tags(image):
                        """Return a one-column dataframe of auto-generated tags for *image*."""
                        if image is None:
                            return [["Please upload an image first."]]
                        logger.info("Generating image tags")
                        tags = tag_image(image)
                        # Format tags for dataframe (one tag per row)
                        tags_df = [[tag] for tag in tags]
                        # Record activity
                        record_activity({
                            "type": "image_tagged",
                            "timestamp": get_timestamp()
                        })
                        return tags_df

                    tag_button.click(
                        generate_image_tags,
                        inputs=[tag_image_input],
                        outputs=[tag_output]
                    )

                # Mind Map Generation Section
                with gr.Box():
                    gr.Markdown("### 🧠 Mind Map Generation")
                    gr.Markdown("*Create visual mind maps from topics*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            topics_input = gr.Textbox(
                                label="Enter Topics (one per line)",
                                lines=5,
                                placeholder="Main Topic\nSubtopic 1\nSubtopic 2\nSubtopic 3"
                            )
                        with gr.Column(scale=3):
                            mindmap_output = gr.Image(type="pil", label="Generated Mind Map")
                            mindmap_button = gr.Button("Generate Mind Map", variant="primary")

                    @handle_exceptions
                    def create_mind_map(topics_text):
                        """Build a mind-map image from newline-separated topics.

                        The first topic is treated as the root; all others connect
                        to it (simple radial structure).

                        Raises:
                            ValidationError: if fewer than 2 non-blank topics given.
                        """
                        if not topics_text.strip():
                            return None
                        # Parse topics from text
                        topics = [topic.strip() for topic in topics_text.split("\n") if topic.strip()]
                        if len(topics) < 2:
                            raise ValidationError("Please enter at least 2 topics.")
                        logger.info(f"Generating mind map with {len(topics)} topics")
                        # Generate connections (simple radial structure)
                        connections = [(0, i) for i in range(1, len(topics))]
                        # Generate mind map
                        mind_map = generate_mind_map(topics, connections)
                        # Record activity
                        record_activity({
                            "type": "mind_map_created",
                            "timestamp": get_timestamp()
                        })
                        return mind_map

                    mindmap_button.click(
                        create_mind_map,
                        inputs=[topics_input],
                        outputs=[mindmap_output]
                    )

                # Document Scanning Section
                with gr.Box():
                    gr.Markdown("### 📄 Document Scanning")
                    gr.Markdown("*Scan documents and extract text*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            scan_image_input = gr.Image(type="pil", label="Upload Document Image")
                        with gr.Column(scale=3):
                            with gr.Row():
                                scan_image_output = gr.Image(type="pil", label="Processed Document")
                            with gr.Row():
                                scan_text_output = gr.Textbox(label="Extracted Text", lines=5)
                            scan_button = gr.Button("Scan Document", variant="primary")

                    @handle_exceptions
                    def scan_doc(image):
                        """Scan *image*; return (processed image, extracted text)."""
                        if image is None:
                            return None, "Please upload an image first."
                        logger.info("Scanning document")
                        result = scan_document(image)
                        # Record activity
                        record_activity({
                            "type": "document_scanned",
                            "timestamp": get_timestamp()
                        })
                        return result["processed_image"], result["text"]

                    scan_button.click(
                        scan_doc,
                        inputs=[scan_image_input],
                        outputs=[scan_image_output, scan_text_output]
                    )

                # QR Code Generator Section
                with gr.Box():
                    gr.Markdown("### 📱 QR Code Generator")
                    gr.Markdown("*Create QR codes from text or URLs*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            qr_text_input = gr.Textbox(
                                label="Enter Text or URL",
                                lines=3,
                                placeholder="https://example.com"
                            )
                            with gr.Row():
                                qr_size_input = gr.Slider(
                                    minimum=5,
                                    maximum=20,
                                    value=10,
                                    step=1,
                                    label="Box Size"
                                )
                                qr_border_input = gr.Slider(
                                    minimum=1,
                                    maximum=10,
                                    value=4,
                                    step=1,
                                    label="Border Size"
                                )
                        with gr.Column(scale=3):
                            qr_output = gr.Image(type="pil", label="Generated QR Code")
                            qr_button = gr.Button("Generate QR Code", variant="primary")

                    @handle_exceptions
                    def create_qr_code(text, box_size, border):
                        """Encode *text* as a QR code image with the given box/border sizes."""
                        if not text.strip():
                            return None
                        logger.info("Generating QR code")
                        # Sliders deliver floats; the QR library wants ints.
                        qr_code = generate_qr_code(text, int(box_size), int(border))
                        # Record activity
                        record_activity({
                            "type": "qr_code_generated",
                            "timestamp": get_timestamp()
                        })
                        return qr_code

                    qr_button.click(
                        create_qr_code,
                        inputs=[qr_text_input, qr_size_input, qr_border_input],
                        outputs=[qr_output]
                    )

                # Chart Generation Section
                with gr.Box():
                    gr.Markdown("### 📊 Chart Generation")
                    gr.Markdown("*Create charts and visualizations from data*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            chart_type = gr.Dropdown(
                                choices=["bar_chart", "pie_chart"],
                                label="Chart Type",
                                value="bar_chart"
                            )
                            chart_title = gr.Textbox(label="Chart Title", value="My Chart")
                            chart_labels = gr.Textbox(
                                label="Labels (comma separated)",
                                placeholder="Label1, Label2, Label3",
                                value="Category A, Category B, Category C"
                            )
                            chart_values = gr.Textbox(
                                label="Values (comma separated)",
                                placeholder="10, 20, 30",
                                value="30, 50, 20"
                            )
                        with gr.Column(scale=3):
                            chart_output = gr.Image(type="pil", label="Generated Chart")
                            chart_button = gr.Button("Generate Chart", variant="primary")

                    @handle_exceptions
                    def create_chart(chart_type, title, labels, values):
                        """Render a chart image from comma-separated labels and values.

                        Raises:
                            ValidationError: if values are non-numeric or the label
                                and value counts differ.
                        """
                        if not labels.strip() or not values.strip():
                            return None
                        # Parse labels and values
                        labels_list = [label.strip() for label in labels.split(",") if label.strip()]
                        try:
                            values_list = [float(val.strip()) for val in values.split(",") if val.strip()]
                        except ValueError:
                            raise ValidationError("Values must be numbers.")
                        if len(labels_list) != len(values_list):
                            raise ValidationError("Number of labels must match number of values.")
                        logger.info(f"Generating {chart_type} chart")
                        # Prepare data for diagram creation
                        data = {
                            "title": title,
                            "labels": labels_list,
                            "values": values_list,
                            "x_label": "Categories",
                            "y_label": "Values"
                        }
                        # Create chart
                        chart = create_diagram(chart_type, data)
                        # Record activity
                        record_activity({
                            "type": "chart_generated",
                            "chart_type": chart_type,
                            "timestamp": get_timestamp()
                        })
                        return chart

                    chart_button.click(
                        create_chart,
                        inputs=[chart_type, chart_title, chart_labels, chart_values],
                        outputs=[chart_output]
                    )

                # Diagram Creation Section
                with gr.Box():
                    gr.Markdown("### 📈 Diagram Creation")
                    gr.Markdown("*Create flowcharts and diagrams*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            flowchart_nodes = gr.Textbox(
                                label="Nodes (one per line, format: id:label)",
                                lines=5,
                                placeholder="start:Start\nprocess:Process Data\nend:End",
                                value="start:Start\nprocess:Process Data\ndecision:Make Decision\nend:End"
                            )
                            flowchart_edges = gr.Textbox(
                                label="Edges (one per line, format: source->target:label)",
                                lines=5,
                                placeholder="start->process:begin\nprocess->end:complete",
                                value="start->process:begin\nprocess->decision:analyze\ndecision->end:yes\ndecision->process:no, retry"
                            )
                        with gr.Column(scale=3):
                            flowchart_output = gr.Image(type="pil", label="Generated Flowchart")
                            flowchart_button = gr.Button("Generate Flowchart", variant="primary")

                    @handle_exceptions
                    def create_flowchart(nodes_text, edges_text):
                        """Parse node/edge text specs and render a flowchart image.

                        Node lines are ``id:label`` (label defaults to the id);
                        edge lines are ``source->target:label`` (label optional).
                        Edge lines without ``->`` are silently skipped.
                        """
                        if not nodes_text.strip() or not edges_text.strip():
                            return None
                        # Parse nodes
                        nodes = []
                        for line in nodes_text.split("\n"):
                            if not line.strip():
                                continue
                            parts = line.strip().split(":", 1)
                            if len(parts) == 2:
                                node_id, label = parts
                                nodes.append({"id": node_id.strip(), "label": label.strip()})
                            else:
                                # No explicit label: reuse the id as the label
                                nodes.append({"id": parts[0].strip(), "label": parts[0].strip()})
                        # Parse edges
                        edges = []
                        for line in edges_text.split("\n"):
                            if not line.strip():
                                continue
                            # Check if there's a label
                            if ":" in line:
                                connection, label = line.strip().split(":", 1)
                            else:
                                connection, label = line.strip(), ""
                            # Parse source and target
                            if "->" in connection:
                                source, target = connection.split("->", 1)
                                edges.append({
                                    "source": source.strip(),
                                    "target": target.strip(),
                                    "label": label.strip()
                                })
                        logger.info(f"Generating flowchart with {len(nodes)} nodes and {len(edges)} edges")
                        # Prepare data for diagram creation
                        data = {
                            "nodes": nodes,
                            "edges": edges
                        }
                        # Create flowchart
                        flowchart = create_diagram("flowchart", data)
                        # Record activity
                        record_activity({
                            "type": "flowchart_created",
                            "timestamp": get_timestamp()
                        })
                        return flowchart

                    flowchart_button.click(
                        create_flowchart,
                        inputs=[flowchart_nodes, flowchart_edges],
                        outputs=[flowchart_output]
                    )

            # Voice & Audio Features Tab
            with gr.TabItem("🎤 Voice & Audio Features"):
                # Speech-to-Text Section
                with gr.Box():
                    gr.Markdown("### 🗣️ Speech-to-Text")
                    gr.Markdown("*Convert speech to text using Whisper model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            transcript_output = gr.Textbox(label="Transcription", lines=5)
                            transcribe_button = gr.Button("Transcribe", variant="primary")

                    @handle_exceptions
                    def transcribe_audio(audio_path):
                        """Transcribe the audio file at *audio_path* to text."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Transcribing audio")
                        transcript = transcribe_speech(audio_path)
                        # Record activity
                        record_activity({
                            "type": "speech_transcribed",
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    transcribe_button.click(
                        transcribe_audio,
                        inputs=[audio_input],
                        outputs=[transcript_output]
                    )

                # Voice Notes Section
                with gr.Box():
                    gr.Markdown("### 📝 Voice Notes")
                    gr.Markdown("*Record voice notes and save them with transcriptions*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            voice_note_audio = gr.Audio(type="filepath", label="Record Voice Note")
                            voice_note_title = gr.Textbox(label="Note Title", placeholder="Meeting Notes")
                        with gr.Column(scale=3):
                            voice_note_transcript = gr.Textbox(label="Transcription", lines=5)
                            voice_note_button = gr.Button("Save Voice Note", variant="primary")

                    @handle_exceptions
                    def save_voice_note(audio_path, title):
                        """Transcribe a recording and persist it as a note; return the transcript."""
                        if audio_path is None:
                            return "Please record audio first."
                        if not title.strip():
                            # Default title: timestamped
                            title = "Voice Note - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Saving voice note: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": transcript,
                            "tags": ["voice-note"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "voice_note_created",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    voice_note_button.click(
                        save_voice_note,
                        inputs=[voice_note_audio, voice_note_title],
                        outputs=[voice_note_transcript]
                    )

                # Task Dictation Section
                with gr.Box():
                    gr.Markdown("### 📋 Task Dictation")
                    gr.Markdown("*Create tasks using voice commands*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            task_audio = gr.Audio(type="filepath", label="Record Task")
                        with gr.Column(scale=3):
                            task_transcript = gr.Textbox(label="Transcription", lines=3)
                            task_title = gr.Textbox(label="Task Title")
                            task_button = gr.Button("Create Task", variant="primary")

                    @handle_exceptions
                    def create_task_from_voice(audio_path):
                        """Transcribe a recording; return (transcript, suggested title)."""
                        if audio_path is None:
                            return "Please record audio first.", ""
                        logger.info("Creating task from voice")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Extract task title (first sentence or first 50 chars)
                        if "." in transcript:
                            title = transcript.split(".")[0] + "."
                        else:
                            title = transcript[:min(50, len(transcript))]
                            if len(transcript) > 50:
                                title += "..."
                        return transcript, title

                    @handle_exceptions
                    def save_dictated_task(audio_path, transcript, title):
                        """Persist a dictated task built from the transcript and title."""
                        if not transcript.strip() or not title.strip():
                            return "Please provide task details."
                        # Create a new task
                        if "tasks" not in state:
                            state["tasks"] = []
                        new_task = {
                            "id": generate_id(),
                            "title": title,
                            "description": transcript,
                            "status": "To Do",
                            "priority": "Medium",
                            "created_at": get_timestamp(),
                            # Default due date: one week out
                            "due_date": (datetime.datetime.now() + datetime.timedelta(days=7)).strftime("%Y-%m-%d"),
                            "tags": ["dictated"]
                        }
                        state["tasks"].append(new_task)
                        save_data("tasks.json", state["tasks"])
                        # Record activity
                        record_activity({
                            "type": "task_dictated",
                            "task_id": new_task["id"],
                            "timestamp": get_timestamp()
                        })
                        return "Task created successfully!"

                    task_audio.change(
                        create_task_from_voice,
                        inputs=[task_audio],
                        outputs=[task_transcript, task_title]
                    )
                    # NOTE(review): the status Textbox is created inline here, outside
                    # the layout above — it may not render where intended; confirm.
                    task_button.click(
                        save_dictated_task,
                        inputs=[task_audio, task_transcript, task_title],
                        outputs=[gr.Textbox(label="Status")]
                    )

                # Language Detection Section
                with gr.Box():
                    gr.Markdown("### 🌐 Language Detection")
                    gr.Markdown("*Automatically detect language from speech*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            lang_audio = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            lang_output = gr.Textbox(label="Detected Language")
                            lang_button = gr.Button("Detect Language", variant="primary")

                    @handle_exceptions
                    def detect_speech_language(audio_path):
                        """Detect the spoken language; return '<Name> (<code>)'."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Detecting language from speech")
                        language_code = detect_language(audio_path)
                        # Map language code to full name
                        language_names = {
                            "en": "English",
                            "fr": "French",
                            "es": "Spanish",
                            "de": "German",
                            "it": "Italian",
                            "pt": "Portuguese",
                            "nl": "Dutch",
                            "ru": "Russian",
                            "ja": "Japanese",
                            "zh": "Chinese",
                            "ar": "Arabic"
                        }
                        language_name = language_names.get(language_code, f"Unknown ({language_code})")
                        # Record activity
                        record_activity({
                            "type": "language_detected",
                            "language": language_code,
                            "timestamp": get_timestamp()
                        })
                        return f"{language_name} ({language_code})"

                    lang_button.click(
                        detect_speech_language,
                        inputs=[lang_audio],
                        outputs=[lang_output]
                    )

                # Audio Transcription Section
                with gr.Box():
                    gr.Markdown("### 📝 Audio Transcription")
                    gr.Markdown("*Transcribe longer audio recordings like meetings*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            meeting_audio = gr.Audio(type="filepath", label="Upload Audio Recording")
                            meeting_title = gr.Textbox(label="Meeting Title", placeholder="Team Meeting")
                        with gr.Column(scale=3):
                            meeting_transcript = gr.Textbox(label="Transcription", lines=10)
                            meeting_button = gr.Button("Transcribe & Save", variant="primary")

                    @handle_exceptions
                    def transcribe_meeting(audio_path, title):
                        """Transcribe a meeting recording and save it as a markdown note."""
                        if audio_path is None:
                            return "Please upload audio first."
                        if not title.strip():
                            # Default title: timestamped
                            title = "Meeting - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Transcribing meeting: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note for the meeting
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": f"# {title}\n\n{transcript}",
                            "tags": ["meeting", "transcript"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "meeting_transcribed",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    meeting_button.click(
                        transcribe_meeting,
                        inputs=[meeting_audio, meeting_title],
                        outputs=[meeting_transcript]
                    )

                # Text-to-Speech Section
                with gr.Box():
                    gr.Markdown("### 🔊 Text-to-Speech")
                    gr.Markdown("*Convert text to speech for accessibility*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tts_text = gr.Textbox(
                                label="Text to Convert",
                                lines=5,
                                placeholder="Enter text to convert to speech"
                            )
                            with gr.Row():
                                tts_lang = gr.Dropdown(
                                    choices=["en", "fr", "es", "de", "it"],
                                    label="Language",
                                    value="en"
                                )
                                tts_slow = gr.Checkbox(label="Slow Speed", value=False)
                        with gr.Column(scale=3):
                            tts_output = gr.Audio(label="Generated Speech")
                            tts_button = gr.Button("Generate Speech", variant="primary")

                    @handle_exceptions
                    def convert_text_to_speech(text, lang, slow):
                        """Synthesize speech for *text*; return the path to the MP3 file."""
                        if not text.strip():
                            return None
                        logger.info(f"Converting text to speech in {lang}")
                        # Convert text to speech
                        audio_data = text_to_speech(text, lang, slow)
                        # Save to a temporary file
                        temp_path = os.path.join(os.path.dirname(__file__), "temp_tts.mp3")
                        with open(temp_path, "wb") as f:
                            f.write(audio_data)
                        # Record activity
                        record_activity({
                            "type": "text_to_speech",
                            "language": lang,
                            "timestamp": get_timestamp()
                        })
                        return temp_path

                    tts_button.click(
                        convert_text_to_speech,
                        inputs=[tts_text, tts_lang, tts_slow],
                        outputs=[tts_output]
                    )

                # Audio Reminders Section
                with gr.Box():
                    gr.Markdown("### ⏰ Audio Reminders")
                    gr.Markdown("*Create spoken reminders*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            reminder_text = gr.Textbox(
                                label="Reminder Text",
                                placeholder="Take a break and stretch"
                            )
                            reminder_time = gr.Textbox(
                                label="Time (HH:MM)",
                                placeholder="14:30",
                                value=datetime.datetime.now().strftime("%H:%M")
                            )
                        with gr.Column(scale=3):
                            reminder_preview = gr.Audio(label="Reminder Preview")
                            reminder_status = gr.Textbox(label="Status")
                            reminder_button = gr.Button("Create Reminder", variant="primary")
                            preview_button = gr.Button("Preview")

                    @handle_exceptions
                    def preview_reminder(text):
                        """Synthesize the reminder text (English) and return the MP3 path."""
                        if not text.strip():
                            return None
                        logger.info("Previewing reminder")
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # Save to a temporary file
                        temp_path = os.path.join(os.path.dirname(__file__), "temp_reminder.mp3")
                        with open(temp_path, "wb") as f:
                            f.write(audio_data)
                        return temp_path

                    @handle_exceptions
                    def create_reminder(text, time_str):
                        """Validate the time, synthesize audio, and persist a reminder."""
                        if not text.strip() or not time_str.strip():
                            return "Please provide both reminder text and time."
                        # Validate time format (24-hour HH:MM)
                        try:
                            hour, minute = map(int, time_str.split(":"))
                            if hour < 0 or hour > 23 or minute < 0 or minute > 59:
                                return "Invalid time format. Please use HH:MM (24-hour format)."
                        except ValueError:
                            return "Invalid time format. Please use HH:MM (24-hour format)."
                        logger.info(f"Creating reminder for {time_str}: {text}")
                        # In a real app, you would schedule this reminder
                        # For this demo, we'll just save it
                        # Initialize reminders if not present
                        if "reminders" not in state:
                            state["reminders"] = []
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # Save to a file (in a real app, you'd use a better file naming scheme)
                        reminder_id = generate_id()
                        audio_path = os.path.join(os.path.dirname(__file__), f"reminder_{reminder_id}.mp3")
                        with open(audio_path, "wb") as f:
                            f.write(audio_data)
                        # Create reminder object
                        reminder = {
                            "id": reminder_id,
                            "text": text,
                            "time": time_str,
                            "audio_path": audio_path,
                            "created_at": get_timestamp(),
                            "active": True
                        }
                        state["reminders"].append(reminder)
                        save_data("reminders.json", state["reminders"])
                        # Record activity
                        record_activity({
                            "type": "reminder_created",
                            "reminder_id": reminder_id,
                            "timestamp": get_timestamp()
                        })
                        return f"Reminder set for {time_str}"

                    preview_button.click(
                        preview_reminder,
                        inputs=[reminder_text],
                        outputs=[reminder_preview]
                    )
                    reminder_button.click(
                        create_reminder,
                        inputs=[reminder_text, reminder_time],
                        outputs=[reminder_status]
                    )

    # Record page visit in activity
    record_activity({
        "type": "page_viewed",
        "page": "Multimedia & Advanced Processing",
        "timestamp": get_timestamp()
    })