# pages/multimedia.py — Multimedia & Advanced Processing page
# (originally uploaded by mrradix, commit 8e4018d)
import base64
import datetime
import io
import os
import tempfile
from pathlib import Path
from typing import Dict, List, Any, Union, Optional

import gradio as gr
import numpy as np
from PIL import Image

# Import utilities
from utils.storage import load_data, save_data
from utils.state import generate_id, get_timestamp, record_activity
from utils.multimedia import (
    analyze_image, extract_text_from_image, generate_qr_code, scan_document,
    generate_mind_map, transcribe_speech, text_to_speech, detect_language,
    tag_image, create_diagram
)
from utils.logging import get_logger
from utils.error_handling import handle_exceptions, ValidationError
# Initialize module-level logger, named after this module so log output
# can be filtered per page
logger = get_logger(__name__)
def create_multimedia_page(state: Dict[str, Any]) -> None:
    """
    Create the Multimedia Processing page with various media processing tools.

    Builds the Gradio layout (two tabs: "Vision & Media Processing" and
    "Voice & Audio Features") and wires each tool's controls to a handler
    closure. Handlers read and mutate ``state`` in place and persist notes,
    tasks and reminders via ``save_data``; each successful operation is also
    logged through ``record_activity``.

    Args:
        state: Application state shared across pages; mutated in place.
    """
    # Initialize multimedia data if not present
    if "multimedia" not in state:
        state["multimedia"] = {
            "processed_items": [],
            "settings": {
                "default_language": "en",
                "image_quality": "medium",
                "save_processed": True
            }
        }

    # Create the multimedia page layout
    with gr.Column(elem_id="multimedia-page"):
        gr.Markdown("# 🖼️ Multimedia & Advanced Processing")
        gr.Markdown("*Process images, audio, and create visual content with AI-powered tools*")

        # Create tabs for different multimedia processing categories
        with gr.Tabs() as multimedia_tabs:
            # Vision & Media Processing Tab
            with gr.TabItem("🖼️ Vision & Media Processing"):
                # Image Captioning Section
                with gr.Box():
                    gr.Markdown("### 📷 Image Captioning")
                    gr.Markdown("*Generate descriptive captions for images using BLIP model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            caption_output = gr.Textbox(label="Generated Caption", lines=3)
                    caption_button = gr.Button("Generate Caption", variant="primary")

                    @handle_exceptions
                    def generate_image_caption(image):
                        """Return a generated caption for the uploaded PIL image."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Generating image caption")
                        caption = analyze_image(image)
                        # Record activity
                        record_activity({
                            "type": "image_captioned",
                            "timestamp": get_timestamp()
                        })
                        return caption

                    caption_button.click(
                        generate_image_caption,
                        inputs=[image_input],
                        outputs=[caption_output]
                    )

                # OCR Text Extraction Section
                with gr.Box():
                    gr.Markdown("### 📝 OCR Text Extraction")
                    gr.Markdown("*Extract text from images using OCR technology*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            ocr_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            ocr_text_output = gr.Textbox(label="Extracted Text", lines=5)
                    ocr_button = gr.Button("Extract Text", variant="primary")

                    @handle_exceptions
                    def extract_text(image):
                        """Run OCR on the uploaded image and return the text."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Extracting text from image")
                        text = extract_text_from_image(image)
                        # Record activity
                        record_activity({
                            "type": "ocr_performed",
                            "timestamp": get_timestamp()
                        })
                        return text

                    ocr_button.click(
                        extract_text,
                        inputs=[ocr_image_input],
                        outputs=[ocr_text_output]
                    )

                # Image Tagging Section
                with gr.Box():
                    gr.Markdown("### 🏷️ Image Tagging")
                    gr.Markdown("*Automatically generate tags for images*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tag_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            tag_output = gr.Dataframe(
                                headers=["Tags"],
                                datatype=["str"],
                                label="Generated Tags"
                            )
                    tag_button = gr.Button("Generate Tags", variant="primary")

                    @handle_exceptions
                    def generate_image_tags(image):
                        """Return tags for the image, shaped as single-column dataframe rows."""
                        if image is None:
                            return [["Please upload an image first."]]
                        logger.info("Generating image tags")
                        tags = tag_image(image)
                        # Format tags for dataframe (one tag per row)
                        tags_df = [[tag] for tag in tags]
                        # Record activity
                        record_activity({
                            "type": "image_tagged",
                            "timestamp": get_timestamp()
                        })
                        return tags_df

                    tag_button.click(
                        generate_image_tags,
                        inputs=[tag_image_input],
                        outputs=[tag_output]
                    )

                # Mind Map Generation Section
                with gr.Box():
                    gr.Markdown("### 🧠 Mind Map Generation")
                    gr.Markdown("*Create visual mind maps from topics*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            topics_input = gr.Textbox(
                                label="Enter Topics (one per line)",
                                lines=5,
                                placeholder="Main Topic\nSubtopic 1\nSubtopic 2\nSubtopic 3"
                            )
                        with gr.Column(scale=3):
                            mindmap_output = gr.Image(type="pil", label="Generated Mind Map")
                    mindmap_button = gr.Button("Generate Mind Map", variant="primary")

                    @handle_exceptions
                    def create_mind_map(topics_text):
                        """Build a radial mind map: first topic is the hub, the rest are spokes.

                        Raises:
                            ValidationError: if fewer than 2 non-empty topics were entered.
                        """
                        if not topics_text.strip():
                            return None
                        # Parse topics from text, dropping blank lines
                        topics = [topic.strip() for topic in topics_text.split("\n") if topic.strip()]
                        if len(topics) < 2:
                            raise ValidationError("Please enter at least 2 topics.")
                        logger.info(f"Generating mind map with {len(topics)} topics")
                        # Generate connections (simple radial structure: node 0 -> every other node)
                        connections = [(0, i) for i in range(1, len(topics))]
                        # Generate mind map
                        mind_map = generate_mind_map(topics, connections)
                        # Record activity
                        record_activity({
                            "type": "mind_map_created",
                            "timestamp": get_timestamp()
                        })
                        return mind_map

                    mindmap_button.click(
                        create_mind_map,
                        inputs=[topics_input],
                        outputs=[mindmap_output]
                    )

                # Document Scanning Section
                with gr.Box():
                    gr.Markdown("### 📄 Document Scanning")
                    gr.Markdown("*Scan documents and extract text*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            scan_image_input = gr.Image(type="pil", label="Upload Document Image")
                        with gr.Column(scale=3):
                            with gr.Row():
                                scan_image_output = gr.Image(type="pil", label="Processed Document")
                            with gr.Row():
                                scan_text_output = gr.Textbox(label="Extracted Text", lines=5)
                    scan_button = gr.Button("Scan Document", variant="primary")

                    @handle_exceptions
                    def scan_doc(image):
                        """Scan the document image; return (processed image, extracted text)."""
                        if image is None:
                            return None, "Please upload an image first."
                        logger.info("Scanning document")
                        # scan_document returns a dict with "processed_image" and "text" keys
                        result = scan_document(image)
                        # Record activity
                        record_activity({
                            "type": "document_scanned",
                            "timestamp": get_timestamp()
                        })
                        return result["processed_image"], result["text"]

                    scan_button.click(
                        scan_doc,
                        inputs=[scan_image_input],
                        outputs=[scan_image_output, scan_text_output]
                    )

                # QR Code Generator Section
                with gr.Box():
                    gr.Markdown("### 📱 QR Code Generator")
                    gr.Markdown("*Create QR codes from text or URLs*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            qr_text_input = gr.Textbox(
                                label="Enter Text or URL",
                                lines=3,
                                placeholder="https://example.com"
                            )
                            with gr.Row():
                                qr_size_input = gr.Slider(
                                    minimum=5, maximum=20, value=10, step=1,
                                    label="Box Size"
                                )
                                qr_border_input = gr.Slider(
                                    minimum=1, maximum=10, value=4, step=1,
                                    label="Border Size"
                                )
                        with gr.Column(scale=3):
                            qr_output = gr.Image(type="pil", label="Generated QR Code")
                    qr_button = gr.Button("Generate QR Code", variant="primary")

                    @handle_exceptions
                    def create_qr_code(text, box_size, border):
                        """Generate a QR code image from text; sliders arrive as floats."""
                        if not text.strip():
                            return None
                        logger.info("Generating QR code")
                        # Sliders deliver floats; the QR library expects ints
                        qr_code = generate_qr_code(text, int(box_size), int(border))
                        # Record activity
                        record_activity({
                            "type": "qr_code_generated",
                            "timestamp": get_timestamp()
                        })
                        return qr_code

                    qr_button.click(
                        create_qr_code,
                        inputs=[qr_text_input, qr_size_input, qr_border_input],
                        outputs=[qr_output]
                    )

                # Chart Generation Section
                with gr.Box():
                    gr.Markdown("### 📊 Chart Generation")
                    gr.Markdown("*Create charts and visualizations from data*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            chart_type = gr.Dropdown(
                                choices=["bar_chart", "pie_chart"],
                                label="Chart Type",
                                value="bar_chart"
                            )
                            chart_title = gr.Textbox(label="Chart Title", value="My Chart")
                            chart_labels = gr.Textbox(
                                label="Labels (comma separated)",
                                placeholder="Label1, Label2, Label3",
                                value="Category A, Category B, Category C"
                            )
                            chart_values = gr.Textbox(
                                label="Values (comma separated)",
                                placeholder="10, 20, 30",
                                value="30, 50, 20"
                            )
                        with gr.Column(scale=3):
                            chart_output = gr.Image(type="pil", label="Generated Chart")
                    chart_button = gr.Button("Generate Chart", variant="primary")

                    @handle_exceptions
                    def create_chart(chart_type, title, labels, values):
                        """Parse comma-separated labels/values and render a chart image.

                        Raises:
                            ValidationError: if values are non-numeric or counts mismatch.
                        """
                        if not labels.strip() or not values.strip():
                            return None
                        # Parse labels and values from the comma-separated inputs
                        labels_list = [label.strip() for label in labels.split(",") if label.strip()]
                        try:
                            values_list = [float(val.strip()) for val in values.split(",") if val.strip()]
                        except ValueError:
                            raise ValidationError("Values must be numbers.")
                        if len(labels_list) != len(values_list):
                            raise ValidationError("Number of labels must match number of values.")
                        logger.info(f"Generating {chart_type} chart")
                        # Prepare data for diagram creation
                        data = {
                            "title": title,
                            "labels": labels_list,
                            "values": values_list,
                            "x_label": "Categories",
                            "y_label": "Values"
                        }
                        # Create chart
                        chart = create_diagram(chart_type, data)
                        # Record activity
                        record_activity({
                            "type": "chart_generated",
                            "chart_type": chart_type,
                            "timestamp": get_timestamp()
                        })
                        return chart

                    chart_button.click(
                        create_chart,
                        inputs=[chart_type, chart_title, chart_labels, chart_values],
                        outputs=[chart_output]
                    )

                # Diagram Creation Section
                with gr.Box():
                    gr.Markdown("### 📈 Diagram Creation")
                    gr.Markdown("*Create flowcharts and diagrams*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            flowchart_nodes = gr.Textbox(
                                label="Nodes (one per line, format: id:label)",
                                lines=5,
                                placeholder="start:Start\nprocess:Process Data\nend:End",
                                value="start:Start\nprocess:Process Data\ndecision:Make Decision\nend:End"
                            )
                            flowchart_edges = gr.Textbox(
                                label="Edges (one per line, format: source->target:label)",
                                lines=5,
                                placeholder="start->process:begin\nprocess->end:complete",
                                value="start->process:begin\nprocess->decision:analyze\ndecision->end:yes\ndecision->process:no, retry"
                            )
                        with gr.Column(scale=3):
                            flowchart_output = gr.Image(type="pil", label="Generated Flowchart")
                    flowchart_button = gr.Button("Generate Flowchart", variant="primary")

                    @handle_exceptions
                    def create_flowchart(nodes_text, edges_text):
                        """Parse "id:label" nodes and "source->target:label" edges, render a flowchart.

                        Lines without an "->" in the edge spec are silently skipped; a node
                        line without ":" uses its id as the label.
                        """
                        if not nodes_text.strip() or not edges_text.strip():
                            return None
                        # Parse nodes: "id:label" (split only on the first ":")
                        nodes = []
                        for line in nodes_text.split("\n"):
                            if not line.strip():
                                continue
                            parts = line.strip().split(":", 1)
                            if len(parts) == 2:
                                node_id, label = parts
                                nodes.append({"id": node_id.strip(), "label": label.strip()})
                            else:
                                # No label given — reuse the id as the label
                                nodes.append({"id": parts[0].strip(), "label": parts[0].strip()})
                        # Parse edges: "source->target[:label]"
                        edges = []
                        for line in edges_text.split("\n"):
                            if not line.strip():
                                continue
                            # Check if there's a label
                            if ":" in line:
                                connection, label = line.strip().split(":", 1)
                            else:
                                connection, label = line.strip(), ""
                            # Parse source and target
                            if "->" in connection:
                                source, target = connection.split("->", 1)
                                edges.append({
                                    "source": source.strip(),
                                    "target": target.strip(),
                                    "label": label.strip()
                                })
                        logger.info(f"Generating flowchart with {len(nodes)} nodes and {len(edges)} edges")
                        # Prepare data for diagram creation
                        data = {
                            "nodes": nodes,
                            "edges": edges
                        }
                        # Create flowchart
                        flowchart = create_diagram("flowchart", data)
                        # Record activity
                        record_activity({
                            "type": "flowchart_created",
                            "timestamp": get_timestamp()
                        })
                        return flowchart

                    flowchart_button.click(
                        create_flowchart,
                        inputs=[flowchart_nodes, flowchart_edges],
                        outputs=[flowchart_output]
                    )

            # Voice & Audio Features Tab
            with gr.TabItem("🎤 Voice & Audio Features"):
                # Speech-to-Text Section
                with gr.Box():
                    gr.Markdown("### 🗣️ Speech-to-Text")
                    gr.Markdown("*Convert speech to text using Whisper model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            transcript_output = gr.Textbox(label="Transcription", lines=5)
                    transcribe_button = gr.Button("Transcribe", variant="primary")

                    @handle_exceptions
                    def transcribe_audio(audio_path):
                        """Transcribe the audio file at ``audio_path`` to text."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Transcribing audio")
                        transcript = transcribe_speech(audio_path)
                        # Record activity
                        record_activity({
                            "type": "speech_transcribed",
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    transcribe_button.click(
                        transcribe_audio,
                        inputs=[audio_input],
                        outputs=[transcript_output]
                    )

                # Voice Notes Section
                with gr.Box():
                    gr.Markdown("### 📝 Voice Notes")
                    gr.Markdown("*Record voice notes and save them with transcriptions*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            voice_note_audio = gr.Audio(type="filepath", label="Record Voice Note")
                            voice_note_title = gr.Textbox(label="Note Title", placeholder="Meeting Notes")
                        with gr.Column(scale=3):
                            voice_note_transcript = gr.Textbox(label="Transcription", lines=5)
                    voice_note_button = gr.Button("Save Voice Note", variant="primary")

                    @handle_exceptions
                    def save_voice_note(audio_path, title):
                        """Transcribe the recording and persist it as a note; return the transcript."""
                        if audio_path is None:
                            return "Please record audio first."
                        if not title.strip():
                            # Fall back to a timestamped default title
                            title = "Voice Note - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Saving voice note: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": transcript,
                            "tags": ["voice-note"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "voice_note_created",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    voice_note_button.click(
                        save_voice_note,
                        inputs=[voice_note_audio, voice_note_title],
                        outputs=[voice_note_transcript]
                    )

                # Task Dictation Section
                with gr.Box():
                    gr.Markdown("### 📋 Task Dictation")
                    gr.Markdown("*Create tasks using voice commands*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            task_audio = gr.Audio(type="filepath", label="Record Task")
                        with gr.Column(scale=3):
                            task_transcript = gr.Textbox(label="Transcription", lines=3)
                            task_title = gr.Textbox(label="Task Title")
                    task_button = gr.Button("Create Task", variant="primary")
                    # FIX: the status box must be created inside the layout. The
                    # original code passed gr.Textbox(...) directly in the click()
                    # outputs, so the component was never rendered and the status
                    # message was invisible to the user.
                    task_status = gr.Textbox(label="Status")

                    @handle_exceptions
                    def create_task_from_voice(audio_path):
                        """Transcribe a recording; return (transcript, suggested task title)."""
                        if audio_path is None:
                            return "Please record audio first.", ""
                        logger.info("Creating task from voice")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Extract task title (first sentence, or first 50 chars with ellipsis)
                        if "." in transcript:
                            title = transcript.split(".")[0] + "."
                        else:
                            title = transcript[:50]
                            if len(transcript) > 50:
                                title += "..."
                        return transcript, title

                    @handle_exceptions
                    def save_dictated_task(audio_path, transcript, title):
                        """Persist the dictated task and return a status message.

                        ``audio_path`` is unused but kept so the click() wiring below
                        can pass all three inputs unchanged.
                        """
                        if not transcript.strip() or not title.strip():
                            return "Please provide task details."
                        # Create a new task
                        if "tasks" not in state:
                            state["tasks"] = []
                        new_task = {
                            "id": generate_id(),
                            "title": title,
                            "description": transcript,
                            "status": "To Do",
                            "priority": "Medium",
                            "created_at": get_timestamp(),
                            # Default due date: one week from now
                            "due_date": (datetime.datetime.now() + datetime.timedelta(days=7)).strftime("%Y-%m-%d"),
                            "tags": ["dictated"]
                        }
                        state["tasks"].append(new_task)
                        save_data("tasks.json", state["tasks"])
                        # Record activity
                        record_activity({
                            "type": "task_dictated",
                            "task_id": new_task["id"],
                            "timestamp": get_timestamp()
                        })
                        return "Task created successfully!"

                    # Auto-transcribe as soon as a recording is made/uploaded
                    task_audio.change(
                        create_task_from_voice,
                        inputs=[task_audio],
                        outputs=[task_transcript, task_title]
                    )
                    task_button.click(
                        save_dictated_task,
                        inputs=[task_audio, task_transcript, task_title],
                        outputs=[task_status]
                    )

                # Language Detection Section
                with gr.Box():
                    gr.Markdown("### 🌐 Language Detection")
                    gr.Markdown("*Automatically detect language from speech*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            lang_audio = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            lang_output = gr.Textbox(label="Detected Language")
                    lang_button = gr.Button("Detect Language", variant="primary")

                    @handle_exceptions
                    def detect_speech_language(audio_path):
                        """Detect the spoken language and return "Name (code)"."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Detecting language from speech")
                        language_code = detect_language(audio_path)
                        # Map language code to full name; unmapped codes fall through
                        # to an explicit "Unknown" label
                        language_names = {
                            "en": "English",
                            "fr": "French",
                            "es": "Spanish",
                            "de": "German",
                            "it": "Italian",
                            "pt": "Portuguese",
                            "nl": "Dutch",
                            "ru": "Russian",
                            "ja": "Japanese",
                            "zh": "Chinese",
                            "ar": "Arabic"
                        }
                        language_name = language_names.get(language_code, f"Unknown ({language_code})")
                        # Record activity
                        record_activity({
                            "type": "language_detected",
                            "language": language_code,
                            "timestamp": get_timestamp()
                        })
                        return f"{language_name} ({language_code})"

                    lang_button.click(
                        detect_speech_language,
                        inputs=[lang_audio],
                        outputs=[lang_output]
                    )

                # Audio Transcription Section
                with gr.Box():
                    gr.Markdown("### 📝 Audio Transcription")
                    gr.Markdown("*Transcribe longer audio recordings like meetings*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            meeting_audio = gr.Audio(type="filepath", label="Upload Audio Recording")
                            meeting_title = gr.Textbox(label="Meeting Title", placeholder="Team Meeting")
                        with gr.Column(scale=3):
                            meeting_transcript = gr.Textbox(label="Transcription", lines=10)
                    meeting_button = gr.Button("Transcribe & Save", variant="primary")

                    @handle_exceptions
                    def transcribe_meeting(audio_path, title):
                        """Transcribe a meeting recording and save it as a markdown note."""
                        if audio_path is None:
                            return "Please upload audio first."
                        if not title.strip():
                            # Fall back to a timestamped default title
                            title = "Meeting - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Transcribing meeting: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note for the meeting
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": f"# {title}\n\n{transcript}",
                            "tags": ["meeting", "transcript"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "meeting_transcribed",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    meeting_button.click(
                        transcribe_meeting,
                        inputs=[meeting_audio, meeting_title],
                        outputs=[meeting_transcript]
                    )

                # Text-to-Speech Section
                with gr.Box():
                    gr.Markdown("### 🔊 Text-to-Speech")
                    gr.Markdown("*Convert text to speech for accessibility*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tts_text = gr.Textbox(
                                label="Text to Convert",
                                lines=5,
                                placeholder="Enter text to convert to speech"
                            )
                            with gr.Row():
                                tts_lang = gr.Dropdown(
                                    choices=["en", "fr", "es", "de", "it"],
                                    label="Language",
                                    value="en"
                                )
                                tts_slow = gr.Checkbox(label="Slow Speed", value=False)
                        with gr.Column(scale=3):
                            tts_output = gr.Audio(label="Generated Speech")
                    tts_button = gr.Button("Generate Speech", variant="primary")

                    @handle_exceptions
                    def convert_text_to_speech(text, lang, slow):
                        """Synthesize speech for ``text`` and return a playable file path."""
                        if not text.strip():
                            return None
                        logger.info(f"Converting text to speech in {lang}")
                        # Convert text to speech
                        audio_data = text_to_speech(text, lang, slow)
                        # FIX: write to a unique file in the system temp dir.
                        # The original wrote a fixed "temp_tts.mp3" into the
                        # package source directory, which races between
                        # concurrent requests and pollutes the source tree.
                        fd, temp_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
                        with os.fdopen(fd, "wb") as f:
                            f.write(audio_data)
                        # Record activity
                        record_activity({
                            "type": "text_to_speech",
                            "language": lang,
                            "timestamp": get_timestamp()
                        })
                        return temp_path

                    tts_button.click(
                        convert_text_to_speech,
                        inputs=[tts_text, tts_lang, tts_slow],
                        outputs=[tts_output]
                    )

                # Audio Reminders Section
                with gr.Box():
                    gr.Markdown("### ⏰ Audio Reminders")
                    gr.Markdown("*Create spoken reminders*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            reminder_text = gr.Textbox(
                                label="Reminder Text",
                                placeholder="Take a break and stretch"
                            )
                            reminder_time = gr.Textbox(
                                label="Time (HH:MM)",
                                placeholder="14:30",
                                value=datetime.datetime.now().strftime("%H:%M")
                            )
                        with gr.Column(scale=3):
                            reminder_preview = gr.Audio(label="Reminder Preview")
                            reminder_status = gr.Textbox(label="Status")
                    reminder_button = gr.Button("Create Reminder", variant="primary")
                    preview_button = gr.Button("Preview")

                    @handle_exceptions
                    def preview_reminder(text):
                        """Synthesize the reminder text and return a preview audio path."""
                        if not text.strip():
                            return None
                        logger.info("Previewing reminder")
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # FIX: unique temp file instead of a fixed file inside the
                        # package directory (see convert_text_to_speech)
                        fd, temp_path = tempfile.mkstemp(prefix="reminder_preview_", suffix=".mp3")
                        with os.fdopen(fd, "wb") as f:
                            f.write(audio_data)
                        return temp_path

                    @handle_exceptions
                    def create_reminder(text, time_str):
                        """Validate the time, synthesize the reminder audio, and persist it.

                        Returns a human-readable status string (also used for
                        validation errors, so the UI always shows feedback).
                        """
                        if not text.strip() or not time_str.strip():
                            return "Please provide both reminder text and time."
                        # Validate time format (24-hour HH:MM); bad shapes raise
                        # ValueError from int() or tuple unpacking
                        try:
                            hour, minute = map(int, time_str.split(":"))
                            if hour < 0 or hour > 23 or minute < 0 or minute > 59:
                                return "Invalid time format. Please use HH:MM (24-hour format)."
                        except ValueError:
                            return "Invalid time format. Please use HH:MM (24-hour format)."
                        logger.info(f"Creating reminder for {time_str}: {text}")
                        # In a real app, you would schedule this reminder
                        # For this demo, we'll just save it
                        # Initialize reminders if not present
                        if "reminders" not in state:
                            state["reminders"] = []
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # FIX: store reminder audio under the system temp dir
                        # instead of the package source directory. The id-based
                        # name keeps files unique; a real app would use managed
                        # persistent storage.
                        reminder_id = generate_id()
                        audio_path = os.path.join(tempfile.gettempdir(), f"reminder_{reminder_id}.mp3")
                        with open(audio_path, "wb") as f:
                            f.write(audio_data)
                        # Create reminder object
                        reminder = {
                            "id": reminder_id,
                            "text": text,
                            "time": time_str,
                            "audio_path": audio_path,
                            "created_at": get_timestamp(),
                            "active": True
                        }
                        state["reminders"].append(reminder)
                        save_data("reminders.json", state["reminders"])
                        # Record activity
                        record_activity({
                            "type": "reminder_created",
                            "reminder_id": reminder_id,
                            "timestamp": get_timestamp()
                        })
                        return f"Reminder set for {time_str}"

                    preview_button.click(
                        preview_reminder,
                        inputs=[reminder_text],
                        outputs=[reminder_preview]
                    )
                    reminder_button.click(
                        create_reminder,
                        inputs=[reminder_text, reminder_time],
                        outputs=[reminder_status]
                    )

    # Record page visit in activity
    record_activity({
        "type": "page_viewed",
        "page": "Multimedia & Advanced Processing",
        "timestamp": get_timestamp()
    })