# pages/multimedia.py — Multimedia & Advanced Processing page
# (originally uploaded by mrradix, commit 8e4018d)
import base64
import datetime
import io
import os
import tempfile
from pathlib import Path
from typing import Dict, List, Any, Union, Optional

import gradio as gr
import numpy as np
from PIL import Image

# Import utilities
from utils.storage import load_data, save_data
from utils.state import generate_id, get_timestamp, record_activity
from utils.multimedia import (
    analyze_image, extract_text_from_image, generate_qr_code, scan_document,
    generate_mind_map, transcribe_speech, text_to_speech, detect_language,
    tag_image, create_diagram
)
from utils.logging import get_logger
from utils.error_handling import handle_exceptions, ValidationError
# Initialize module-level logger, named after this module so log output
# can be filtered per page
logger = get_logger(__name__)
def create_multimedia_page(state: Dict[str, Any]) -> None:
    """
    Create the Multimedia Processing page with various media processing tools.

    Builds the Gradio layout (two tabs: "Vision & Media Processing" and
    "Voice & Audio Features") and wires each tool's controls to a handler
    closure. Handlers read and mutate ``state`` in place and persist notes,
    tasks and reminders via ``save_data``; each successful operation is also
    logged through ``record_activity``.

    Args:
        state: Application state shared across pages; mutated in place.
    """
    # Initialize multimedia data if not present
    if "multimedia" not in state:
        state["multimedia"] = {
            "processed_items": [],
            "settings": {
                "default_language": "en",
                "image_quality": "medium",
                "save_processed": True
            }
        }

    # Create the multimedia page layout
    with gr.Column(elem_id="multimedia-page"):
        gr.Markdown("# 🖼️ Multimedia & Advanced Processing")
        gr.Markdown("*Process images, audio, and create visual content with AI-powered tools*")

        # Create tabs for different multimedia processing categories
        with gr.Tabs() as multimedia_tabs:
            # Vision & Media Processing Tab
            with gr.TabItem("🖼️ Vision & Media Processing"):
                # Image Captioning Section
                with gr.Box():
                    gr.Markdown("### 📷 Image Captioning")
                    gr.Markdown("*Generate descriptive captions for images using BLIP model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            caption_output = gr.Textbox(label="Generated Caption", lines=3)
                    caption_button = gr.Button("Generate Caption", variant="primary")

                    @handle_exceptions
                    def generate_image_caption(image):
                        """Return a generated caption for the uploaded PIL image."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Generating image caption")
                        caption = analyze_image(image)
                        # Record activity
                        record_activity({
                            "type": "image_captioned",
                            "timestamp": get_timestamp()
                        })
                        return caption

                    caption_button.click(
                        generate_image_caption,
                        inputs=[image_input],
                        outputs=[caption_output]
                    )

                # OCR Text Extraction Section
                with gr.Box():
                    gr.Markdown("### 📝 OCR Text Extraction")
                    gr.Markdown("*Extract text from images using OCR technology*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            ocr_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            ocr_text_output = gr.Textbox(label="Extracted Text", lines=5)
                    ocr_button = gr.Button("Extract Text", variant="primary")

                    @handle_exceptions
                    def extract_text(image):
                        """Run OCR on the uploaded image and return the text."""
                        if image is None:
                            return "Please upload an image first."
                        logger.info("Extracting text from image")
                        text = extract_text_from_image(image)
                        # Record activity
                        record_activity({
                            "type": "ocr_performed",
                            "timestamp": get_timestamp()
                        })
                        return text

                    ocr_button.click(
                        extract_text,
                        inputs=[ocr_image_input],
                        outputs=[ocr_text_output]
                    )

                # Image Tagging Section
                with gr.Box():
                    gr.Markdown("### 🏷️ Image Tagging")
                    gr.Markdown("*Automatically generate tags for images*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tag_image_input = gr.Image(type="pil", label="Upload Image")
                        with gr.Column(scale=3):
                            tag_output = gr.Dataframe(
                                headers=["Tags"],
                                datatype=["str"],
                                label="Generated Tags"
                            )
                    tag_button = gr.Button("Generate Tags", variant="primary")

                    @handle_exceptions
                    def generate_image_tags(image):
                        """Return tags for the image, shaped as single-column dataframe rows."""
                        if image is None:
                            return [["Please upload an image first."]]
                        logger.info("Generating image tags")
                        tags = tag_image(image)
                        # Format tags for dataframe (one tag per row)
                        tags_df = [[tag] for tag in tags]
                        # Record activity
                        record_activity({
                            "type": "image_tagged",
                            "timestamp": get_timestamp()
                        })
                        return tags_df

                    tag_button.click(
                        generate_image_tags,
                        inputs=[tag_image_input],
                        outputs=[tag_output]
                    )

                # Mind Map Generation Section
                with gr.Box():
                    gr.Markdown("### 🧠 Mind Map Generation")
                    gr.Markdown("*Create visual mind maps from topics*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            topics_input = gr.Textbox(
                                label="Enter Topics (one per line)",
                                lines=5,
                                placeholder="Main Topic\nSubtopic 1\nSubtopic 2\nSubtopic 3"
                            )
                        with gr.Column(scale=3):
                            mindmap_output = gr.Image(type="pil", label="Generated Mind Map")
                    mindmap_button = gr.Button("Generate Mind Map", variant="primary")

                    @handle_exceptions
                    def create_mind_map(topics_text):
                        """Build a radial mind map: first topic is the hub, the rest are spokes.

                        Raises:
                            ValidationError: if fewer than 2 non-empty topics were entered.
                        """
                        if not topics_text.strip():
                            return None
                        # Parse topics from text, dropping blank lines
                        topics = [topic.strip() for topic in topics_text.split("\n") if topic.strip()]
                        if len(topics) < 2:
                            raise ValidationError("Please enter at least 2 topics.")
                        logger.info(f"Generating mind map with {len(topics)} topics")
                        # Generate connections (simple radial structure: node 0 -> every other node)
                        connections = [(0, i) for i in range(1, len(topics))]
                        # Generate mind map
                        mind_map = generate_mind_map(topics, connections)
                        # Record activity
                        record_activity({
                            "type": "mind_map_created",
                            "timestamp": get_timestamp()
                        })
                        return mind_map

                    mindmap_button.click(
                        create_mind_map,
                        inputs=[topics_input],
                        outputs=[mindmap_output]
                    )

                # Document Scanning Section
                with gr.Box():
                    gr.Markdown("### 📄 Document Scanning")
                    gr.Markdown("*Scan documents and extract text*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            scan_image_input = gr.Image(type="pil", label="Upload Document Image")
                        with gr.Column(scale=3):
                            with gr.Row():
                                scan_image_output = gr.Image(type="pil", label="Processed Document")
                            with gr.Row():
                                scan_text_output = gr.Textbox(label="Extracted Text", lines=5)
                    scan_button = gr.Button("Scan Document", variant="primary")

                    @handle_exceptions
                    def scan_doc(image):
                        """Scan the document image; return (processed image, extracted text)."""
                        if image is None:
                            return None, "Please upload an image first."
                        logger.info("Scanning document")
                        # scan_document returns a dict with "processed_image" and "text" keys
                        result = scan_document(image)
                        # Record activity
                        record_activity({
                            "type": "document_scanned",
                            "timestamp": get_timestamp()
                        })
                        return result["processed_image"], result["text"]

                    scan_button.click(
                        scan_doc,
                        inputs=[scan_image_input],
                        outputs=[scan_image_output, scan_text_output]
                    )

                # QR Code Generator Section
                with gr.Box():
                    gr.Markdown("### 📱 QR Code Generator")
                    gr.Markdown("*Create QR codes from text or URLs*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            qr_text_input = gr.Textbox(
                                label="Enter Text or URL",
                                lines=3,
                                placeholder="https://example.com"
                            )
                            with gr.Row():
                                qr_size_input = gr.Slider(
                                    minimum=5, maximum=20, value=10, step=1,
                                    label="Box Size"
                                )
                                qr_border_input = gr.Slider(
                                    minimum=1, maximum=10, value=4, step=1,
                                    label="Border Size"
                                )
                        with gr.Column(scale=3):
                            qr_output = gr.Image(type="pil", label="Generated QR Code")
                    qr_button = gr.Button("Generate QR Code", variant="primary")

                    @handle_exceptions
                    def create_qr_code(text, box_size, border):
                        """Generate a QR code image from text; sliders arrive as floats."""
                        if not text.strip():
                            return None
                        logger.info("Generating QR code")
                        # Sliders deliver floats; the QR library expects ints
                        qr_code = generate_qr_code(text, int(box_size), int(border))
                        # Record activity
                        record_activity({
                            "type": "qr_code_generated",
                            "timestamp": get_timestamp()
                        })
                        return qr_code

                    qr_button.click(
                        create_qr_code,
                        inputs=[qr_text_input, qr_size_input, qr_border_input],
                        outputs=[qr_output]
                    )

                # Chart Generation Section
                with gr.Box():
                    gr.Markdown("### 📊 Chart Generation")
                    gr.Markdown("*Create charts and visualizations from data*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            chart_type = gr.Dropdown(
                                choices=["bar_chart", "pie_chart"],
                                label="Chart Type",
                                value="bar_chart"
                            )
                            chart_title = gr.Textbox(label="Chart Title", value="My Chart")
                            chart_labels = gr.Textbox(
                                label="Labels (comma separated)",
                                placeholder="Label1, Label2, Label3",
                                value="Category A, Category B, Category C"
                            )
                            chart_values = gr.Textbox(
                                label="Values (comma separated)",
                                placeholder="10, 20, 30",
                                value="30, 50, 20"
                            )
                        with gr.Column(scale=3):
                            chart_output = gr.Image(type="pil", label="Generated Chart")
                    chart_button = gr.Button("Generate Chart", variant="primary")

                    @handle_exceptions
                    def create_chart(chart_type, title, labels, values):
                        """Parse comma-separated labels/values and render a chart image.

                        Raises:
                            ValidationError: if values are non-numeric or counts mismatch.
                        """
                        if not labels.strip() or not values.strip():
                            return None
                        # Parse labels and values from the comma-separated inputs
                        labels_list = [label.strip() for label in labels.split(",") if label.strip()]
                        try:
                            values_list = [float(val.strip()) for val in values.split(",") if val.strip()]
                        except ValueError:
                            raise ValidationError("Values must be numbers.")
                        if len(labels_list) != len(values_list):
                            raise ValidationError("Number of labels must match number of values.")
                        logger.info(f"Generating {chart_type} chart")
                        # Prepare data for diagram creation
                        data = {
                            "title": title,
                            "labels": labels_list,
                            "values": values_list,
                            "x_label": "Categories",
                            "y_label": "Values"
                        }
                        # Create chart
                        chart = create_diagram(chart_type, data)
                        # Record activity
                        record_activity({
                            "type": "chart_generated",
                            "chart_type": chart_type,
                            "timestamp": get_timestamp()
                        })
                        return chart

                    chart_button.click(
                        create_chart,
                        inputs=[chart_type, chart_title, chart_labels, chart_values],
                        outputs=[chart_output]
                    )

                # Diagram Creation Section
                with gr.Box():
                    gr.Markdown("### 📈 Diagram Creation")
                    gr.Markdown("*Create flowcharts and diagrams*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            flowchart_nodes = gr.Textbox(
                                label="Nodes (one per line, format: id:label)",
                                lines=5,
                                placeholder="start:Start\nprocess:Process Data\nend:End",
                                value="start:Start\nprocess:Process Data\ndecision:Make Decision\nend:End"
                            )
                            flowchart_edges = gr.Textbox(
                                label="Edges (one per line, format: source->target:label)",
                                lines=5,
                                placeholder="start->process:begin\nprocess->end:complete",
                                value="start->process:begin\nprocess->decision:analyze\ndecision->end:yes\ndecision->process:no, retry"
                            )
                        with gr.Column(scale=3):
                            flowchart_output = gr.Image(type="pil", label="Generated Flowchart")
                    flowchart_button = gr.Button("Generate Flowchart", variant="primary")

                    @handle_exceptions
                    def create_flowchart(nodes_text, edges_text):
                        """Parse "id:label" nodes and "source->target:label" edges, render a flowchart.

                        Lines without an "->" in the edge spec are silently skipped; a node
                        line without ":" uses its id as the label.
                        """
                        if not nodes_text.strip() or not edges_text.strip():
                            return None
                        # Parse nodes: "id:label" (split only on the first ":")
                        nodes = []
                        for line in nodes_text.split("\n"):
                            if not line.strip():
                                continue
                            parts = line.strip().split(":", 1)
                            if len(parts) == 2:
                                node_id, label = parts
                                nodes.append({"id": node_id.strip(), "label": label.strip()})
                            else:
                                # No label given — reuse the id as the label
                                nodes.append({"id": parts[0].strip(), "label": parts[0].strip()})
                        # Parse edges: "source->target[:label]"
                        edges = []
                        for line in edges_text.split("\n"):
                            if not line.strip():
                                continue
                            # Check if there's a label
                            if ":" in line:
                                connection, label = line.strip().split(":", 1)
                            else:
                                connection, label = line.strip(), ""
                            # Parse source and target
                            if "->" in connection:
                                source, target = connection.split("->", 1)
                                edges.append({
                                    "source": source.strip(),
                                    "target": target.strip(),
                                    "label": label.strip()
                                })
                        logger.info(f"Generating flowchart with {len(nodes)} nodes and {len(edges)} edges")
                        # Prepare data for diagram creation
                        data = {
                            "nodes": nodes,
                            "edges": edges
                        }
                        # Create flowchart
                        flowchart = create_diagram("flowchart", data)
                        # Record activity
                        record_activity({
                            "type": "flowchart_created",
                            "timestamp": get_timestamp()
                        })
                        return flowchart

                    flowchart_button.click(
                        create_flowchart,
                        inputs=[flowchart_nodes, flowchart_edges],
                        outputs=[flowchart_output]
                    )

            # Voice & Audio Features Tab
            with gr.TabItem("🎤 Voice & Audio Features"):
                # Speech-to-Text Section
                with gr.Box():
                    gr.Markdown("### 🗣️ Speech-to-Text")
                    gr.Markdown("*Convert speech to text using Whisper model*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            transcript_output = gr.Textbox(label="Transcription", lines=5)
                    transcribe_button = gr.Button("Transcribe", variant="primary")

                    @handle_exceptions
                    def transcribe_audio(audio_path):
                        """Transcribe the audio file at ``audio_path`` to text."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Transcribing audio")
                        transcript = transcribe_speech(audio_path)
                        # Record activity
                        record_activity({
                            "type": "speech_transcribed",
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    transcribe_button.click(
                        transcribe_audio,
                        inputs=[audio_input],
                        outputs=[transcript_output]
                    )

                # Voice Notes Section
                with gr.Box():
                    gr.Markdown("### 📝 Voice Notes")
                    gr.Markdown("*Record voice notes and save them with transcriptions*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            voice_note_audio = gr.Audio(type="filepath", label="Record Voice Note")
                            voice_note_title = gr.Textbox(label="Note Title", placeholder="Meeting Notes")
                        with gr.Column(scale=3):
                            voice_note_transcript = gr.Textbox(label="Transcription", lines=5)
                    voice_note_button = gr.Button("Save Voice Note", variant="primary")

                    @handle_exceptions
                    def save_voice_note(audio_path, title):
                        """Transcribe the recording and persist it as a note; return the transcript."""
                        if audio_path is None:
                            return "Please record audio first."
                        if not title.strip():
                            # Fall back to a timestamped default title
                            title = "Voice Note - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Saving voice note: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": transcript,
                            "tags": ["voice-note"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "voice_note_created",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    voice_note_button.click(
                        save_voice_note,
                        inputs=[voice_note_audio, voice_note_title],
                        outputs=[voice_note_transcript]
                    )

                # Task Dictation Section
                with gr.Box():
                    gr.Markdown("### 📋 Task Dictation")
                    gr.Markdown("*Create tasks using voice commands*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            task_audio = gr.Audio(type="filepath", label="Record Task")
                        with gr.Column(scale=3):
                            task_transcript = gr.Textbox(label="Transcription", lines=3)
                            task_title = gr.Textbox(label="Task Title")
                    task_button = gr.Button("Create Task", variant="primary")
                    # FIX: the status box must be created inside the layout. The
                    # original code passed gr.Textbox(...) directly in the click()
                    # outputs, so the component was never rendered and the status
                    # message was invisible to the user.
                    task_status = gr.Textbox(label="Status")

                    @handle_exceptions
                    def create_task_from_voice(audio_path):
                        """Transcribe a recording; return (transcript, suggested task title)."""
                        if audio_path is None:
                            return "Please record audio first.", ""
                        logger.info("Creating task from voice")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Extract task title (first sentence, or first 50 chars with ellipsis)
                        if "." in transcript:
                            title = transcript.split(".")[0] + "."
                        else:
                            title = transcript[:50]
                            if len(transcript) > 50:
                                title += "..."
                        return transcript, title

                    @handle_exceptions
                    def save_dictated_task(audio_path, transcript, title):
                        """Persist the dictated task and return a status message.

                        ``audio_path`` is unused but kept so the click() wiring below
                        can pass all three inputs unchanged.
                        """
                        if not transcript.strip() or not title.strip():
                            return "Please provide task details."
                        # Create a new task
                        if "tasks" not in state:
                            state["tasks"] = []
                        new_task = {
                            "id": generate_id(),
                            "title": title,
                            "description": transcript,
                            "status": "To Do",
                            "priority": "Medium",
                            "created_at": get_timestamp(),
                            # Default due date: one week from now
                            "due_date": (datetime.datetime.now() + datetime.timedelta(days=7)).strftime("%Y-%m-%d"),
                            "tags": ["dictated"]
                        }
                        state["tasks"].append(new_task)
                        save_data("tasks.json", state["tasks"])
                        # Record activity
                        record_activity({
                            "type": "task_dictated",
                            "task_id": new_task["id"],
                            "timestamp": get_timestamp()
                        })
                        return "Task created successfully!"

                    # Auto-transcribe as soon as a recording is made/uploaded
                    task_audio.change(
                        create_task_from_voice,
                        inputs=[task_audio],
                        outputs=[task_transcript, task_title]
                    )
                    task_button.click(
                        save_dictated_task,
                        inputs=[task_audio, task_transcript, task_title],
                        outputs=[task_status]
                    )

                # Language Detection Section
                with gr.Box():
                    gr.Markdown("### 🌐 Language Detection")
                    gr.Markdown("*Automatically detect language from speech*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            lang_audio = gr.Audio(type="filepath", label="Record or Upload Audio")
                        with gr.Column(scale=3):
                            lang_output = gr.Textbox(label="Detected Language")
                    lang_button = gr.Button("Detect Language", variant="primary")

                    @handle_exceptions
                    def detect_speech_language(audio_path):
                        """Detect the spoken language and return "Name (code)"."""
                        if audio_path is None:
                            return "Please record or upload audio first."
                        logger.info("Detecting language from speech")
                        language_code = detect_language(audio_path)
                        # Map language code to full name; unmapped codes fall through
                        # to an explicit "Unknown" label
                        language_names = {
                            "en": "English",
                            "fr": "French",
                            "es": "Spanish",
                            "de": "German",
                            "it": "Italian",
                            "pt": "Portuguese",
                            "nl": "Dutch",
                            "ru": "Russian",
                            "ja": "Japanese",
                            "zh": "Chinese",
                            "ar": "Arabic"
                        }
                        language_name = language_names.get(language_code, f"Unknown ({language_code})")
                        # Record activity
                        record_activity({
                            "type": "language_detected",
                            "language": language_code,
                            "timestamp": get_timestamp()
                        })
                        return f"{language_name} ({language_code})"

                    lang_button.click(
                        detect_speech_language,
                        inputs=[lang_audio],
                        outputs=[lang_output]
                    )

                # Audio Transcription Section
                with gr.Box():
                    gr.Markdown("### 📝 Audio Transcription")
                    gr.Markdown("*Transcribe longer audio recordings like meetings*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            meeting_audio = gr.Audio(type="filepath", label="Upload Audio Recording")
                            meeting_title = gr.Textbox(label="Meeting Title", placeholder="Team Meeting")
                        with gr.Column(scale=3):
                            meeting_transcript = gr.Textbox(label="Transcription", lines=10)
                    meeting_button = gr.Button("Transcribe & Save", variant="primary")

                    @handle_exceptions
                    def transcribe_meeting(audio_path, title):
                        """Transcribe a meeting recording and save it as a markdown note."""
                        if audio_path is None:
                            return "Please upload audio first."
                        if not title.strip():
                            # Fall back to a timestamped default title
                            title = "Meeting - " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
                        logger.info(f"Transcribing meeting: {title}")
                        # Transcribe the audio
                        transcript = transcribe_speech(audio_path)
                        # Create a new note for the meeting
                        if "notes" not in state:
                            state["notes"] = []
                        new_note = {
                            "id": generate_id(),
                            "title": title,
                            "content": f"# {title}\n\n{transcript}",
                            "tags": ["meeting", "transcript"],
                            "created_at": get_timestamp(),
                            "updated_at": get_timestamp(),
                            "audio_path": audio_path  # In a real app, you'd copy this to a permanent location
                        }
                        state["notes"].append(new_note)
                        save_data("notes.json", state["notes"])
                        # Record activity
                        record_activity({
                            "type": "meeting_transcribed",
                            "note_id": new_note["id"],
                            "timestamp": get_timestamp()
                        })
                        return transcript

                    meeting_button.click(
                        transcribe_meeting,
                        inputs=[meeting_audio, meeting_title],
                        outputs=[meeting_transcript]
                    )

                # Text-to-Speech Section
                with gr.Box():
                    gr.Markdown("### 🔊 Text-to-Speech")
                    gr.Markdown("*Convert text to speech for accessibility*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            tts_text = gr.Textbox(
                                label="Text to Convert",
                                lines=5,
                                placeholder="Enter text to convert to speech"
                            )
                            with gr.Row():
                                tts_lang = gr.Dropdown(
                                    choices=["en", "fr", "es", "de", "it"],
                                    label="Language",
                                    value="en"
                                )
                                tts_slow = gr.Checkbox(label="Slow Speed", value=False)
                        with gr.Column(scale=3):
                            tts_output = gr.Audio(label="Generated Speech")
                    tts_button = gr.Button("Generate Speech", variant="primary")

                    @handle_exceptions
                    def convert_text_to_speech(text, lang, slow):
                        """Synthesize speech for ``text`` and return a playable file path."""
                        if not text.strip():
                            return None
                        logger.info(f"Converting text to speech in {lang}")
                        # Convert text to speech
                        audio_data = text_to_speech(text, lang, slow)
                        # FIX: write to a unique file in the system temp dir.
                        # The original wrote a fixed "temp_tts.mp3" into the
                        # package source directory, which races between
                        # concurrent requests and pollutes the source tree.
                        fd, temp_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
                        with os.fdopen(fd, "wb") as f:
                            f.write(audio_data)
                        # Record activity
                        record_activity({
                            "type": "text_to_speech",
                            "language": lang,
                            "timestamp": get_timestamp()
                        })
                        return temp_path

                    tts_button.click(
                        convert_text_to_speech,
                        inputs=[tts_text, tts_lang, tts_slow],
                        outputs=[tts_output]
                    )

                # Audio Reminders Section
                with gr.Box():
                    gr.Markdown("### ⏰ Audio Reminders")
                    gr.Markdown("*Create spoken reminders*")
                    with gr.Row():
                        with gr.Column(scale=2):
                            reminder_text = gr.Textbox(
                                label="Reminder Text",
                                placeholder="Take a break and stretch"
                            )
                            reminder_time = gr.Textbox(
                                label="Time (HH:MM)",
                                placeholder="14:30",
                                value=datetime.datetime.now().strftime("%H:%M")
                            )
                        with gr.Column(scale=3):
                            reminder_preview = gr.Audio(label="Reminder Preview")
                            reminder_status = gr.Textbox(label="Status")
                    reminder_button = gr.Button("Create Reminder", variant="primary")
                    preview_button = gr.Button("Preview")

                    @handle_exceptions
                    def preview_reminder(text):
                        """Synthesize the reminder text and return a preview audio path."""
                        if not text.strip():
                            return None
                        logger.info("Previewing reminder")
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # FIX: unique temp file instead of a fixed file inside the
                        # package directory (see convert_text_to_speech)
                        fd, temp_path = tempfile.mkstemp(prefix="reminder_preview_", suffix=".mp3")
                        with os.fdopen(fd, "wb") as f:
                            f.write(audio_data)
                        return temp_path

                    @handle_exceptions
                    def create_reminder(text, time_str):
                        """Validate the time, synthesize the reminder audio, and persist it.

                        Returns a human-readable status string (also used for
                        validation errors, so the UI always shows feedback).
                        """
                        if not text.strip() or not time_str.strip():
                            return "Please provide both reminder text and time."
                        # Validate time format (24-hour HH:MM); bad shapes raise
                        # ValueError from int() or tuple unpacking
                        try:
                            hour, minute = map(int, time_str.split(":"))
                            if hour < 0 or hour > 23 or minute < 0 or minute > 59:
                                return "Invalid time format. Please use HH:MM (24-hour format)."
                        except ValueError:
                            return "Invalid time format. Please use HH:MM (24-hour format)."
                        logger.info(f"Creating reminder for {time_str}: {text}")
                        # In a real app, you would schedule this reminder
                        # For this demo, we'll just save it
                        # Initialize reminders if not present
                        if "reminders" not in state:
                            state["reminders"] = []
                        # Convert text to speech
                        audio_data = text_to_speech(text, "en", False)
                        # FIX: store reminder audio under the system temp dir
                        # instead of the package source directory. The id-based
                        # name keeps files unique; a real app would use managed
                        # persistent storage.
                        reminder_id = generate_id()
                        audio_path = os.path.join(tempfile.gettempdir(), f"reminder_{reminder_id}.mp3")
                        with open(audio_path, "wb") as f:
                            f.write(audio_data)
                        # Create reminder object
                        reminder = {
                            "id": reminder_id,
                            "text": text,
                            "time": time_str,
                            "audio_path": audio_path,
                            "created_at": get_timestamp(),
                            "active": True
                        }
                        state["reminders"].append(reminder)
                        save_data("reminders.json", state["reminders"])
                        # Record activity
                        record_activity({
                            "type": "reminder_created",
                            "reminder_id": reminder_id,
                            "timestamp": get_timestamp()
                        })
                        return f"Reminder set for {time_str}"

                    preview_button.click(
                        preview_reminder,
                        inputs=[reminder_text],
                        outputs=[reminder_preview]
                    )
                    reminder_button.click(
                        create_reminder,
                        inputs=[reminder_text, reminder_time],
                        outputs=[reminder_status]
                    )

    # Record page visit in activity
    record_activity({
        "type": "page_viewed",
        "page": "Multimedia & Advanced Processing",
        "timestamp": get_timestamp()
    })