Spaces:
Build error
Build error
import gradio as gr | |
from pathlib import Path | |
import datetime | |
import re | |
import os | |
import shutil | |
import io | |
import base64 | |
from collections import defaultdict | |
from PIL import Image | |
import json | |
# Document Generation Libs | |
from docx import Document | |
import openpyxl | |
from pypdf import PdfWriter | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, BaseDocTemplate, Frame, PageTemplate | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter, A4, landscape | |
from reportlab.lib.units import inch | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# Media Libs | |
import fitz # PyMuPDF | |
# --- Configuration & Setup --- | |
# Every working directory lives directly under the process's current directory.
CWD = Path.cwd()
OUTPUT_DIR, PREVIEW_DIR, UPLOAD_DIR = (
    CWD / folder for folder in ("generated_outputs", "previews", "uploads")
)
# Fonts (*.ttf) are looked up next to the application itself.
FONT_DIR = CWD

# Make sure the working directories exist before anything tries to use them.
for _working_dir in (OUTPUT_DIR, PREVIEW_DIR, UPLOAD_DIR):
    _working_dir.mkdir(exist_ok=True)

# Page layouts offered in the UI: each base size in portrait, then landscape.
LAYOUTS = {
    f"{paper} {orientation}": {"size": orient(base_size)}
    for paper, base_size in (("A4", A4), ("Letter", letter))
    for orientation, orient in (("Portrait", lambda size: size), ("Landscape", landscape))
}
# --- Document Generation Engines ---
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
    """Render Markdown text into an in-memory PDF using ReportLab.

    Args:
        md_content: Markdown source text.
        font_name: Name of a registered ReportLab font used for body text.
        emoji_font: Name of a registered emoji font, or None to leave emoji untagged.
        pagesize: ``(width, height)`` of the page in points.
        num_columns: Number of text columns; values above 1 use a multi-frame layout.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the PDF bytes.
    """
    pdf_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, emoji_font)
    # UI sliders may hand over a float; range() below requires an int.
    num_columns = max(1, int(num_columns))
    if num_columns > 1:
        doc = BaseDocTemplate(pdf_buffer, pagesize=pagesize, leftMargin=0.5 * inch, rightMargin=0.5 * inch)
        # Split the printable width into equal frames separated by a fixed gutter,
        # so the columns always span exactly doc.width regardless of the count.
        gutter = 0.2 * inch
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + index * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for index in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(pdf_buffer, pagesize=pagesize)
    doc.build(story)
    pdf_buffer.seek(0)
    return pdf_buffer
def create_docx(md_content):
    """Build a Word document from Markdown text.

    Lines starting with ``# ``, ``## `` or ``### `` become headings of level
    1-3, lines starting with ``- `` or ``* `` become bulleted paragraphs, and
    every other line becomes a plain paragraph. ``**bold**`` spans are rendered
    as bold runs in both bullets and plain paragraphs.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated python-docx ``Document``.
    """
    document = Document()
    bold_splitter = re.compile(r'(\*\*.*?\*\*)')
    heading_prefixes = (('### ', 3), ('## ', 2), ('# ', 1))

    def _add_runs(paragraph, text):
        # Split on **...** spans (kept via the capture group) and bold those runs.
        for part in bold_splitter.split(text):
            if len(part) >= 4 and part.startswith('**') and part.endswith('**'):
                paragraph.add_run(part[2:-2]).bold = True
            else:
                paragraph.add_run(part)

    for line in md_content.split('\n'):
        for prefix, level in heading_prefixes:
            if line.startswith(prefix):
                document.add_heading(line[len(prefix):], level=level)
                break
        else:
            stripped = line.strip()
            if stripped.startswith(('- ', '* ')):
                _add_runs(document.add_paragraph(style='List Bullet'), stripped[2:])
            else:
                _add_runs(document.add_paragraph(), line)
    return document
def create_xlsx(md_content):
    """Lay out a Markdown outline as spreadsheet columns.

    The text is split at every line that starts with ``# ``. Each resulting
    section fills one column: its first line goes in row 1 and its remaining
    non-blank lines (whitespace-trimmed) follow from row 2 downward.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated openpyxl ``Workbook``.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # A leading newline lets a heading on the very first line split like the rest.
    chunks = re.split(r'\n# ', '\n' + md_content)
    if chunks and chunks[0] == '':
        del chunks[0]

    for column_number, chunk in enumerate(chunks, start=1):
        header, *body = chunk.split('\n')
        sheet.cell(row=1, column=column_number, value=header)
        row_number = 2
        for raw_line in body:
            text = raw_line.strip()
            if not text:
                continue
            sheet.cell(row=row_number, column=column_number, value=text)
            row_number += 1
    return workbook
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Translate Markdown text into a list of ReportLab flowables.

    Supported syntax: ``#``/``##``/``###`` headings, ``-``/``*`` bullets,
    ``**bold**`` and ``_italic_``. Every level-1 heading after the first one
    starts a new page. Blank lines become small vertical spacers.

    Args:
        markdown_text: Markdown source text.
        font_name: Registered ReportLab font for body text; headings use
            ``f"{font_name}-Bold"``.
        emoji_font: Registered emoji font name, or None to skip emoji tagging.

    Returns:
        A list of flowables (``Paragraph``, ``Spacer``, ``PageBreak``).
    """
    # Local imports: neither name is imported at module level in this file.
    from xml.sax.saxutils import escape
    from reportlab.lib import colors

    bold_font = f"{font_name}-Bold"
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10, leading=14)
    style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24, textColor=colors.HexColor("#1E3A8A"))
    style_h2 = ParagraphStyle('h2', fontName=bold_font, spaceBefore=10, fontSize=18, textColor=colors.HexColor("#374151"))
    style_h3 = ParagraphStyle('h3', fontName=bold_font, spaceBefore=8, fontSize=14, textColor=colors.HexColor("#4B5563"))

    story, first_heading = [], True
    for line in markdown_text.split('\n'):
        stripped_line = line.strip()
        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        # Determine the structural element and its style. Prefixes are removed
        # by slicing: str.lstrip('# ') strips a character *set* and would also
        # eat a leading '#' that belongs to the heading text itself.
        content, style, extra_args = stripped_line, style_normal, {}
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            content, style, first_heading = stripped_line[2:].lstrip(), style_h1, False
        elif stripped_line.startswith("## "):
            content, style = stripped_line[3:].lstrip(), style_h2
        elif stripped_line.startswith("### "):
            content, style = stripped_line[4:].lstrip(), style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content, extra_args = stripped_line[2:], {'bulletText': '\u2022'}

        # Paragraph parses its text as XML-like markup, so literal '&', '<' and
        # '>' from the user's text must be escaped before our own tags are added.
        content = escape(content)
        # Apply bold first, then italic.
        formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
        # Emoji font tags go on last so they wrap the final text.
        final_content = apply_emoji_font(formatted_content, emoji_font)
        story.append(Paragraph(final_content, style, **extra_args))
    return story
# --- Virtual AI Omni-Model Functions ---
def process_text_input(prompt):
    """Return a canned Markdown "AI response" that echoes the given text prompt."""
    sections = (
        "# Virtual AI Response",
        f"**Your Prompt:**\n> {prompt}",
        "**Generated Content:**\n- This is a simulated response for your text input.\n- Here's an emoji: ๐",
    )
    # Sections are separated by one blank line.
    return "\n\n".join(sections)
def process_image_input(image_path, prompt):
    """Return a canned Markdown "image analysis" naming the file and its suffix."""
    image = Path(image_path)
    report_lines = [
        f"# Virtual AI Image Analysis: {image.name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Generated Content:**",
        "1. Simulated analysis of the uploaded image.",
        f"2. File type appears to be `{image.suffix}`.",
    ]
    return "\n".join(report_lines)
def process_audio_input(audio_path, prompt):
    """Return a canned Markdown "transcription and summary" for an audio file."""
    audio_name = Path(audio_path).name
    report_lines = [
        f"# Virtual AI Audio Summary: {audio_name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Simulated Transcription:**",
        "> \"This is a test of the emergency broadcast system.\"",
        "",
        "**Generated Summary:**",
        "The audio is a test broadcast.",
    ]
    return "\n".join(report_lines)
def process_pdf_input(pdf_path, prompt, progress):
    """Return canned Markdown "OCR output" for a PDF, reporting progress twice.

    ``progress`` is called as ``progress(fraction, desc=...)`` once at 0.5
    before the text is built and once at 1.0 afterwards.
    """
    progress(0.5, desc="Simulating PDF page processing...")
    pdf_name = Path(pdf_path).name
    report_lines = [
        f"# Virtual AI OCR of: {pdf_name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Extracted Content (Simulated):**",
        "- **Page 1:** Simulated text from the first page.",
        "- **Page 2:** Simulated text from the second page.",
    ]
    ocr_text = "\n".join(report_lines)
    progress(1.0, desc="PDF OCR Simulation Complete!")
    return ocr_text
# --- Helpers & Main API ---
def register_local_fonts():
    """Register every ``*.ttf`` file in ``FONT_DIR`` with ReportLab.

    Each font is registered under its file stem and again as
    ``"<stem>-Bold"`` using the same file, and the two are linked as a font
    family. A font whose name contains ``notocoloremoji-regular``
    (case-insensitive) is reported as the emoji font instead of a text font.
    Fonts that fail to register are skipped with a logged warning.

    Returns:
        ``(text_font_names, emoji_font_name)``: a sorted list of text font
        names (``['Helvetica']`` when none were found) and the emoji font name
        or None.
    """
    import logging

    logger = logging.getLogger(__name__)
    text_font_names, emoji_font_name = [], None
    # Sorted so the outcome does not depend on directory listing order.
    for font_path in sorted(FONT_DIR.glob("*.ttf")):
        font_name = font_path.stem
        bold_name = f"{font_name}-Bold"
        try:
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            pdfmetrics.registerFont(TTFont(bold_name, str(font_path)))
            pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=bold_name)
        except Exception as exc:
            # Best effort: one unreadable font must not prevent the app from starting.
            logger.warning("Skipping font %s: %s", font_path.name, exc)
            continue
        if "notocoloremoji-regular" in font_name.lower():
            emoji_font_name = font_name
        else:
            text_font_names.append(font_name)
    if not text_font_names:
        text_font_names.append('Helvetica')
    return sorted(text_font_names), emoji_font_name
# Compiled once at import time instead of on every call. Ranges are inclusive.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols and pictographs
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "]+)"
)


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap runs of emoji in ``<font name="...">`` tags for ReportLab.

    Args:
        text: Paragraph text, possibly already containing markup.
        emoji_font_name: Registered emoji font name; when falsy the text is
            returned unchanged.

    Returns:
        ``text`` with each run of consecutive emoji characters wrapped in a
        single ``<font>`` tag.
    """
    if not emoji_font_name:
        return text
    # A callable replacement keeps backslashes in the font name from being
    # interpreted as regex replacement escapes.
    return _EMOJI_PATTERN.sub(
        lambda match: f'<font name="{emoji_font_name}">{match.group(1)}</font>',
        text,
    )
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The path of the written PNG (named after the PDF's stem), or None if
        the PDF could not be opened or rendered.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            pixmap = doc.load_page(0).get_pixmap(dpi=96)
            pixmap.save(str(preview_path))
        finally:
            # Release the document even when rendering or saving fails.
            doc.close()
    except Exception:
        # Previews are optional; callers treat None as "no thumbnail".
        return None
    return preview_path
def build_file_explorer_html(generated_files, pdf_files_for_gallery):
    """Build the HTML/CSS/JS snippet for the file explorer and PDF gallery.

    Args:
        generated_files: Paths of all generated files; each becomes a download
            link pointing at ``/file=<path>``.
        pdf_files_for_gallery: Paths of PDFs to show in the gallery. PDFs for
            which no preview image can be produced are left out.

    Returns:
        One HTML string with a "File Explorer" tab and, when at least one
        preview exists, a "PDF Gallery" tab.
    """
    from html import escape

    file_icons = {".pdf": "๐", ".docx": "๐", ".xlsx": "๐"}
    file_links = []
    for file_path in generated_files:
        icon = file_icons.get(file_path.suffix, '๐')
        # Escape everything that ends up in attribute values or element text.
        safe_name = escape(file_path.name)
        safe_href = escape(f"/file={file_path}")
        file_links.append(f"""
        <a href="{safe_href}" class="file-link" download="{safe_name}">
            <span class="file-icon">{icon}</span>
            <span class="file-name">{safe_name}</span>
        </a>
        """)
    file_explorer_html = "".join(file_links)

    gallery_items = []
    for pdf_path in pdf_files_for_gallery:
        preview_path = create_pdf_preview(pdf_path)
        if preview_path:
            with open(preview_path, "rb") as f:
                img_base64 = base64.b64encode(f.read()).decode("utf-8")
            gallery_items.append({
                "preview_src": f"data:image/png;base64,{img_base64}",
                "filename": escape(pdf_path.name),
            })

    gallery_html = ""
    if gallery_items:
        # The click handler reads the image source and file name back from the
        # element, so the base64 payload appears only once per thumbnail.
        thumbs_html = "".join(
            f'<img src="{item["preview_src"]}" class="thumbnail" data-filename="{item["filename"]}" '
            'onclick="selectThumbnail(this, this.src, this.dataset.filename)">'
            for item in gallery_items
        )
        gallery_html = f"""
        <div class="gallery-container">
            <div class="main-view">
                <img id="main-image" src="{gallery_items[0]['preview_src']}" class="main-image">
                <p id="main-filename">{gallery_items[0]['filename']}</p>
            </div>
            <div class="thumbnail-strip">{thumbs_html}</div>
        </div>
        """

    # Built outside the f-string below: a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12.
    gallery_tab_button = (
        '<button class="tab-button" onclick="openTab(event, \'gallery\')">๐ผ๏ธ PDF Gallery</button>'
        if gallery_items else ''
    )

    page_html = f"""
    <style>
        .tabs {{ display: flex; border-bottom: 2px solid #ccc; }}
        .tab-button {{ padding: 10px 15px; cursor: pointer; background: #f1f1f1; border: none; border-bottom: 2px solid transparent; outline: none; }}
        .tab-button.active {{ background: #fff; border-top: 2px solid #007bff; border-left: 2px solid #ccc; border-right: 2px solid #ccc; border-bottom: 2px solid #fff; }}
        .tab-content {{ display: none; padding: 15px; border: 1px solid #ccc; border-top: none; }}
        .tab-content.active {{ display: block; }}
        .file-explorer {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 10px; }}
        .file-link {{ display: flex; align-items: center; padding: 10px; background: #f9f9f9; border-radius: 5px; text-decoration: none; color: #333; }}
        .file-link:hover {{ background: #e9e9e9; }}
        .file-icon {{ font-size: 2.5em; margin-right: 10px; }}
        .gallery-container {{ display: flex; height: 500px; }}
        .main-view {{ flex: 3; padding: 10px; display: flex; flex-direction: column; align-items: center; justify-content: center; }}
        .main-image {{ max-width: 100%; max-height: 90%; }}
        .thumbnail-strip {{ flex: 1; overflow-y: auto; padding: 5px; }}
        .thumbnail {{ width: 100%; margin-bottom: 5px; cursor: pointer; border: 2px solid transparent; }}
        .thumbnail.active {{ border-color: #007bff; }}
    </style>
    <div class="tabs">
        <button class="tab-button active" onclick="openTab(event, 'explorer')">๐๏ธ File Explorer</button>
        {gallery_tab_button}
    </div>
    <div id="explorer" class="tab-content active">
        <div class="file-explorer">{file_explorer_html}</div>
    </div>
    <div id="gallery" class="tab-content">
        {gallery_html}
    </div>
    <script>
        function openTab(evt, tabName) {{
            var i, tabcontent, tablinks;
            tabcontent = document.getElementsByClassName("tab-content");
            for (i = 0; i < tabcontent.length; i++) {{ tabcontent[i].style.display = "none"; }}
            tablinks = document.getElementsByClassName("tab-button");
            for (i = 0; i < tablinks.length; i++) {{ tablinks[i].className = tablinks[i].className.replace(" active", ""); }}
            document.getElementById(tabName).style.display = "block";
            evt.currentTarget.className += " active";
        }}
        const mainImage = document.getElementById('main-image');
        const mainFilename = document.getElementById('main-filename');
        const thumbnails = document.querySelectorAll('.thumbnail');
        if (thumbnails.length > 0) thumbnails[0].classList.add('active');
        function selectThumbnail(selectedThumb, imgSrc, filename) {{
            mainImage.src = imgSrc; mainFilename.textContent = filename;
            thumbnails.forEach(t => t.classList.remove('active'));
            selectedThumb.classList.add('active');
        }};
    </script>
    """
    return page_html
def generate_outputs_api(omni_files, omni_prompt, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Turn a prompt or uploaded files into Markdown, then export the chosen formats.

    Args:
        omni_files: Uploaded files (objects exposing ``.name`` or plain path
            strings), or None. The first file's extension decides how the
            upload is processed; for ``.md`` all uploaded ``.md`` files are
            concatenated.
        omni_prompt: Text prompt; used on its own when no files are given.
        output_formats: Any of ``"PDF"``, ``"DOCX"``, ``"XLSX"``.
        layouts: Keys of ``LAYOUTS``; one PDF is produced per layout and font.
        fonts: Font names to render PDFs with.
        num_columns: Number of text columns for PDFs.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        ``(md_content, html)``: the Markdown source and the file-explorer HTML.

    Raises:
        gr.Error: If there is no input, no output format is selected, or no
            content could be produced from the input.
    """
    if not omni_prompt and not omni_files:
        raise gr.Error("Please provide a prompt or upload at least one file.")
    if not output_formats:
        raise gr.Error("Please select at least one output format.")

    # Start each run from empty output/preview folders. exist_ok covers the
    # case where rmtree (ignore_errors=True) could not remove the directory.
    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir(parents=True, exist_ok=True)

    md_content = ""
    if omni_files:
        temp_paths = []
        for f in omni_files:
            # Accept both file-like objects with a .name and plain path strings.
            source_path = Path(getattr(f, "name", f))
            temp_path = UPLOAD_DIR / source_path.name
            shutil.copyfile(source_path, temp_path)
            temp_paths.append(temp_path)
        file_path = temp_paths[0]
        file_ext = file_path.suffix.lower()
        if file_ext == '.md':
            md_content = "\n\n".join([p.read_text(encoding='utf-8') for p in temp_paths if p.suffix.lower() == '.md'])
        elif file_ext == '.pdf':
            md_content = process_pdf_input(file_path, omni_prompt or "Extract text", progress)
        elif file_ext in ['.png', '.jpg', '.jpeg']:
            md_content = process_image_input(file_path, omni_prompt or "Describe image")
        elif file_ext in ['.wav', '.mp3']:
            md_content = process_audio_input(file_path, omni_prompt or "Summarize transcription")
    elif omni_prompt:
        md_content = process_text_input(omni_prompt)
    if not md_content:
        raise gr.Error("Failed to generate source content.")

    # Sliders may deliver floats; the PDF layout code needs an integer count.
    column_count = int(num_columns)

    generated_files, pdf_files_for_gallery = [], []
    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
        if format_choice == "PDF":
            for layout_name in layouts:
                layout_slug = re.sub(r"\W+", "-", layout_name).strip("-")
                pagesize = LAYOUTS[layout_name]["size"]
                final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                for font_name in fonts:
                    pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, column_count)
                    # Layout and font are part of the name so that each
                    # combination gets its own file instead of overwriting.
                    filename = f"Document_{time_str}_{layout_slug}_{font_name}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as f:
                        f.write(pdf_buffer.getvalue())
                    generated_files.append(output_path)
                    pdf_files_for_gallery.append(output_path)
        elif format_choice == "DOCX":
            doc = create_docx(md_content)
            output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
            doc.save(output_path)
            generated_files.append(output_path)
        elif format_choice == "XLSX":
            book = create_xlsx(md_content)
            output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
            book.save(output_path)
            generated_files.append(output_path)

    final_html_output = build_file_explorer_html(generated_files, pdf_files_for_gallery)
    return md_content, final_html_output
# --- Gradio UI Definition ---
# Register local fonts once at import time. AVAILABLE_FONTS feeds the font
# picker in the UI; EMOJI_FONT_NAME is read by generate_outputs_api.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
# Sample Markdown document. NOTE(review): not referenced anywhere in the
# visible code; confirm whether it is still needed.
SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
1. ๐ **Introduction**
- **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
- **Usage**: A guide for learning and storytelling across traditions. ๏ธ
- **Themes**: Justice โ๏ธ, faith ๐, hubris ๐๏ธ, redemption โจ, cosmic order ๐.
# โ๏ธ Arthurian Legends
- **Merlin, Morgan le Fay, Arthur**: Mentor ๐ง, rival ๐งโโ๏ธ, son ๐.
- **Relation**: Family tests loyalty ๐ค.
- **Lesson**: Honor ๐๏ธ vs. betrayal ๐ก๏ธ.
"""
# Gradio UI: inputs and output settings in the left column, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
    gr.Markdown("# ๐ง Omni-Model Document Generator")
    gr.Markdown("Provide a prompt, or upload a file (MD, PDF, Image, Audio). A virtual AI will process it, and you can generate documents from the result.")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Tabs():
                with gr.TabItem("๐ฌ Text"):
                    text_prompt = gr.Textbox(label="Prompt", lines=5, placeholder="Ask a question or provide instructions...")
                with gr.TabItem("๐ผ๏ธ Image"):
                    image_prompt = gr.Textbox(label="Image Prompt", lines=2, placeholder="e.g., Describe this picture")
                    image_file = gr.File(label="Upload Image", file_types=["image"])
                with gr.TabItem("๐ค Audio"):
                    audio_prompt = gr.Textbox(label="Audio Prompt", lines=2, placeholder="e.g., Summarize this audio")
                    audio_file = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])
                with gr.TabItem("๐ Document"):
                    doc_prompt = gr.Textbox(label="Document Prompt", lines=2, placeholder="e.g., Extract text from this PDF")
                    doc_file = gr.File(label="Upload MD or PDF", file_types=[".md", ".pdf"])
            gr.Markdown("### ๐ Output Settings")
            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
            with gr.Accordion("PDF Customization", open=False):
                num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["A4 Portrait"])
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
            generate_btn = gr.Button("๐ Generate Documents", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("### ๐ค AI Response (Source for Documents)")
            ai_response_output = gr.Markdown(label="AI Generated Content")
            gr.Markdown("### ๐๏ธ Generated Files")
            file_explorer_output = gr.HTML(label="File Explorer & Gallery")

    def master_process(text_p, image_p, audio_p, doc_p, image_f, audio_f, doc_f, *args):
        """Route the first populated input to ``generate_outputs_api``.

        Uploads are checked in the order image, audio, document; the text
        prompt is used only when nothing was uploaded. ``*args`` carries the
        output settings through unchanged.

        Raises:
            gr.Error: If no tab contains any input.
        """
        uploads = (
            (image_f, image_p, "Describe this image"),
            (audio_f, audio_p, "Summarize this audio"),
            (doc_f, doc_p, "Process this document"),
        )
        for upload, prompt, fallback_prompt in uploads:
            if upload:
                return generate_outputs_api([upload], prompt or fallback_prompt, *args)
        if text_p:
            return generate_outputs_api(None, text_p, *args)
        raise gr.Error("Please provide an input in one of the tabs.")

    # The text tab has no upload widget, so the text prompt is wired in once
    # only; passing it a second time as a "file" made text-only requests fail.
    generate_btn.click(
        fn=master_process,
        inputs=[
            text_prompt, image_prompt, audio_prompt, doc_prompt,
            image_file, audio_file, doc_file,
            output_formats, selected_layouts, selected_fonts,
            num_columns_slider, page_w_mult_slider, page_h_mult_slider,
        ],
        outputs=[ai_response_output, file_explorer_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)