Spaces:
Build error
Build error
import datetime
import html
import io
import os
import re
import shutil
from pathlib import Path

import fitz  # PyMuPDF
import gradio as gr
from PIL import Image

# Imports for new formats
from docx import Document
import openpyxl
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter, A4, legal, landscape
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# --- Configuration & Setup --- | |
# All working paths are resolved against the directory the app is started from.
CWD = Path.cwd()

# UI label -> page-size entry, listed in the order the choices are offered.
LAYOUTS = {
    label: {"size": page_size}
    for label, page_size in (
        ("A4 Portrait", A4),
        ("A4 Landscape", landscape(A4)),
        ("Letter Portrait", letter),
        ("Letter Landscape", landscape(letter)),
    )
}

OUTPUT_DIR = CWD / "generated_outputs"
PREVIEW_DIR = CWD / "previews"
FONT_DIR = CWD  # .ttf files are looked up directly in the working directory

# Create necessary directories
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
# Path of the emoji font as a string; stays None until setup_fonts() finds it.
EMOJI_FONT_PATH = None
# Rendered emoji images, keyed by (emoji_char, size_pt).
EMOJI_IMAGE_CACHE = {}
def setup_fonts():
    """Locate the emoji font and register every .ttf file found in FONT_DIR.

    Sets the module-level ``EMOJI_FONT_PATH`` as a side effect.

    Returns:
        Sorted list of registered font names usable for body text (the emoji
        font is excluded), or ``['Helvetica']`` when none could be registered.

    Raises:
        FileNotFoundError: if 'NotoColorEmoji-Regular.ttf' is not in FONT_DIR.
    """
    global EMOJI_FONT_PATH

    emoji_font_file = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not emoji_font_file.exists():
        raise FileNotFoundError("CRITICAL: 'NotoColorEmoji-Regular.ttf' not found. This file is required for emoji support.")
    EMOJI_FONT_PATH = str(emoji_font_file)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    usable_fonts = []
    for ttf_file in FONT_DIR.glob("*.ttf"):
        family = ttf_file.stem
        try:
            pdfmetrics.registerFont(TTFont(family, str(ttf_file)))
            # Only one face per file is available, so every style variant maps to it.
            pdfmetrics.registerFontFamily(family, normal=family, bold=family, italic=family, boldItalic=family)
        except Exception as e:
            # Best effort: a font that cannot be registered is reported and skipped.
            print(f"Could not register font {ttf_file.name}: {e}")
            continue
        if "notocoloremoji" not in family.lower():
            usable_fonts.append(family)

    return sorted(usable_fonts) if usable_fonts else ['Helvetica']
def render_emoji_as_image(emoji_char, size_pt):
    """Render one emoji character as a PNG image held in memory.

    The character is drawn with the emoji font on a temporary square page
    (1.5 x ``size_pt`` per side) and rasterised at 300 dpi with an alpha
    channel. The PNG bytes are cached per ``(emoji_char, size_pt)``.

    Args:
        emoji_char: The single character to render.
        size_pt: Font size in points.

    Returns:
        A fresh ``io.BytesIO`` positioned at the start of the PNG data, or
        ``None`` when no emoji font is configured or rendering fails.
    """
    if not EMOJI_FONT_PATH:
        return None

    cache_key = (emoji_char, size_pt)
    png_bytes = EMOJI_IMAGE_CACHE.get(cache_key)
    if png_bytes is None:
        try:
            doc = fitz.open()
            try:
                side = size_pt * 1.5
                page = doc.new_page(width=side, height=side)
                page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
                page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
                png_bytes = page.get_pixmap(alpha=True, dpi=300).tobytes("png")
            finally:
                # Close the scratch document even when rendering raises.
                doc.close()
        except Exception as e:
            print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
            return None
        EMOJI_IMAGE_CACHE[cache_key] = png_bytes

    # Cache the immutable bytes and hand every caller its own buffer, so
    # consumers never share (or exhaust) a single read cursor.
    return io.BytesIO(png_bytes)
# --- Document Generation Engines --- | |
def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Handled per line (after stripping surrounding whitespace):
      * ``# text``   -> H1; every H1 except the first is preceded by a page break
      * ``## text``  -> H2
      * ``- text`` / ``* text`` -> indented body paragraph
      * other text   -> body paragraph
      * blank line   -> 0.2 inch vertical spacer
    ``**bold**`` spans become ``<b>`` markup, and emoji characters are replaced
    by inline images produced by ``render_emoji_as_image``.

    Args:
        markdown_text: The Markdown source.
        font_name: Name of an already registered font used for all text.

    Returns:
        The list of flowables (the "story") to pass to a document template.
    """
    style_body = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('h1', fontName=font_name, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', fontName=font_name, fontSize=18, leading=22, spaceAfter=10, textColor=colors.darkslateblue)
    # ParagraphStyle requires a name as its first argument; build the bullet
    # style once here instead of once per bullet line.
    style_bullet = ParagraphStyle('BulletText', parent=style_body, leftIndent=20, bulletIndent=10)

    # Inclusive code-point ranges, so the last character of each range
    # (U+1F5FF, U+1F9FF, U+26FF) is matched as well.
    emoji_pattern = re.compile(
        "(["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F900-\U0001F9FF"
        "\u2600-\u26FF"
        "]+)"
    )

    def text_to_flowables(text, style):
        """Turn one line into a single-row table of text and emoji-image cells."""
        flowables = []
        for part in emoji_pattern.split(text):
            if not part:
                continue
            if emoji_pattern.match(part):
                for emoji_char in part:
                    img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                    if img_buffer is not None:
                        flowables.append(
                            ReportLabImage(img_buffer, height=style.fontSize * 1.2, width=style.fontSize * 1.2)
                        )
            else:
                # Paragraph parses its text as XML-like markup, so literal
                # '&', '<' and '>' must be escaped before adding our own tags.
                safe_part = html.escape(part, quote=False)
                formatted_part = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', safe_part)
                flowables.append(Paragraph(formatted_part, style))
        if not flowables:
            return Spacer(0, 0)
        return Table(
            [flowables],
            colWidths=[None] * len(flowables),
            style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')],
        )

    story = []
    first_heading = True
    for line in markdown_text.split('\n'):
        stripped = line.strip()
        if stripped.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            story.append(text_to_flowables(stripped[2:], style_h1))
            first_heading = False
        elif stripped.startswith("## "):
            story.append(text_to_flowables(stripped[3:], style_h2))
        elif stripped.startswith(("- ", "* ")):
            story.append(text_to_flowables(stripped[2:], style_bullet))
        elif stripped:
            story.append(text_to_flowables(stripped, style_body))
        else:
            story.append(Spacer(1, 0.2 * inch))
    return story
def create_pdf(md_content, font_name, pagesize, num_columns):
    """Generate a PDF from markdown content.

    Args:
        md_content: Markdown source text.
        font_name: Registered font name used for all text.
        pagesize: ReportLab page size as a (width, height) tuple.
        num_columns: Number of text columns; values of 1 or less give a
            single-column document.

    Returns:
        An ``io.BytesIO`` holding the PDF data (read it with ``getvalue()``).
    """
    md_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name)

    # UI sliders may deliver the count as a float; range() below needs an int.
    columns = int(num_columns)
    if columns > 1:
        margin = 0.5 * inch
        gutter = 0.2 * inch
        doc = BaseDocTemplate(
            md_buffer,
            pagesize=pagesize,
            leftMargin=margin,
            rightMargin=margin,
            topMargin=margin,
            bottomMargin=margin,
        )
        # n frames plus (n - 1) gutters must add up to exactly the printable
        # width, whatever the column count.
        frame_width = (doc.width - (columns - 1) * gutter) / columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)

    doc.build(story)
    return md_buffer
def create_docx(md_content):
    """Generate a DOCX document from markdown content.

    ``# `` and ``## `` lines become level-1 and level-2 headings, ``- `` / ``* ``
    lines become 'List Bullet' paragraphs, other non-blank lines become normal
    paragraphs and blank lines are skipped. ``**bold**`` spans are rendered as
    bold runs in every kind of line.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``Document``; the caller is responsible for saving it.
    """
    document = Document()
    for raw_line in md_content.split('\n'):
        line = raw_line.strip()
        if line.startswith('# '):
            _add_markdown_runs(document.add_heading(level=1), line[2:])
        elif line.startswith('## '):
            _add_markdown_runs(document.add_heading(level=2), line[3:])
        elif line.startswith(('- ', '* ')):
            _add_markdown_runs(document.add_paragraph(style='List Bullet'), line[2:])
        elif line:
            _add_markdown_runs(document.add_paragraph(), line)
    return document


def _add_markdown_runs(paragraph, text):
    """Append text to paragraph, rendering ``**bold**`` spans as bold runs."""
    # With a single capturing group, re.split() returns the captured (bold)
    # text at odd indices and the plain text around it at even indices, so
    # stray asterisks that are not part of a closed pair stay plain text.
    for index, part in enumerate(re.split(r'\*\*(.*?)\*\*', text)):
        if not part:
            continue
        run = paragraph.add_run(part)
        if index % 2:
            run.bold = True
def create_xlsx(md_content):
    """Build a workbook with one column per H1 section of the markdown.

    The text is split at every line starting with ``# ``. For each section the
    heading text goes into row 1 of its column and each following line into
    the rows below it, with surrounding whitespace stripped.

    Returns:
        The populated workbook; the caller is responsible for saving it.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # The leading newline lets a heading on the very first line split as well.
    sections = re.split(r'\n# ', '\n' + md_content)
    if not sections[0].strip():
        sections = sections[1:]

    for column, section in enumerate(sections, start=1):
        # Row 1 holds the section header, rows 2.. the remaining lines.
        for row, text in enumerate(section.split('\n'), start=1):
            sheet.cell(row=row, column=column, value=text.strip())
    return workbook
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG file in PREVIEW_DIR.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview image path as a string, or ``None`` if rendering failed
        (the failure is printed to the console).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            pix = doc.load_page(0).get_pixmap(dpi=150)
            pix.save(str(preview_path))
        finally:
            # Release the document even when loading or rendering raises.
            doc.close()
    except Exception as e:
        print(f"Preview failed for {pdf_path.name}: {e}")
        return None
    return str(preview_path)
# --- Main API Function --- | |
def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Combine the uploaded Markdown files and generate the requested documents.

    Previous outputs and previews are deleted first. Every readable file
    becomes one H1 section of the combined document; a file that does not
    already start with a heading gets ``# `` prepended to its first line.

    Args:
        files: Uploaded files, either path strings or objects with ``.name``.
        output_formats: Any of "PDF", "DOCX", "XLSX".
        layouts: Keys of LAYOUTS to render PDFs for.
        fonts: Font names to render PDFs with.
        num_columns: Number of text columns for PDFs.
        progress: Gradio progress tracker.

    Returns:
        Tuple ``(preview image paths, log message, generated file paths)``.

    Raises:
        gr.Error: if no files or formats were given, or no file could be read.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")
    if not output_formats:
        raise gr.Error("Please select at least one output format.")

    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        # rmtree errors are ignored, so the directory may still exist here.
        directory.mkdir(parents=True, exist_ok=True)
    EMOJI_IMAGE_CACHE.clear()

    # Consolidate all markdown content, one H1 section per file.
    sections = []
    for uploaded in files:
        source = Path(getattr(uploaded, "name", uploaded))
        try:
            text = source.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading {source}: {e}")
            continue
        # Only add a heading marker when the file does not bring its own;
        # otherwise the section title would read "# # Title".
        if not text.lstrip().startswith("#"):
            text = "# " + text.lstrip()
        sections.append(text)
    if not sections:
        raise gr.Error("None of the uploaded files could be read.")
    md_content = "\n\n".join(sections)

    generated_files = []
    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        try:
            if format_choice == "PDF":
                if not layouts or not fonts:
                    gr.Warning("Select at least one page layout and one font to generate PDFs.")
                    continue
                for layout_name in layouts:
                    pagesize = LAYOUTS.get(layout_name, {}).get("size", letter)
                    for font_name in fonts:
                        pdf_buffer = create_pdf(md_content, font_name, pagesize, num_columns)
                        filename = f"Document_{layout_name.replace(' ','-')}_{font_name}_{time_str}.pdf"
                        output_path = OUTPUT_DIR / filename
                        output_path.write_bytes(pdf_buffer.getvalue())
                        generated_files.append(output_path)
            elif format_choice == "DOCX":
                output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
                create_docx(md_content).save(output_path)
                generated_files.append(output_path)
            elif format_choice == "XLSX":
                output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
                create_xlsx(md_content).save(output_path)
                generated_files.append(output_path)
        except Exception as e:
            # Top-level boundary: one failing format must not abort the others.
            print(f"Failed to generate {format_choice}: {e}")
            gr.Warning(f"Failed to generate {format_choice}. See console for details.")

    previews = (create_pdf_preview(path) for path in generated_files if path.suffix == '.pdf')
    gallery_previews = [preview for preview in previews if preview]
    log_message = f"Generated {len(generated_files)} files." if generated_files else "Generation failed. Check logs."
    return gallery_previews, log_message, [str(p) for p in generated_files]
# --- Gradio UI Definition --- | |
try:
    AVAILABLE_FONTS = setup_fonts()
except FileNotFoundError as e:
    print(e)
    # Without the emoji font the app cannot generate PDFs. An empty font list
    # marks this degraded state: the UI is still built, but the generate
    # button is not wired up and the launch below is skipped.
    AVAILABLE_FONTS = []

# NOTE(review): the bare "π" in the title and on the button below looks like
# the remains of a mis-decoded emoji whose trailing bytes were lost; restore
# the intended characters from the original file.
with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Format Document Generator") as demo:
    gr.Markdown("# π Multi-Format Document Generator (PDF, DOCX, XLSX)")
    gr.Markdown("Upload one or more Markdown files (`.md`). The tool will combine them and generate documents in your chosen formats. Emojis in PDFs are fully supported! 🥳")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files", file_count="multiple", file_types=[".md"])
            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
            with gr.Accordion("PDF Customization", open=True):
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["Letter Portrait"])
                # Pre-select the first available font, or nothing when none is available.
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=AVAILABLE_FONTS[:1])
                num_columns_slider = gr.Slider(label="Text Columns (for PDF)", minimum=1, maximum=4, step=1, value=1)
            generate_btn = gr.Button("π Generate Documents", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ Output Files")
            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Ready...")
            downloadable_files_output = gr.Files(label="Download Generated Files")
    if not AVAILABLE_FONTS:
        # gr.Warning only reaches the browser from inside an event handler, so
        # the degraded-state notice is rendered as part of the page instead.
        gr.Markdown("The application is in a degraded state. Required font files are missing. Please check the console log and add the necessary files.")
    else:
        generate_btn.click(
            fn=generate_outputs_api,
            inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider],
            outputs=[gallery_output, log_output, downloadable_files_output],
        )

if __name__ == "__main__":
    if AVAILABLE_FONTS:
        demo.launch()
    else:
        print("\n" + "="*60)
        print("Application launch aborted due to missing font files.")
        print("Please ensure 'NotoColorEmoji-Regular.ttf' is in the project directory.")
        print("="*60)