Spaces:
Build error
Build error
import gradio as gr | |
from pathlib import Path | |
import datetime | |
import re | |
import requests | |
import os | |
import shutil | |
import fitz # PyMuPDF | |
from PIL import Image | |
from collections import defaultdict | |
import io | |
from pypdf import PdfWriter | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter, A4, legal, landscape | |
from reportlab.lib.units import inch | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# --- Configuration & Setup ---
# Every path used by the app hangs off the directory it was launched from.
CWD = Path.cwd()

# Page layouts offered in the UI, keyed by display name (insertion order is
# the order in which the choices are listed).
LAYOUTS = {
    layout_label: {"size": page_size}
    for layout_label, page_size in (
        ("A4 Portrait", A4),
        ("A4 Landscape", landscape(A4)),
        ("Letter Portrait", letter),
        ("Letter Landscape", landscape(letter)),
        ("Legal Portrait", legal),
        ("Legal Landscape", landscape(legal)),
    )
}

OUTPUT_DIR = CWD.joinpath("generated_pdfs")
PREVIEW_DIR = CWD.joinpath("previews")
FONT_DIR = CWD.joinpath("fonts")
EMOJI_FONT_NAME = "NotoColorEmoji"

# Make sure the working directories exist before anything reads or writes them.
for _work_dir in (OUTPUT_DIR, PREVIEW_DIR, FONT_DIR):
    _work_dir.mkdir(exist_ok=True)
# --- Font & Emoji Handling --- | |
def download_and_register_fonts():
    """Download the default fonts if missing, then register every .ttf in FONT_DIR.

    Each font file is registered with ReportLab under its file stem and again
    under "<stem>-Bold", "<stem>-Italic" and "<stem>-BoldItalic" (all pointing
    at the same file), and the four names are tied together as a font family.

    Returns:
        Sorted list of the registered font names that users may select. Fonts
        whose name contains "emoji" are registered but left out of the list.
    """
    print("--- Font Registration Process Starting ---")
    fonts_to_download = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true"
    }
    for filename, url in fonts_to_download.items():
        font_path = FONT_DIR / filename
        if font_path.exists():
            continue
        print(f"Downloading {filename}...")
        # Write to a temporary sibling and rename only once the write has
        # finished: a failed write must never leave a truncated .ttf behind,
        # because the exists() check above would treat it as a finished
        # download on every later run. The ".part" suffix also keeps the
        # temporary file out of the "*.ttf" scan below.
        partial_path = font_path.with_name(font_path.name + ".part")
        try:
            r = requests.get(url, allow_redirects=True, timeout=20)
            r.raise_for_status()
            partial_path.write_bytes(r.content)
            partial_path.replace(font_path)
            print(f"{filename} downloaded.")
        except (requests.RequestException, OSError) as e:
            # Best effort: a missing default font is reported, not fatal.
            print(f"Failed to download {filename}: {e}")
            if partial_path.exists():
                partial_path.unlink()

    font_names = []
    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    # Sorted so that the log output and registration order are deterministic.
    font_files = sorted(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(font_files)} .ttf files: {[f.name for f in font_files]}")
    for font_path in font_files:
        try:
            font_name = font_path.stem
            print(f"Registering font: '{font_name}' from {font_path.name}")
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            # <b>/<i> markup needs bold/italic members in the font family.
            # The same file is registered under every variant name so the
            # lookup succeeds; styled text therefore uses the regular glyphs.
            pdfmetrics.registerFont(TTFont(f"{font_name}-Bold", str(font_path)))
            pdfmetrics.registerFont(TTFont(f"{font_name}-Italic", str(font_path)))
            pdfmetrics.registerFont(TTFont(f"{font_name}-BoldItalic", str(font_path)))
            pdfmetrics.registerFontFamily(
                font_name,
                normal=font_name,
                bold=f"{font_name}-Bold",
                italic=f"{font_name}-Italic",
                boldItalic=f"{font_name}-BoldItalic",
            )
            if "emoji" not in font_name.lower():
                font_names.append(font_name)
        except Exception as e:
            # Deliberately broad: an unusable font file is skipped so the
            # remaining fonts can still be registered.
            print(f"Could not register font {font_path.name}: {e}")
    print(f"Successfully registered user-selectable fonts: {font_names}")
    print("--- Font Registration Process Finished ---")
    return sorted(font_names)
# Compiled once at import time instead of on every call. Character-class
# ranges are inclusive, so the last code point of each Unicode block
# (e.g. U+1F5FF, U+1F9FF, U+27BF) is covered.
EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"  # Miscellaneous Symbols
    "\u2700-\u27BF"  # Dingbats
    "]+)"
)


def apply_emoji_font(text: str) -> str:
    """Wrap each run of emoji characters in a <font> tag naming the emoji font.

    Args:
        text: Text that may contain emoji.

    Returns:
        The text with every maximal run of characters matched by
        EMOJI_PATTERN wrapped in ``<font name="...">...</font>``, where the
        font name is EMOJI_FONT_NAME. Text without emoji is returned unchanged.
    """
    return EMOJI_PATTERN.sub(fr'<font name="{EMOJI_FONT_NAME}">\1</font>', text)
# --- PDF Generation & Handling --- | |
def markdown_to_story(markdown_text: str, font_name: str):
    """Convert lightweight markdown into a list of ReportLab flowables.

    The text is processed line by line:

    * a line consisting only of ``---PAGE_BREAK---`` becomes a PageBreak;
    * a line starting with ``"# "`` becomes a heading paragraph;
    * every other line (including blank ones) becomes a body paragraph.

    Within a line, ``**bold**`` and ``_italic_`` are turned into <b>/<i>
    markup and emoji are wrapped by apply_emoji_font.

    Args:
        markdown_text: The markdown source.
        font_name: Name of a registered font used for body and heading text.

    Returns:
        The list of flowables, in document order.
    """
    # Local import keeps this block self-contained (stdlib only).
    from xml.sax.saxutils import escape

    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)

    story = []
    for line in markdown_text.split('\n'):
        # Emit a real page break for the marker instead of printing it.
        if line.strip() == "---PAGE_BREAK---":
            story.append(PageBreak())
            continue

        is_heading = line.startswith("# ")
        content = line[2:] if is_heading else line

        # Paragraph text is parsed as XML-like markup, so literal &, < and >
        # from the user's markdown must be escaped before any tags are added.
        markup = apply_emoji_font(escape(content))
        markup = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', markup)
        # Only treat underscores at word boundaries as emphasis so that
        # identifiers such as snake_case_name are left intact.
        markup = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'<i>\1</i>', markup)

        story.append(Paragraph(markup, style_h1 if is_heading else style_normal))
    return story
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in PREVIEW_DIR.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview image path as a string, or None if the preview could not
        be created (the failure is printed, not raised).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        # The context manager closes the document even when loading or
        # rendering the page fails, so the file handle is never leaked.
        with fitz.open(str(pdf_path)) as doc:
            first_page = doc.load_page(0)
            first_page.get_pixmap().save(str(preview_path))
    except Exception as e:
        # Deliberately broad: a missing preview must not abort generation.
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
    return str(preview_path)
# --- Main API Function --- | |
def _build_markdown_pdf(markdown_text, font_name, pagesize, num_columns):
    """Render markdown into an in-memory PDF laid out in `num_columns` columns."""
    buffer = io.BytesIO()
    story = markdown_to_story(markdown_text, font_name)
    if num_columns > 1:
        margin = 0.5 * inch
        gutter = 0.2 * inch
        doc = BaseDocTemplate(buffer, pagesize=pagesize, leftMargin=margin, rightMargin=margin, topMargin=margin, bottomMargin=margin)
        # Split the printable width evenly: n columns share (n - 1) gutters,
        # so the columns span exactly from the left to the right margin.
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(buffer, pagesize=pagesize)
    doc.build(story)
    buffer.seek(0)
    return buffer


def _build_image_pdf(img_path):
    """Render one image into an in-memory, single-page PDF sized to the image."""
    with Image.open(img_path) as img:
        img_width, img_height = img.size
    buffer = io.BytesIO()
    doc = BaseDocTemplate(buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
    # Frames pad their content by default; with that padding a flowable as
    # large as the page cannot fit, so use an explicit zero-padding frame.
    full_page = Frame(0, 0, img_width, img_height, leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0)
    doc.addPageTemplates([PageTemplate(id='FullPage', frames=[full_page])])
    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
    buffer.seek(0)
    return buffer


def generate_pdfs_api(files, layouts, fonts, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Main function to drive PDF generation from the Gradio UI.

    Uploaded files are grouped by the part of their file name before the
    first underscore. For every group, layout and font combination, one PDF
    is written to OUTPUT_DIR containing the group's markdown files (joined
    with page-break markers) followed by one page per image.

    Args:
        files: Uploaded files; each item is either an object with a ``name``
            attribute holding its path, or a plain path.
        layouts: Names of the selected page layouts (keys of LAYOUTS).
        fonts: Names of the selected fonts.
        num_columns: Number of text columns for markdown content.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(preview_image_paths, log_text, pdf_paths)``.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # The value comes from a UI slider; coerce so range() and the file name
    # always see an integer (assumes the slider may deliver a float).
    num_columns = int(num_columns)

    # Start from empty output directories. rmtree is best effort, so mkdir
    # must tolerate a directory that could not be removed.
    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir(exist_ok=True)

    grouped_files = defaultdict(lambda: {'md': [], 'img': []})
    for f in files:
        file_path = Path(getattr(f, "name", f))
        stem = file_path.stem.split('_')[0]
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped_files[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped_files[stem]['img'].append(file_path)

    log_lines = []
    generated_pdf_paths = []
    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        # Read the markdown once per group, not once per layout/font pair.
        md_content = None
        if assets['md']:
            md_content = "\n\n---PAGE_BREAK---\n\n".join([p.read_text(encoding='utf-8') for p in assets['md']])

        for layout_name in layouts:
            pagesize = LAYOUTS[layout_name]["size"]
            for font_name in fonts:
                merger = PdfWriter()
                if md_content is not None:
                    merger.append(fileobj=_build_markdown_pdf(md_content, font_name, pagesize, num_columns))
                for img_path in assets['img']:
                    merger.append(fileobj=_build_image_pdf(img_path))
                if len(merger.pages) == 0:
                    continue

                time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{font_name}_Cols{num_columns}.pdf"
                output_path = OUTPUT_DIR / filename
                with open(output_path, "wb") as out_file:
                    merger.write(out_file)
                generated_pdf_paths.append(output_path)
                log_lines.append(f"Generated: {filename}\n")

    log_updates = "".join(log_lines)
    gallery_previews = [create_pdf_preview(p) for p in generated_pdf_paths]
    final_gallery = [g for g in gallery_previews if g is not None]
    return final_gallery, log_updates, [str(p) for p in generated_pdf_paths]
# --- Gradio UI Definition --- | |
# Register fonts once at import time; the result feeds the font selector.
AVAILABLE_FONTS = download_and_register_fonts()
# NOTE(review): the "π…" sequences below look like mis-decoded emoji
# (mojibake). The literal is kept as found; confirm the intended characters.
SAMPLE_MARKDOWN = "# Sample Document π\n\nThis shows **bold**, _italic_, and emojis like π and π».\n\n### A Table\n| Flavor | Rating |\n|---|---|\n| Chocolate π«| 10/10 |\n| Vanilla π¦| 9/10 |\n\n```python\n# Code blocks too!\ndef hello():\n return 'Hello from PDF!'\n```"
# Written as UTF-8 explicitly: the markdown files are read back with
# encoding='utf-8', and the platform default encoding may differ or may be
# unable to encode the non-ASCII sample text.
with open(CWD / "sample.md", "w", encoding="utf-8") as f:
    f.write(SAMPLE_MARKDOWN)
# NOTE(review): several UI strings below contain sequences such as "π" and
# "βοΈ" that look like mis-decoded emoji (mojibake). They are kept as found;
# confirm the intended characters.
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced PDF Generator")
    gr.Markdown("Upload Markdown and Image files. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`). The app will generate PDFs for all selected variations and display them below.")
    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg", ".jpeg"])
            num_columns_slider = gr.Slider(label="Number of Columns for Text", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=["A4 Portrait"])
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
            generate_btn = gr.Button("π Generate PDFs", variant="primary")
        # Right column: previews, log and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### πΌοΈ PDF Preview Gallery")
            gallery_output = gr.Gallery(label="Generated PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            downloadable_files_output = gr.Files(label="Download Generated PDFs")

    def auto_run_demo(request: gr.Request):
        """Generate a sample PDF from sample.md when the app loads.

        Returns:
            The ``(previews, logs, files)`` tuple produced by
            generate_pdfs_api, or empty outputs with an explanatory log
            message when no font is available.
        """
        # Local import keeps this block self-contained (stdlib only).
        from types import SimpleNamespace

        print("Running initial demo generation...")
        if not AVAILABLE_FONTS:
            # Without a font generate_pdfs_api raises gr.Error; report the
            # situation in the log instead of showing an error on page load.
            return None, "No fonts are available, so the sample PDF was not generated.", None

        # generate_pdfs_api only reads the `name` attribute of each file, so
        # a minimal stand-in object pointing at sample.md is sufficient.
        demo_files = [SimpleNamespace(name=str(CWD / "sample.md"))]
        previews, logs, files = generate_pdfs_api(demo_files, ["A4 Portrait"], [AVAILABLE_FONTS[0]], 1)
        return previews, logs, files

    demo.load(auto_run_demo, inputs=None, outputs=[gallery_output, log_output, downloadable_files_output])
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, num_columns_slider], outputs=[gallery_output, log_output, downloadable_files_output])

if __name__ == "__main__":
    demo.launch()