# Page header captured together with this file: "Spaces: Build error / Build error".
import gradio as gr | |
from pathlib import Path | |
import datetime | |
import re | |
import requests | |
import os | |
import shutil | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter, A4, legal, landscape | |
from reportlab.lib.units import inch | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# --- Configuration & Setup --- | |
# Page layouts offered to the user, keyed by display name. Every paper size is
# listed in portrait orientation first, followed by its landscape variant.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": landscape(dimensions) if orientation == "Landscape" else dimensions
    }
    for paper, dimensions in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Font name used in paragraph markup for emoji characters.
EMOJI_FONT_NAME = "NotoColorEmoji"

# Working directories (relative to the current working directory); both are
# created at import time if they do not exist yet.
OUTPUT_DIR = Path("generated_pdfs")
FONT_DIR = Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
# --- Font & Emoji Handling --- | |
def download_fonts():
    """Download DejaVuSans (text) and NotoColorEmoji (emoji) into FONT_DIR if missing.

    A font file that already exists is left untouched. A failed download is
    reported on stdout and skipped, so the caller can carry on with whatever
    fonts are available.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true"
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue
        print(f"Downloading {font_filename}...")
        # Write to a sibling ".part" file and rename it afterwards: a partially
        # written font must never sit at the final path, because the exists()
        # check above would treat it as a complete download next time.
        partial_path = font_path.with_name(font_filename + ".part")
        try:
            # Without a timeout a stalled connection would block forever.
            r = requests.get(url, allow_redirects=True, timeout=30)
            r.raise_for_status()
            partial_path.write_bytes(r.content)
            partial_path.replace(font_path)
            print(f"{font_filename} downloaded successfully.")
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
            if partial_path.exists():
                partial_path.unlink()
def discover_and_register_fonts():
    """Register every .ttf file in FONT_DIR with ReportLab and list the text fonts.

    The default fonts are downloaded first if they are missing. Each font is
    registered under its file stem; a font that cannot be registered is
    reported on stdout and skipped.

    Returns:
        The sorted names of the registered fonts, leaving out any whose name
        contains "emoji" (case-insensitive) so that the emoji font is not
        offered as a user-selectable text font.
    """
    download_fonts()
    selectable = []
    for ttf_file in FONT_DIR.glob("*.ttf"):
        name = ttf_file.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_file)))
        except Exception as e:
            print(f"Could not register font {ttf_file.name}: {e}")
            continue
        # The emoji font is registered but kept out of the selectable list.
        if "emoji" in name.lower():
            continue
        selectable.append(name)
    return sorted(selectable)
# Unicode blocks treated as emoji. Compiled once at import time; both ends of
# every range are inclusive, so the last code point of each block
# (e.g. U+1F5FF, U+1F9FF, U+27BF) is matched as well.
EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str) -> str:
    """Wrap each run of emoji characters in a <font> tag naming the emoji font.

    Args:
        text: A line of text that may contain emoji characters.

    Returns:
        The text with every run of characters matched by EMOJI_PATTERN wrapped
        in ``<font name="...">`` markup using EMOJI_FONT_NAME. The text is
        returned unchanged when it contains no such characters, or when the
        emoji font is not registered with ReportLab (for example because its
        download or registration failed), since the markup would then name a
        font ReportLab does not know about.
    """
    if not EMOJI_PATTERN.search(text):
        return text
    if EMOJI_FONT_NAME not in pdfmetrics.getRegisteredFontNames():
        return text
    return EMOJI_PATTERN.sub(fr'<font name="{EMOJI_FONT_NAME}">\1</font>', text)
# --- ReportLab PDF Generation (Core Logic) --- | |
def _table_flowables(table_data, cell_style):
    """Return [Table, Spacer] for the accumulated rows, or [] when there are none."""
    if not table_data:
        return []
    # Table rejects rows of unequal length, so pad short rows with empty cells.
    column_count = max(len(row) for row in table_data)
    rows = [
        row + [Paragraph("", cell_style) for _ in range(column_count - len(row))]
        for row in table_data
    ]
    table = Table(rows, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0,0), (-1,0), colors.lightgrey),
        ('TEXTCOLOR', (0,0), (-1,0), colors.black),
        ('ALIGN', (0,0), (-1,-1), 'LEFT'),
        ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
        ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0,0), (-1,0), 12),
        ('GRID', (0,0), (-1,-1), 1, colors.black)
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def _code_flowable(code_lines, style_code):
    """Return one Paragraph holding the escaped code lines, each followed by a line break."""
    return Paragraph("".join(f"{code_line}<br/>" for code_line in code_lines), style_code)


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Recognised syntax, one construct per line:
      * "# ", "## ", "### " headings
      * "* " / "- " bullet items, and "N. " numbered lines (kept as written)
      * pipe tables (lines starting and ending with "|"); separator rows made
        only of "-", "|", ":" and spaces are dropped
      * fenced code blocks delimited by lines starting with three backticks
      * blank lines, rendered as a small vertical spacer
      * **bold** and _italic_ inside ordinary paragraphs
      * "\n\n---PAGE_BREAK---\n\n" as a page-break marker between sections

    Args:
        markdown_text: The Markdown source.
        font_name: Registered font used for body text and headings.

    Returns:
        The list of flowables (the "story") to pass to a document's build().
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # ParagraphStyle has no "padding" property; "borderPadding" is the one
    # that insets the text from the border/background box.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke, borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)
    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split('\n\n---PAGE_BREAK---\n\n')
    for i, page_content in enumerate(pages):
        in_code_block = False
        code_lines, table_data = [], []
        for line in page_content.split('\n'):
            stripped = line.strip()

            # Code fences are handled first so that pipe-delimited lines inside
            # a code block are kept as code instead of being parsed as a table.
            if stripped.startswith("```"):
                story.extend(_table_flowables(table_data, style_normal))
                table_data = []
                in_code_block = not in_code_block
                if not in_code_block:
                    story.append(_code_flowable(code_lines, style_code))
                    code_lines = []
                continue
            if in_code_block:
                # Escape markup characters so Paragraph's XML parser shows
                # them literally. "&" must be replaced first.
                code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
                continue

            # Table rows are collected until the first non-table line.
            if stripped.startswith('|') and stripped.endswith('|'):
                if all(c in '-|: ' for c in stripped):
                    continue  # header separator row
                cells = [apply_emoji_font(c.strip()) for c in stripped.strip('|').split('|')]
                table_data.append([Paragraph(cell, style_normal) for cell in cells])
                continue
            if table_data:
                story.extend(_table_flowables(table_data, style_normal))
                table_data = []

            # Markdown elements to Flowables
            line_with_emoji = apply_emoji_font(line)
            if line.startswith("# "):
                story.append(Paragraph(line_with_emoji[2:], style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(line_with_emoji[3:], style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(line_with_emoji[4:], style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(line_with_emoji.strip()[2:], style_normal, bulletText='\u2022'))
            elif re.match(r'^\d+\.\s', stripped):
                story.append(Paragraph(line_with_emoji.strip(), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line_with_emoji))
                story.append(Paragraph(formatted_line, style_normal))

        # Flush whatever is still open at the end of the page: a table on the
        # last line, or a code fence that was never closed.
        story.extend(_table_flowables(table_data, style_normal))
        if in_code_block:
            story.append(_code_flowable(code_lines, style_code))
        if i < len(pages) - 1:
            story.append(PageBreak())
    return story
# --- Gradio API Function --- | |
def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (document, layout, font) combination.

    Args:
        files: Uploaded Markdown files; each item is either a path string or an
            object whose ``name`` attribute is the path.
        layouts: Selected keys of LAYOUTS.
        fonts: Selected registered font names.
        combine_files: When true, all uploads are joined with the page-break
            marker and rendered as a single "Combined_Document".
        num_columns: Number of text columns per page (1 uses a plain
            single-frame document).
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(generated_files, log)``: the paths of all PDFs found in
        OUTPUT_DIR after the run, and a text log of the run.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.

    Note:
        OUTPUT_DIR is deleted and recreated on every call.
    """
    if not files: raise gr.Error("Please upload at least one Markdown file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")
    # The slider value may arrive as a float; range() below needs an int.
    num_columns = max(1, int(num_columns))
    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)
    log_lines = ["Starting PDF generation..."]

    # Accept both plain path strings and file objects exposing ".name".
    source_paths = [Path(getattr(md_file_obj, "name", md_file_obj)) for md_file_obj in files]
    md_contents = [source_path.read_text(encoding='utf-8') for source_path in source_paths]

    if combine_files:
        documents = [("Combined_Document", '\n\n---PAGE_BREAK---\n\n'.join(md_contents))]
    else:
        documents = [(source_path.stem, content) for source_path, content in zip(source_paths, md_contents)]
    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    # One date for the whole run keeps file names consistent across tasks.
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")
    gutter = 0.2 * inch  # horizontal gap between adjacent columns
    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
        output_path = OUTPUT_DIR / output_filename
        log_lines.append(f" - Generating: {output_filename}")
        # A failure in one task is logged and must not abort the remaining ones.
        try:
            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]
            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                # n columns plus (n - 1) gutters span exactly the printable width.
                frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
                frames = [
                    Frame(doc.leftMargin + col * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height, id=f'col{col}')
                    for col in range(num_columns)
                ]
                doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch, topMargin=inch, bottomMargin=inch)
            doc.build(story)
        except Exception as e:
            log_lines.append(f" - **ERROR**: {e}")

    log_lines.extend(["", "\u2705 PDF generation complete!"])
    generated_files = sorted(str(f) for f in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "\n".join(log_lines)
# --- Gradio UI Definition --- | |
# Runs at import time: downloads missing fonts, registers every .ttf found in
# FONT_DIR and keeps the names of the selectable text fonts.
AVAILABLE_FONTS = discover_and_register_fonts()
# Read-only reference text shown in the UI.
# NOTE(review): the Greek-looking glyphs in this and the following UI strings
# look like mis-decoded emoji - confirm against the original file.
SAMPLE_MARKDOWN = "# Sample Document π\n\nThis document shows **bold text**, _italic text_, and emojis like π and π».\n\n### A Table\n| Flavor | Rating |\n|-------------|------------|\n| Chocolate | 10/10 |\n| Vanilla | 9/10 |"
# Components appear in the page in the order they are created inside the
# nested layout contexts, so statement order here defines the layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")
    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            combine_files_check = gr.Checkbox(label="Combine uploaded files into a single PDF", value=False)
            num_columns_slider = gr.Slider(label="Number of Columns", minimum=1, maximum=4, step=1, value=1)
            # The first layout is preselected.
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=list(LAYOUTS.keys())[0])
            # Fall back to the built-in Helvetica when no text font was registered.
            # NOTE(review): gr.Warning is called while the UI is being built,
            # not inside an event handler - confirm it is shown as intended.
            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                AVAILABLE_FONTS = ["Helvetica"]
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts to Use", value=AVAILABLE_FONTS[0] if AVAILABLE_FONTS else None)
            generate_btn = gr.Button("π Generate PDFs", variant="primary")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10, interactive=False)
        # Right column: generation log and download list.
        with gr.Column(scale=2):
            gr.Markdown("### π Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")
    # The inputs are passed positionally, in the order of generate_pdfs_api's
    # parameters; its two return values fill the file list and the log.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider], outputs=[file_output, log_output])
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()