# Source: Hugging Face Spaces file view (Space status: build error); commit afb8307, file size 12,413 bytes.
import gradio as gr
from pathlib import Path
import datetime
import re
import requests
import os
import shutil
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter, A4, legal, landscape
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# --- Configuration & Setup ---
# Page layouts offered in the UI: display label -> {"size": ReportLab page size}.
# The landscape variants are produced by passing the portrait size to landscape().
LAYOUTS = {
    "A4 Portrait": {"size": A4},
    "A4 Landscape": {"size": landscape(A4)},
    "Letter Portrait": {"size": letter},
    "Letter Landscape": {"size": landscape(letter)},
    "Legal Portrait": {"size": legal},
    "Legal Landscape": {"size": landscape(legal)},
}
# Directory that receives the generated PDFs; created at import time if missing.
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)
# Directory holding the .ttf font files; created at import time if missing.
FONT_DIR = Path("fonts")
FONT_DIR.mkdir(exist_ok=True)
# Font name written into <font name="..."> tags around emoji runs.
EMOJI_FONT_NAME = "NotoColorEmoji"
# --- Font & Emoji Handling ---
def download_fonts():
    """Download DejaVuSans (text) and NotoColorEmoji (emoji) into FONT_DIR if missing.

    Fonts already present on disk are left untouched. The download is
    best-effort: a failure is printed and the remaining fonts are still
    attempted, so the application can start without network access.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true"
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue
        print(f"Downloading {font_filename}...")
        # Write to a sibling temp file first so an interrupted write never
        # leaves a truncated .ttf that later runs would treat as "present".
        partial_path = font_path.with_name(font_filename + ".part")
        try:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection, which would hang application start-up.
            response = requests.get(url, allow_redirects=True, timeout=30)
            response.raise_for_status()
            partial_path.write_bytes(response.content)
            partial_path.replace(font_path)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
            try:
                partial_path.unlink()
            except OSError:
                # Nothing to clean up (or cleanup itself failed); stay best-effort.
                pass
        else:
            print(f"{font_filename} downloaded successfully.")
def discover_and_register_fonts():
    """Register every .ttf in FONT_DIR with ReportLab and list the selectable ones.

    Calls download_fonts() first, then registers each font under its file
    stem. Fonts whose name contains "emoji" are registered but left out of
    the returned list. Registration failures are printed and skipped.

    Returns:
        Alphabetically sorted list of registered, user-selectable font names.
    """
    download_fonts()
    selectable = []
    for ttf_path in FONT_DIR.glob("*.ttf"):
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
            continue
        # Don't add the emoji font to the user-selectable list
        if "emoji" in name.lower():
            continue
        selectable.append(name)
    selectable.sort()
    return selectable
# Compiled once at import time. Each range covers a whole Unicode block with
# an INCLUSIVE end point, so the last code point of every block is matched.
_EMOJI_RUN_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str) -> str:
    """Wrap each run of emoji characters in a <font> tag naming the emoji font.

    Args:
        text: Text that may contain emoji.

    Returns:
        ``text`` with every maximal run of consecutive characters from the
        blocks in ``_EMOJI_RUN_PATTERN`` enclosed in
        ``<font name="{EMOJI_FONT_NAME}">...</font>``. Text without such
        characters is returned unchanged.
    """
    # A callable replacement keeps the font name from being interpreted as a
    # regex replacement template (backslashes, group references).
    return _EMOJI_RUN_PATTERN.sub(
        lambda match: f'<font name="{EMOJI_FONT_NAME}">{match.group(1)}</font>',
        text,
    )
# --- ReportLab PDF Generation (Core Logic) ---
def _escape_markup(text: str) -> str:
    """Escape &, < and > so text is shown literally by Paragraph's markup parser."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _table_flowables(table_rows):
    """Return a styled Table for the collected rows followed by a small Spacer."""
    table = Table(table_rows, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0,0), (-1,0), colors.lightgrey),
        ('TEXTCOLOR', (0,0), (-1,0), colors.black),
        ('ALIGN', (0,0), (-1,-1), 'LEFT'),
        ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
        ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0,0), (-1,0), 12),
        ('GRID', (0,0), (-1,-1), 1, colors.black)
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Recognised constructs: ``#``/``##``/``###`` headings, ``*``/``-`` bullets,
    numbered list items, pipe tables, fenced code blocks, ``**bold**`` and
    ``_italic_`` in plain paragraphs, blank lines (rendered as a Spacer), and
    the ``---PAGE_BREAK---`` marker (surrounded by blank lines), which becomes
    a PageBreak. Emoji runs are wrapped via apply_emoji_font().

    Args:
        markdown_text: The Markdown source.
        font_name: Registered font name used for body text and headings.

    Returns:
        The story: a list of flowables ready for ``doc.build``.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # borderPadding is the ParagraphStyle attribute for inner padding; an
    # unknown keyword such as "padding" is stored but never used.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke, borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)

    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split('\n\n---PAGE_BREAK---\n\n')
    last_page_index = len(pages) - 1
    for page_index, page_content in enumerate(pages):
        in_code_block = False
        code_lines = []
        table_rows = []
        for line in page_content.split('\n'):
            stripped = line.strip()

            # Inside a fenced block everything is verbatim until the closing
            # fence, including lines that look like table rows.
            if in_code_block:
                if stripped.startswith("```"):
                    in_code_block = False
                    story.append(Paragraph('<br/>'.join(code_lines), style_code))
                    code_lines = []
                else:
                    code_lines.append(_escape_markup(line))
                continue

            # Table rows are collected until the first non-table line.
            if stripped.startswith('|') and stripped.endswith('|'):
                # Header separator rows such as |---|:--| carry no data.
                if all(c in '-|: ' for c in stripped):
                    continue
                cells = [apply_emoji_font(c.strip()) for c in stripped.strip('|').split('|')]
                table_rows.append([Paragraph(cell, style_normal) for cell in cells])
                continue
            if table_rows:
                story.extend(_table_flowables(table_rows))
                # Start fresh so the next table does not repeat these rows.
                table_rows = []

            if stripped.startswith("```"):
                in_code_block = True
                continue

            # Markdown elements to Flowables
            line_with_emoji = apply_emoji_font(line)
            if line.startswith("# "):
                story.append(Paragraph(line_with_emoji[2:], style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(line_with_emoji[3:], style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(line_with_emoji[4:], style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(line_with_emoji.strip()[2:], style_normal, bulletText='•'))
            elif re.match(r'^\d+\.\s', stripped):
                story.append(Paragraph(line_with_emoji.strip(), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                bolded = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line_with_emoji)
                formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', bolded)
                story.append(Paragraph(formatted_line, style_normal))

        # Flush whatever was still open when the page ended so a trailing
        # table or an unterminated code block is not silently dropped.
        if table_rows:
            story.extend(_table_flowables(table_rows))
        if code_lines:
            story.append(Paragraph('<br/>'.join(code_lines), style_code))

        if page_index < last_page_index:
            story.append(PageBreak())
    return story
# --- Gradio API Function ---
def _uploaded_path(file_obj) -> str:
    """Return the filesystem path of an upload given as a path string or an object with ``.name``."""
    return str(getattr(file_obj, "name", file_obj))


def _column_frames(doc, num_columns, gutter=0.2 * inch):
    """Return ``num_columns`` equal-width Frames that together span the document's printable width."""
    # Subtract the gutters first, then divide, so columns plus gutters add up
    # to doc.width for any column count.
    frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
    return [
        Frame(doc.leftMargin + index * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height, id=f'col{index}')
        for index in range(num_columns)
    ]


def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (document, layout, font) combination.

    OUTPUT_DIR is emptied at the start of every call, so only the PDFs from
    the current run are returned.

    Args:
        files: Uploaded Markdown files (path strings or objects with ``.name``).
        layouts: Selected keys of LAYOUTS.
        fonts: Selected registered font names.
        combine_files: If true, all uploads are joined (separated by page
            breaks) into a single "Combined_Document".
        num_columns: Number of text columns per page; values above 1 use a
            multi-column page template.
        progress: Gradio progress tracker used to wrap the task loop.

    Returns:
        Tuple of (list of generated PDF paths, log text).

    Raises:
        gr.Error: If no files, layouts, or fonts were provided.
    """
    if not files: raise gr.Error("Please upload at least one Markdown file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")

    # The value is used with range() and in the file name, so normalise it to
    # a positive int in case the UI delivers a float.
    num_columns = max(1, int(num_columns))

    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

    log_lines = ["Starting PDF generation..."]
    upload_paths = [_uploaded_path(file_obj) for file_obj in files]
    md_contents = [Path(path).read_text(encoding='utf-8') for path in upload_paths]

    if combine_files:
        documents = [("Combined_Document", '\n\n---PAGE_BREAK---\n\n'.join(md_contents))]
    else:
        documents = [(Path(path).stem, content) for path, content in zip(upload_paths, md_contents)]
    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    date_str = datetime.datetime.now().strftime("%Y-%m-%d")
    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        # One failing combination is logged and must not abort the others.
        try:
            output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
            output_path = OUTPUT_DIR / output_filename
            log_lines.append(f" - Generating: {output_filename}")
            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]
            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                doc.addPageTemplates([PageTemplate(id='MultiCol', frames=_column_frames(doc, num_columns))])
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch, topMargin=inch, bottomMargin=inch)
            doc.build(story)
        except Exception as e:
            log_lines.append(f" - **ERROR**: {e}")

    log_updates = "\n".join(log_lines) + "\n\n✅ PDF generation complete!"
    generated_files = [str(f) for f in sorted(OUTPUT_DIR.glob("*.pdf"))]
    return generated_files, log_updates
# --- Gradio UI Definition ---
# Fonts are downloaded (if needed) and registered once, at import time.
AVAILABLE_FONTS = discover_and_register_fonts()
# NOTE(review): the decorative emoji in SAMPLE_MARKDOWN and in the UI labels
# below replace characters that had been corrupted by an encoding round-trip;
# adjust them freely, they carry no program logic.
SAMPLE_MARKDOWN = "# Sample Document 📄\n\nThis document shows **bold text**, _italic text_, and emojis like 😊 and 💻.\n\n### A Table\n| Flavor | Rating |\n|-------------|------------|\n| Chocolate | 10/10 |\n| Vanilla | 9/10 |"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")
    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            combine_files_check = gr.Checkbox(label="Combine uploaded files into a single PDF", value=False)
            num_columns_slider = gr.Slider(label="Number of Columns", minimum=1, maximum=4, step=1, value=1)
            # A checkbox group holds a list of selections, so the default is
            # passed as a one-element list.
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=[next(iter(LAYOUTS))])
            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                # Helvetica is used as the fallback when no .ttf could be registered.
                AVAILABLE_FONTS = ["Helvetica"]
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts to Use", value=[AVAILABLE_FONTS[0]])
            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10, interactive=False)
        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### 📄 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")
    # The input order must match the positional parameters of generate_pdfs_api.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()
|