Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
import datetime
|
4 |
+
import re
|
5 |
+
import requests
|
6 |
+
import os
|
7 |
+
import shutil
|
8 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate
|
9 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
10 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
11 |
+
from reportlab.lib.units import inch
|
12 |
+
from reportlab.lib import colors
|
13 |
+
from reportlab.pdfbase import pdfmetrics
|
14 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
15 |
+
|
16 |
+
# --- Configuration & Setup ---
|
17 |
+
# Page layouts offered in the UI, keyed by display label. Each entry holds the
# ReportLab page size for that paper/orientation pair. Insertion order is the
# order the choices are shown in: portrait first, then landscape, per paper.
LAYOUTS = {
    f"{paper} {orientation}": {"size": orient(size)}
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation, orient in (("Portrait", lambda s: s), ("Landscape", landscape))
}

# Working directories, created next to the app on import if they are missing.
OUTPUT_DIR = Path("generated_pdfs")
FONT_DIR = Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)

# Font name used in the <font> tags that wrap emoji characters.
EMOJI_FONT_NAME = "NotoColorEmoji"
|
30 |
+
|
31 |
+
# --- Font & Emoji Handling ---
|
32 |
+
|
33 |
+
def download_fonts(timeout: float = 30.0):
    """Download DejaVuSans (text) and NotoColorEmoji (emoji) into FONT_DIR if missing.

    Fonts that already exist on disk are left untouched. Failures are reported
    on stdout and do not raise, so the app can still start with whatever fonts
    are available.

    Args:
        timeout: Seconds to wait for the server before giving up on a request.
            Without a timeout a stalled connection would block startup forever.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true",
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue

        print(f"Downloading {font_filename}...")
        try:
            r = requests.get(url, allow_redirects=True, timeout=timeout)
            r.raise_for_status()
            # Write to a side file and rename, so an interrupted write never
            # leaves a truncated .ttf that later runs would treat as present.
            partial_path = font_path.with_name(font_path.name + ".part")
            partial_path.write_bytes(r.content)
            partial_path.replace(font_path)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
        else:
            print(f"{font_filename} downloaded successfully.")
|
51 |
+
|
52 |
+
def discover_and_register_fonts():
    """Register every .ttf in FONT_DIR and return the user-selectable font names.

    Triggers the font download first. Each font is registered under its file
    stem. Fonts whose name contains "emoji" are registered but left out of the
    returned list, so they are not offered as a text font. A font that cannot
    be registered is reported on stdout and skipped.

    Returns:
        The selectable font names, sorted alphabetically.
    """
    download_fonts()

    selectable = []
    for ttf_path in list(FONT_DIR.glob("*.ttf")):
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
            continue
        # The emoji font is applied automatically; keep it out of the picker.
        if "emoji" in name.lower():
            continue
        selectable.append(name)

    selectable.sort()
    return selectable
|
67 |
+
|
68 |
+
# Character class of the Unicode blocks that are routed to the emoji font.
# Compiled once at import time instead of on every call. Each range is written
# with an inclusive end, so the last code point of every block (U+1F5FF,
# U+1F9FF, U+26FF, U+27BF) is matched as well.
EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str) -> str:
    """Wrap each run of emoji characters in a <font> tag naming the emoji font.

    Consecutive emoji share a single tag. Text without any character from the
    covered blocks is returned unchanged.

    Args:
        text: One line of text or paragraph markup.

    Returns:
        The text with every emoji run wrapped as
        ``<font name="EMOJI_FONT_NAME">...</font>``.
    """
    return EMOJI_PATTERN.sub(fr'<font name="{EMOJI_FONT_NAME}">\1</font>', text)
|
77 |
+
|
78 |
+
|
79 |
+
# --- ReportLab PDF Generation (Core Logic) ---
|
80 |
+
|
81 |
+
def _append_table(story, table_rows):
    """Append the pending table rows to story as a styled Table, then clear them."""
    if not table_rows:
        return
    # Copy the rows: the caller's list is cleared and reused for the next table.
    table = Table(list(table_rows), hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    story.append(table)
    story.append(Spacer(1, 0.2 * inch))
    table_rows.clear()


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Handled constructs: ``#``/``##``/``###`` headings, ``*``/``-`` bullets,
    numbered items, pipe tables, fenced code blocks, ``**bold**`` and
    ``_italic_`` in plain paragraphs, blank lines, and the
    ``---PAGE_BREAK---`` marker (surrounded by blank lines), which starts a
    new page. Emoji are wrapped in the emoji font via ``apply_emoji_font``.

    Args:
        markdown_text: The Markdown source.
        font_name: Registered font name used for body text and headings.

    Returns:
        The flowables, in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # borderPadding is the ParagraphStyle property for the gap between text and
    # border; the previous "padding" keyword is not one and had no effect.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke,
                                borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)

    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split('\n\n---PAGE_BREAK---\n\n')

    for i, page_content in enumerate(pages):
        in_code_block = False
        code_block_text = ""
        table_data = []

        for line in page_content.split('\n'):
            stripped = line.strip()

            # Table rows. Skipped inside a code fence so that code containing
            # pipes is not mistaken for a table.
            if not in_code_block and stripped.startswith('|') and stripped.endswith('|'):
                # The |---|---| separator row carries no cell content.
                if all(c in '-|: ' for c in stripped):
                    continue
                cells = [apply_emoji_font(c.strip()) for c in stripped.strip('|').split('|')]
                table_data.append([Paragraph(cell, style_normal) for cell in cells])
                continue

            # Any other line ends the table that was being collected.
            _append_table(story, table_data)

            # Code fences toggle code mode; the closing fence emits the block.
            if stripped.startswith("```"):
                in_code_block = not in_code_block
                if not in_code_block:
                    story.append(Paragraph(code_block_text.replace('\n', '<br/>'), style_code))
                    code_block_text = ""
                continue
            if in_code_block:
                # Escape XML metacharacters so code is shown literally rather
                # than parsed as paragraph markup.
                escaped = line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
                code_block_text += escaped + '\n'
                continue

            line_with_emoji = apply_emoji_font(line)

            # Markdown elements to Flowables
            if line.startswith("# "):
                story.append(Paragraph(line_with_emoji[2:], style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(line_with_emoji[3:], style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(line_with_emoji[4:], style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(line_with_emoji.strip()[2:], style_normal, bulletText='\u2022'))
            elif re.match(r'^\d+\.\s', stripped):
                story.append(Paragraph(line_with_emoji.strip(), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                formatted_line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line_with_emoji)
                formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', formatted_line)
                story.append(Paragraph(formatted_line, style_normal))

        # Flush whatever is still pending when the page ends: a table on the
        # last line, or a code fence that was never closed.
        _append_table(story, table_data)
        if in_code_block and code_block_text:
            story.append(Paragraph(code_block_text.replace('\n', '<br/>'), style_code))

        if i < len(pages) - 1:
            story.append(PageBreak())

    return story
|
150 |
+
|
151 |
+
# --- Gradio API Function ---
|
152 |
+
|
153 |
+
def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Render the uploaded Markdown files to PDF for every layout/font combination.

    OUTPUT_DIR is emptied at the start of each call. A failure while building
    one PDF is written to the log and does not stop the remaining tasks.

    Args:
        files: Uploaded files; either objects with a ``.name`` path attribute
            or plain path strings.
        layouts: Selected keys of LAYOUTS.
        fonts: Selected registered font names.
        combine_files: If true, all files are joined (one page break between
            files) into a single document named "Combined_Document".
        num_columns: Number of text columns per page; values above 1 use a
            multi-frame page template.
        progress: Gradio progress tracker.

    Returns:
        A ``(generated_files, log_text)`` tuple: the paths of the PDFs found
        in OUTPUT_DIR and the generation log.

    Raises:
        gr.Error: If no files, layouts or fonts were provided.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # The slider value may arrive as a float; range() and the file name below
    # need a whole number.
    num_columns = max(1, int(num_columns))

    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

    log_lines = ["Starting PDF generation..."]

    # Accept both file wrappers exposing .name and plain path strings.
    source_paths = [Path(getattr(md_file_obj, "name", md_file_obj)) for md_file_obj in files]
    md_contents = [path.read_text(encoding='utf-8') for path in source_paths]

    if combine_files:
        documents = [("Combined_Document", '\n\n---PAGE_BREAK---\n\n'.join(md_contents))]
    else:
        documents = [(path.stem, content) for path, content in zip(source_paths, md_contents)]

    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    date_str = datetime.datetime.now().strftime("%Y-%m-%d")
    gutter = 0.2 * inch  # horizontal gap between neighbouring columns

    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        try:
            output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
            output_path = OUTPUT_DIR / output_filename
            log_lines.append(f" - Generating: {output_filename}")

            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]

            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                # n columns plus (n - 1) gutters must add up to the usable
                # width, so the columns fill the page for any column count.
                frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
                frames = [
                    Frame(doc.leftMargin + col * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height, id=f'col{col}')
                    for col in range(num_columns)
                ]
                doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
                doc.build(story)
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch, topMargin=inch, bottomMargin=inch)
                doc.build(story)
        except Exception as e:
            # Best effort per task: record the failure and carry on.
            log_lines.append(f" - **ERROR**: {e}")

    # NOTE(review): the status glyph was mis-encoded in the reviewed copy and
    # split the literal across two lines; a check mark (U+2705) is assumed.
    log_lines.append("")
    log_lines.append("\u2705 PDF generation complete!")
    log_updates = "\n".join(log_lines)

    generated_files = [str(f) for f in sorted(OUTPUT_DIR.glob("*.pdf"))]
    return generated_files, log_updates
|
207 |
+
|
208 |
+
# --- Gradio UI Definition ---
|
209 |
+
# Fonts are downloaded (if needed) and registered once, at import time.
AVAILABLE_FONTS = discover_and_register_fonts()

# NOTE(review): the emoji in the UI strings below look mis-encoded in the
# reviewed copy of this file; confirm them against the original source.
SAMPLE_MARKDOWN = (
    "# Sample Document π\n\n"
    "This document shows **bold text**, _italic text_, and emojis like π and π».\n\n"
    "### A Table\n"
    "| Flavor | Rating |\n"
    "|-------------|------------|\n"
    "| Chocolate | 10/10 |\n"
    "| Vanilla | 9/10 |"
)

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            uploaded_files = gr.File(
                label="Upload Markdown Files (.md)",
                file_count="multiple",
                file_types=[".md"],
            )
            combine_files_check = gr.Checkbox(
                label="Combine uploaded files into a single PDF",
                value=False,
            )
            num_columns_slider = gr.Slider(
                label="Number of Columns",
                minimum=1,
                maximum=4,
                step=1,
                value=1,
            )
            selected_layouts = gr.CheckboxGroup(
                choices=list(LAYOUTS),
                label="Select Page Layouts",
                value=next(iter(LAYOUTS)),
            )

            # Fall back to a built-in PDF font when no text font was registered.
            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                AVAILABLE_FONTS = ["Helvetica"]

            selected_fonts = gr.CheckboxGroup(
                choices=AVAILABLE_FONTS,
                label="Select Text Fonts to Use",
                value=AVAILABLE_FONTS[0] if AVAILABLE_FONTS else None,
            )
            generate_btn = gr.Button("π Generate PDFs", variant="primary")
            gr.Textbox(
                value=SAMPLE_MARKDOWN,
                label="Sample Markdown (for reference)",
                lines=10,
                interactive=False,
            )

        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### π Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    # Inputs are passed positionally, in the order of generate_pdfs_api's parameters.
    generate_btn.click(
        fn=generate_pdfs_api,
        inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider],
        outputs=[file_output, log_output],
    )

if __name__ == "__main__":
    demo.launch()
|