awacke1 committed on
Commit
e1e5118
·
verified ·
1 Parent(s): 1e29c1e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +255 -0
app.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import datetime
4
+ import re
5
+ import os
6
+ import shutil
7
+ import fitz # PyMuPDF
8
+ from PIL import Image
9
+ from collections import defaultdict
10
+ import io
11
+ from pypdf import PdfWriter
12
+
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
14
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
16
+ from reportlab.lib.units import inch
17
+ from reportlab.lib import colors
18
+ from reportlab.pdfbase import pdfmetrics
19
+ from reportlab.pdfbase.ttfonts import TTFont
20
+
21
# --- Configuration & Setup ---
# All working paths are anchored at the directory the process was started in.
CWD = Path.cwd()

# Base page geometries offered in the UI, keyed by their display label.
# Each entry stores the ReportLab page-size tuple under "size"; the landscape
# variant is derived from the same paper size.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": size if orientation == "Portrait" else landscape(size)
    }
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Fonts are looked up next to the application; generated PDFs and their PNG
# previews each get a dedicated sub-directory.
FONT_DIR = CWD
OUTPUT_DIR = CWD / "generated_pdfs"
PREVIEW_DIR = CWD / "previews"

# Create necessary directories
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
38
+
39
+
40
+ # --- Font & Emoji Handling ---
41
+
42
def register_local_fonts():
    """Register every ``*.ttf`` file in ``FONT_DIR`` with ReportLab.

    Each file is registered under its stem and under ``-Bold``, ``-Italic``
    and ``-BoldItalic`` aliases (all pointing at the same file) so that a
    font family exists for bold/italic markup.

    Returns:
        A tuple ``(text_fonts, emoji_font)``: the sorted list of font names
        offered for body text, and the name of the font whose stem contains
        ``notocoloremoji-regular`` (``None`` when no such file exists).
        ``'Helvetica'`` is the sole text font when no usable file was found.
    """
    print("--- Font Registration Process Starting ---")

    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    candidates = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(candidates)} .ttf files: {[c.name for c in candidates]}")

    selectable = []
    emoji_face = None

    for ttf in candidates:
        face = ttf.stem
        # Insertion order matters: normal, bold, italic, boldItalic.
        family = {
            "normal": face,
            "bold": f"{face}-Bold",
            "italic": f"{face}-Italic",
            "boldItalic": f"{face}-BoldItalic",
        }
        try:
            for alias in family.values():
                pdfmetrics.registerFont(TTFont(alias, str(ttf)))
            pdfmetrics.registerFontFamily(face, **family)
        except Exception as e:
            # Best effort: one unreadable font must not block the others.
            print(f"Could not register font {ttf.name}: {e}")
            continue

        lowered = face.lower()
        if "notocoloremoji-regular" in lowered:
            emoji_face = face
        elif "notoemoji" not in lowered:  # Exclude other symbol fonts from text selection
            selectable.append(face)

    if not selectable:
        print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
        selectable.append('Helvetica')

    print(f"Successfully registered user-selectable fonts: {selectable}")
    print(f"Emoji font set to: {emoji_face}")
    print("--- Font Registration Process Finished ---")
    return sorted(selectable), emoji_face
76
+
77
# Runs of characters that should be rendered with the emoji font. Compiled
# once at import time. Ranges are written with inclusive ends so the last
# code point of each Unicode block is covered.
_EMOJI_RUN_PATTERN = re.compile(
    "(["
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols (e.g. the rocket)
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap each run of emoji characters in a ``<font>`` tag.

    Args:
        text: The text to scan.
        emoji_font_name: Name of the registered emoji font. When falsy
            (``None`` or ``""``) the text is returned unchanged.

    Returns:
        ``text`` with every maximal run of emoji characters replaced by
        ``<font name="EMOJI_FONT">run</font>``.
    """
    if not emoji_font_name:
        return text

    # A callable replacement keeps the font name literal; in a template
    # string, backslashes in the name would be read as regex escapes.
    def wrap(match):
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return _EMOJI_RUN_PATTERN.sub(wrap, text)
88
+
89
+
90
+ # --- PDF Generation & Handling ---
91
+
92
def _code_block_flowables(code_lines, style_code):
    """Return the flowables for one fenced code block of XML-escaped lines."""
    markup = "".join(f"{code_line}<br/>" for code_line in code_lines)
    return [Paragraph(markup, style_code), Spacer(1, 0.1 * inch)]


def _table_flowables(table_rows, style_header, style_body):
    """Return the flowables for one pipe table, or ``[]`` when no rows were collected."""
    if not table_rows:
        return []
    header = [Paragraph(cell, style_header) for cell in table_rows[0]]
    body = [[Paragraph(cell, style_body) for cell in row] for row in table_rows[1:]]
    table = Table([header] + body, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Handled constructs: ``#`` / ``##`` / ``###`` headings, ``*`` / ``-``
    bullets, numbered items, pipe tables, fenced code blocks, ``**bold**``
    and ``_italic_``. Every ``# `` heading after the first one is preceded
    by a page break.

    Args:
        markdown_text: The Markdown source.
        font_name: Font used for body text and headings; ``<font_name>-Bold``
            is used for table headers.
        emoji_font: Font name passed to ``apply_emoji_font`` for emoji runs
            (falsy disables emoji wrapping).

    Returns:
        The list of flowables in document order.
    """
    styles = getSampleStyleSheet()
    # Define styles for various markdown elements
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
    # `borderPadding` is the ParagraphStyle attribute for inner padding; the
    # previous `padding=8` keyword was accepted but had no effect.
    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, borderPadding=8, leading=12, fontSize=9)
    style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=f"{font_name}-Bold")

    story = []
    code_lines = []
    table_rows = []
    in_code_block = False
    first_heading = True

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        if stripped.startswith("```"):
            if in_code_block:
                story.extend(_code_block_flowables(code_lines, style_code))
                code_lines = []
            else:
                # A fence directly after table rows ends the table; emit it
                # first so it stays ahead of the code block.
                story.extend(_table_flowables(table_rows, style_table_header, style_normal))
                table_rows = []
            in_code_block = not in_code_block
            continue
        if in_code_block:
            code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
            continue

        if stripped.startswith('|'):
            # Skip the header separator row (e.g. |---|:-:|).
            if not all(c in '-|: ' for c in stripped):
                table_rows.append([apply_emoji_font(cell.strip(), emoji_font) for cell in stripped.strip('|').split('|')])
            continue
        if table_rows:
            story.extend(_table_flowables(table_rows, style_table_header, style_normal))
            table_rows = []

        line_with_emoji = apply_emoji_font(line, emoji_font)
        formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line_with_emoji))

        if line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            story.append(Paragraph(formatted_line[2:], style_h1))
            first_heading = False
        elif line.startswith("## "):
            story.append(Paragraph(formatted_line[3:], style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(formatted_line[4:], style_h3))
        elif stripped.startswith(("* ", "- ")):
            story.append(Paragraph(formatted_line.strip()[2:], style_normal, bulletText='\u2022'))
        elif re.match(r'^\d+\.\s', stripped):
            story.append(Paragraph(formatted_line.strip(), style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            story.append(Paragraph(formatted_line, style_normal))

    # Content still pending at end of input would otherwise be lost: an
    # unterminated code fence, or a table on the last lines of the document.
    if in_code_block and code_lines:
        story.extend(_code_block_flowables(code_lines, style_code))
    story.extend(_table_flowables(table_rows, style_table_header, style_normal))

    return story
152
+
153
def create_pdf_preview(pdf_path: Path):
    """Render the first page of ``pdf_path`` to a PNG inside ``PREVIEW_DIR``.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The PNG path as a string, or ``None`` when the preview could not be
        created (the failure is printed; preview generation is best-effort).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            doc.load_page(0).get_pixmap().save(str(preview_path))
        finally:
            # Close the document even when rendering or saving raises.
            doc.close()
        return str(preview_path)
    except Exception as e:
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
161
+
162
+ # --- Main API Function ---
163
def _render_markdown_pdf(markdown_text, font_name, pagesize, num_columns):
    """Lay out ``markdown_text`` as an in-memory PDF and return its buffer."""
    buffer = io.BytesIO()
    story = markdown_to_story(markdown_text, font_name, EMOJI_FONT_NAME)

    if num_columns > 1:
        gutter = 0.2 * inch
        doc = BaseDocTemplate(buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
        # Columns plus the gutters between them must add up to doc.width.
        # The previous formula only did so for exactly two columns.
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(buffer, pagesize=pagesize)
    doc.build(story)
    return buffer


def _render_image_pdf(img_path):
    """Return the bytes of a one-page PDF sized to the image and filled by it."""
    with Image.open(img_path) as img:
        img_width, img_height = img.size
    buffer = io.BytesIO()
    doc = BaseDocTemplate(buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
    # The frame needs zero padding: SimpleDocTemplate's default frame keeps
    # 6pt padding on every side, which leaves less room than the page-sized
    # image requires.
    full_page = Frame(0, 0, img_width, img_height, leftPadding=0, bottomPadding=0, rightPadding=0, topPadding=0)
    doc.addPageTemplates([PageTemplate(id='FullPage', frames=[full_page])])
    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
    return buffer.getvalue()


def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file group, layout, font) combination.

    Uploaded files are grouped by the part of their stem before the first
    underscore. Within a group, all Markdown files are concatenated and laid
    out first, then every image is appended as its own page.

    Args:
        files: Uploaded files (objects exposing ``.name``, or plain paths).
        layouts: Selected keys of ``LAYOUTS``.
        fonts: Selected font names.
        num_columns: Number of text columns for the Markdown part.
        page_w_mult: Factor applied to the base page width.
        page_h_mult: Factor applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(preview_png_paths, log_text, pdf_paths)``.

    Raises:
        gr.Error: When no files, layouts or fonts were supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Start from empty output folders. rmtree ignores errors, so the folder
    # may still exist afterwards; exist_ok keeps mkdir from failing then.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    OUTPUT_DIR.mkdir(exist_ok=True)
    PREVIEW_DIR.mkdir(exist_ok=True)

    grouped_files = defaultdict(lambda: {'md': [], 'img': []})
    for uploaded in files:
        # Accept both upload wrappers exposing `.name` and plain path values.
        file_path = Path(getattr(uploaded, "name", uploaded))
        stem = file_path.stem.split('_')[0]
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped_files[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped_files[stem]['img'].append(file_path)

    log_lines, generated_pdf_paths = [], []

    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        # Neither the Markdown text nor the image pages depend on the chosen
        # layout or font, so prepare them once per group.
        md_content = "\n".join(p.read_text(encoding='utf-8') for p in assets['md']) if assets['md'] else None
        image_pdfs = [_render_image_pdf(img_path) for img_path in assets['img']]

        for layout_name in layouts:
            base_w, base_h = LAYOUTS[layout_name]["size"]
            pagesize = (base_w * page_w_mult, base_h * page_h_mult)

            for font_name in fonts:
                merger = PdfWriter()

                if md_content is not None:
                    merger.append(fileobj=_render_markdown_pdf(md_content, font_name, pagesize, int(num_columns)))

                for image_pdf in image_pdfs:
                    merger.append(fileobj=io.BytesIO(image_pdf))

                if len(merger.pages) > 0:
                    time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                    filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as out_file:
                        merger.write(out_file)
                    generated_pdf_paths.append(output_path)
                    log_lines.append(f"Generated: {filename}\n")

    gallery_previews = [create_pdf_preview(p) for p in generated_pdf_paths]
    final_gallery = [g for g in gallery_previews if g is not None]

    return final_gallery, "".join(log_lines), [str(p) for p in generated_pdf_paths]
222
+
223
# --- Gradio UI Definition ---
# Fonts are registered once at import time; the UI offers the text fonts and
# the PDF generator uses the emoji font name.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()

# Sample document written next to the app so users have something to upload.
# Emoji are spelled as escapes so the literal survives any re-encoding of
# this source file.
SAMPLE_MARKDOWN = (
    "# Document Title \U0001F680\n\n"
    "This is the first section.\n\n"
    "## Subsection 1.1\n\n"
    "Here is some text with **bold** and _italic_ elements. And an emoji: \U0001F60A\n\n"
    "# Chapter Two\n\n"
    "This new chapter starts on a new page automatically.\n\n"
    "### A Table of Data\n"
    "| Item | Category | Status |\n"
    "|---|---|---|\n"
    "| Chocolate \U0001F36B| Food | Delicious |\n"
    "| Computer \U0001F4BB| Tech | Necessary |\n\n"
    "```python\n"
    "# This is a code block.\n"
    "def main():\n"
    " print(\"Hello, PDF World!\")\n"
    "```"
)
# Explicit UTF-8: the sample contains emoji, which the platform default
# encoding may not be able to represent.
with open(CWD / "sample.md", "w", encoding="utf-8") as f:
    f.write(SAMPLE_MARKDOWN)

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# \U0001F4C4 Advanced PDF Layout Engine")
    gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### \u2699\ufe0f Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg", ".jpeg"])

            with gr.Row():
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)

            num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Base Page Layout", value=["A4 Portrait"])
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
            generate_btn = gr.Button("\U0001F680 Generate PDFs", variant="primary")

        # Right column: previews, log and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### \U0001F5BC\ufe0f PDF Preview Gallery")
            gallery_output = gr.Gallery(label="Generated PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            downloadable_files_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider], outputs=[gallery_output, log_output, downloadable_files_output])

if __name__ == "__main__":
    demo.launch()