awacke1 committed on
Commit
afb8307
·
verified ·
1 Parent(s): 294909e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -0
app.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import datetime
4
+ import re
5
+ import requests
6
+ import os
7
+ import shutil
8
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate
9
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
10
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
11
+ from reportlab.lib.units import inch
12
+ from reportlab.lib import colors
13
+ from reportlab.pdfbase import pdfmetrics
14
+ from reportlab.pdfbase.ttfonts import TTFont
15
+
16
+ # --- Configuration & Setup ---
17
# Page layouts offered in the UI, keyed by display label. Each paper size gets
# a portrait entry followed by its landscape counterpart.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": size if orientation == "Portrait" else landscape(size)
    }
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Working directories, created next to the app on import if missing.
OUTPUT_DIR, FONT_DIR = Path("generated_pdfs"), Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)

# Name under which the emoji font is registered with ReportLab.
EMOJI_FONT_NAME = "NotoColorEmoji"
30
+
31
+ # --- Font & Emoji Handling ---
32
+
33
def download_fonts(timeout: float = 30.0):
    """Download DejaVuSans (text) and NotoColorEmoji (emoji) into FONT_DIR if missing.

    Fonts that already exist on disk are left untouched. A failed download is
    reported on stdout and skipped, so the caller can continue with whatever
    fonts are available.

    Args:
        timeout: Seconds to wait on the server before abandoning a download.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true",
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue

        print(f"Downloading {font_filename}...")
        # Write to a side file first: a half-written .ttf would otherwise be
        # taken for a finished download on the next start.
        partial_path = font_path.with_name(font_filename + ".part")
        try:
            # requests applies no timeout by default, so an unresponsive
            # server would block start-up indefinitely without this.
            r = requests.get(url, allow_redirects=True, timeout=timeout)
            r.raise_for_status()
            partial_path.write_bytes(r.content)
            partial_path.replace(font_path)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
            try:
                partial_path.unlink()
            except OSError:
                pass  # nothing was written, or it cannot be removed; best effort
        else:
            print(f"{font_filename} downloaded successfully.")
51
+
52
def discover_and_register_fonts():
    """Register every .ttf in FONT_DIR with ReportLab and list the selectable ones.

    Missing default fonts are downloaded first. Each font is registered under
    its file stem; a font that fails to register is reported on stdout and
    skipped. Fonts whose name contains "emoji" are registered but left out of
    the returned list.

    Returns:
        Alphabetically sorted names of the registered, user-selectable fonts.
    """
    download_fonts()

    selectable = []
    for ttf_file in FONT_DIR.glob("*.ttf"):
        registered_name = ttf_file.stem
        try:
            pdfmetrics.registerFont(TTFont(registered_name, str(ttf_file)))
        except Exception as e:
            print(f"Could not register font {ttf_file.name}: {e}")
            continue

        # The emoji font is applied automatically, never offered as a text font.
        if "emoji" in registered_name.lower():
            continue
        selectable.append(registered_name)

    return sorted(selectable)
67
+
68
# Compiled once at import: this is called for every line and table cell.
# Each range includes the last code point of its Unicode block.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols (e.g. the rocket)
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str) -> str:
    """Wrap runs of emoji characters in a <font> tag naming the emoji font.

    Args:
        text: Paragraph markup or plain text that may contain emoji.

    Returns:
        The text with each run of emoji enclosed in
        ``<font name="...">`` using EMOJI_FONT_NAME, or the text unchanged
        when that font is not registered with ReportLab.
    """
    # Font registration failures are only logged at start-up; in that case
    # leave the text alone rather than reference a font ReportLab lacks.
    if EMOJI_FONT_NAME not in pdfmetrics.getRegisteredFontNames():
        return text
    return _EMOJI_PATTERN.sub(fr'<font name="{EMOJI_FONT_NAME}">\1</font>', text)
77
+
78
+
79
+ # --- ReportLab PDF Generation (Core Logic) ---
80
+
81
def _flush_table(story, rows):
    """Append the pending table rows (if any) to *story*; return a fresh row buffer."""
    if rows:
        table = Table(rows, hAlign='LEFT', repeatRows=1)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        story.append(table)
        story.append(Spacer(1, 0.2 * inch))
    # Always hand back a new list so the next table starts empty.
    return []


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Recognised constructs: ``#``/``##``/``###`` headings, ``*``/``-`` bullets,
    numbered lines, fenced code blocks, pipe tables, blank lines (rendered as
    a spacer) and ``**bold**``/``_italic_`` inside ordinary paragraphs. The
    marker ``\\n\\n---PAGE_BREAK---\\n\\n`` separates pages. Emoji are routed
    through apply_emoji_font.

    Args:
        markdown_text: The Markdown source, optionally with page-break markers.
        font_name: Registered ReportLab font used for body text and headings.

    Returns:
        The flowables in document order, ready for ``doc.build``.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # ParagraphStyle's padding property is `borderPadding`; an unknown
    # `padding` keyword is stored but never read when drawing.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke, borderColor=colors.lightgrey, borderWidth=1, borderPadding=(5, 5))

    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split('\n\n---PAGE_BREAK---\n\n')
    last_page = len(pages) - 1

    for page_index, page_content in enumerate(pages):
        in_code_block = False
        code_lines = []
        table_rows = []

        for line in page_content.split('\n'):
            stripped = line.strip()

            # Fences are handled first so that table-looking lines inside a
            # code block stay part of the code.
            if stripped.startswith("```"):
                table_rows = _flush_table(story, table_rows)
                if in_code_block:
                    story.append(Paragraph('<br/>'.join(code_lines), style_code))
                    code_lines = []
                in_code_block = not in_code_block
                continue
            if in_code_block:
                # Code is shown literally, so neutralise paragraph markup.
                code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
                continue

            # Table rows accumulate until the first non-table line.
            if stripped.startswith('|') and stripped.endswith('|'):
                # Skip the |---|---| separator row.
                if all(c in '-|: ' for c in stripped):
                    continue
                cells = [apply_emoji_font(c.strip()) for c in stripped.strip('|').split('|')]
                table_rows.append([Paragraph(cell, style_normal) for cell in cells])
                continue
            table_rows = _flush_table(story, table_rows)

            # Markdown elements to Flowables
            text = apply_emoji_font(line)
            if line.startswith("# "):
                story.append(Paragraph(text[2:], style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(text[3:], style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(text[4:], style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(text.strip()[2:], style_normal, bulletText='\u2022'))
            elif re.match(r'^\d+\.\s', stripped):
                story.append(Paragraph(text.strip(), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                bolded = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
                formatted_line = re.sub(r'_(.*?)_', r'<i>\1</i>', bolded)
                story.append(Paragraph(formatted_line, style_normal))

        # A page may end while a table or an unterminated fence is still open.
        table_rows = _flush_table(story, table_rows)
        if in_code_block and code_lines:
            story.append(Paragraph('<br/>'.join(code_lines), style_code))

        if page_index < last_page:
            story.append(PageBreak())

    return story
150
+
151
+ # --- Gradio API Function ---
152
+
153
def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Render the uploaded Markdown files to PDF for every layout/font combination.

    OUTPUT_DIR is emptied before each run. A failure while building one PDF is
    recorded in the log and does not stop the remaining ones.

    Args:
        files: Uploaded Markdown files, as objects exposing ``.name`` or as
            plain path strings.
        layouts: Keys of LAYOUTS to render.
        fonts: Registered font names to render with.
        combine_files: When true, join all files into one document with a page
            break between them.
        num_columns: Number of text columns per page.
        progress: Gradio progress tracker.

    Returns:
        A tuple of (sorted list of generated PDF paths, log text).

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files: raise gr.Error("Please upload at least one Markdown file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")

    # The slider value may arrive as a float; range() and the file name need an int.
    num_columns = int(num_columns)

    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

    # Accept both file wrappers (with .name) and bare path strings.
    source_paths = [Path(getattr(f, "name", f)) for f in files]
    md_contents = [p.read_text(encoding='utf-8') for p in source_paths]

    if combine_files:
        documents = [("Combined_Document", '\n\n---PAGE_BREAK---\n\n'.join(md_contents))]
    else:
        documents = [(p.stem, content) for p, content in zip(source_paths, md_contents)]

    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    log_lines = ["Starting PDF generation..."]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")
    gutter = 0.2 * inch  # horizontal gap between neighbouring columns

    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
        output_path = OUTPUT_DIR / output_filename
        log_lines.append(f" - Generating: {output_filename}")
        try:
            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]

            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                # n columns share the width left after the n-1 gutters, so the
                # frames span the printable area exactly for any column count.
                frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
                frames = [
                    Frame(doc.leftMargin + col * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height, id=f'col{col}')
                    for col in range(num_columns)
                ]
                doc.addPageTemplates([PageTemplate(id='TwoCol', frames=frames)])
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch, topMargin=inch, bottomMargin=inch)
            doc.build(story)
        except Exception as e:
            # Per-task boundary: report the failure and carry on with the rest.
            log_lines.append(f" - **ERROR**: {e}")

    log_lines.extend(["", "✅ PDF generation complete!"])
    # Sorted so the download list has a stable order between runs.
    generated_files = sorted(str(f) for f in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "\n".join(log_lines)
207
+
208
+ # --- Gradio UI Definition ---
209
# Fonts are downloaded (if needed) and registered once, at import time.
AVAILABLE_FONTS = discover_and_register_fonts()
# Read-only example shown in the UI to illustrate the supported syntax.
SAMPLE_MARKDOWN = "# Sample Document 🚀\n\nThis document shows **bold text**, _italic text_, and emojis like 😊 and 💻.\n\n### A Table\n| Flavor | Rating |\n|-------------|------------|\n| Chocolate | 10/10 |\n| Vanilla | 9/10 |"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            combine_files_check = gr.Checkbox(label="Combine uploaded files into a single PDF", value=False)
            num_columns_slider = gr.Slider(label="Number of Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=list(LAYOUTS.keys())[0])

            # Fall back to a built-in PDF font when no text font was registered.
            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                AVAILABLE_FONTS = ["Helvetica"]

            # AVAILABLE_FONTS is never empty here, so the first entry is a safe default.
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts to Use", value=AVAILABLE_FONTS[0])
            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10, interactive=False)

        # Right column: generation log and download list.
        with gr.Column(scale=2):
            gr.Markdown("### 📄 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    # Input order must match the positional parameters of generate_pdfs_api.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()