awacke1 committed on
Commit
eb23ca7
·
verified ·
1 Parent(s): a587a36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -214
app.py CHANGED
@@ -6,11 +6,9 @@ import os
6
  import shutil
7
  import fitz # PyMuPDF
8
  from PIL import Image
 
9
  import io
10
-
11
- # Imports for new formats
12
- from docx import Document
13
- import openpyxl
14
 
15
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
16
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -27,8 +25,10 @@ LAYOUTS = {
27
  "A4 Landscape": {"size": landscape(A4)},
28
  "Letter Portrait": {"size": letter},
29
  "Letter Landscape": {"size": landscape(letter)},
 
 
30
  }
31
- OUTPUT_DIR = CWD / "generated_outputs"
32
  PREVIEW_DIR = CWD / "previews"
33
  FONT_DIR = CWD
34
 
@@ -36,263 +36,289 @@ FONT_DIR = CWD
36
  OUTPUT_DIR.mkdir(exist_ok=True)
37
  PREVIEW_DIR.mkdir(exist_ok=True)
38
 
 
39
  # --- Font & Emoji Handling ---
40
- EMOJI_FONT_PATH = None
41
- EMOJI_IMAGE_CACHE = {}
42
 
43
- def setup_fonts():
44
- """Finds and registers all .ttf files from the app directory."""
45
- global EMOJI_FONT_PATH
46
  text_font_names = []
 
47
 
48
- noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
49
- if noto_emoji_path.exists():
50
- EMOJI_FONT_PATH = str(noto_emoji_path)
51
- print(f"Found emoji font: {EMOJI_FONT_PATH}")
52
- else:
53
- raise FileNotFoundError("CRITICAL: 'NotoColorEmoji-Regular.ttf' not found. This file is required for emoji support.")
54
-
55
- for font_path in FONT_DIR.glob("*.ttf"):
56
  try:
57
  font_name = font_path.stem
58
  pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
59
- pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=font_name, italic=font_name, boldItalic=font_name)
60
- if "notocoloremoji" not in font_name.lower():
 
 
 
 
 
 
61
  text_font_names.append(font_name)
62
  except Exception as e:
63
  print(f"Could not register font {font_path.name}: {e}")
64
 
65
  if not text_font_names:
 
66
  text_font_names.append('Helvetica')
67
- return sorted(text_font_names)
68
 
69
- def render_emoji_as_image(emoji_char, size_pt):
70
- """Renders an emoji character as a PNG image in memory."""
71
- if not EMOJI_FONT_PATH: return None
72
- if (emoji_char, size_pt) in EMOJI_IMAGE_CACHE: return EMOJI_IMAGE_CACHE[(emoji_char, size_pt)]
73
-
74
- try:
75
- rect = fitz.Rect(0, 0, size_pt * 1.5, size_pt * 1.5)
76
- doc = fitz.open()
77
- page = doc.new_page(width=rect.width, height=rect.height)
78
- page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
79
- page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
80
- pix = page.get_pixmap(alpha=True, dpi=300)
81
- doc.close()
82
- img_buffer = io.BytesIO(pix.tobytes("png"))
83
- img_buffer.seek(0)
84
- EMOJI_IMAGE_CACHE[(emoji_char, size_pt)] = img_buffer
85
- return img_buffer
86
- except Exception as e:
87
- print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
88
- return None
89
 
90
- # --- Document Generation Engines ---
91
- def markdown_to_story(markdown_text: str, font_name: str):
92
- """Converts markdown to a ReportLab story, converting emojis to images."""
93
- styles = getSampleStyleSheet()
94
- style_body = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=11, leading=14)
95
- style_h1 = ParagraphStyle('h1', fontName=font_name, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
96
- style_h2 = ParagraphStyle('h2', fontName=font_name, fontSize=18, leading=22, spaceAfter=10, textColor=colors.darkslateblue)
97
 
 
98
  emoji_pattern = re.compile(f"([{re.escape(''.join(map(chr, range(0x1f600, 0x1f650))))}"
99
  f"{re.escape(''.join(map(chr, range(0x1f300, 0x1f5ff))))}"
100
  f"{re.escape(''.join(map(chr, range(0x1f900, 0x1f9ff))))}"
101
- f"{re.escape(''.join(map(chr, range(0x2600, 0x26ff))))}]+)")
102
-
103
- def text_to_flowables(text, style):
104
- parts = emoji_pattern.split(text)
105
- flowables = []
106
- for part in parts:
107
- if not part: continue
108
- if emoji_pattern.match(part):
109
- for emoji_char in part:
110
- img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
111
- if img_buffer:
112
- img = ReportLabImage(img_buffer, height=style.fontSize * 1.2, width=style.fontSize * 1.2)
113
- flowables.append(img)
114
- else:
115
- formatted_part = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', part)
116
- flowables.append(Paragraph(formatted_part, style))
117
- return Table([flowables], colWidths=[None] * len(flowables), style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')]) if flowables else Spacer(0,0)
118
-
119
- story, first_heading = [], True
120
- for line in markdown_text.split('\n'):
121
- stripped = line.strip()
122
- if stripped.startswith("# "):
123
- if not first_heading: story.append(PageBreak())
124
- story.append(text_to_flowables(stripped[2:], style_h1)); first_heading = False
125
- elif stripped.startswith("## "):
126
- story.append(text_to_flowables(stripped[3:], style_h2))
127
- elif stripped.startswith(("- ", "* ")):
128
- story.append(text_to_flowables(stripped[2:], ParagraphStyle(parent=style_body, leftIndent=20, bulletIndent=10)))
129
- elif stripped:
130
- story.append(text_to_flowables(stripped, style_body))
131
  else:
132
- story.append(Spacer(1, 0.2*inch))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return story
134
 
135
- def create_pdf(md_content, font_name, pagesize, num_columns):
136
- """Generates a PDF file from markdown content."""
137
- md_buffer = io.BytesIO()
138
- story = markdown_to_story(md_content, font_name)
139
- if num_columns > 1:
140
- doc = BaseDocTemplate(md_buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
141
- frame_width = (doc.width / num_columns) - (num_columns - 1) * 0.1*inch
142
- frames = [Frame(doc.leftMargin + i * (frame_width + 0.2*inch), doc.bottomMargin, frame_width, doc.height) for i in range(num_columns)]
143
- doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
144
- else:
145
- doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)
146
- doc.build(story)
147
- return md_buffer
148
-
149
- def create_docx(md_content):
150
- """Generates a DOCX file from markdown content."""
151
- document = Document()
152
- for line in md_content.split('\n'):
153
- line = line.strip()
154
- if line.startswith('# '): document.add_heading(line[2:], level=1)
155
- elif line.startswith('## '): document.add_heading(line[3:], level=2)
156
- elif line.startswith(('- ','* ')): document.add_paragraph(line[2:], style='List Bullet')
157
- elif line:
158
- p = document.add_paragraph()
159
- parts = re.split(r'(\*\*.*?\*\*)', line)
160
- for part in parts:
161
- if part.startswith('**') and part.endswith('**'):
162
- p.add_run(part[2:-2]).bold = True
163
- else:
164
- p.add_run(part)
165
- return document
166
-
167
- def create_xlsx(md_content):
168
- """Generates an XLSX file, splitting content by H1 headers into columns."""
169
- workbook = openpyxl.Workbook()
170
- sheet = workbook.active
171
- sections = re.split(r'\n# ', '\n' + md_content)
172
- if not sections[0].strip(): sections.pop(0)
173
-
174
- for c_idx, section in enumerate(sections, 1):
175
- lines = section.split('\n')
176
- sheet.cell(row=1, column=c_idx, value=lines[0].strip()) # Header
177
- for r_idx, line_content in enumerate(lines[1:], 2):
178
- sheet.cell(row=r_idx, column=c_idx, value=line_content.strip())
179
- return workbook
180
 
181
  def create_pdf_preview(pdf_path: Path):
182
  preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
183
  try:
184
- doc = fitz.open(pdf_path)
185
- page = doc.load_page(0)
186
- pix = page.get_pixmap(dpi=150)
187
- pix.save(str(preview_path))
188
- doc.close()
189
  return str(preview_path)
190
  except Exception as e:
191
- print(f"Preview failed for {pdf_path.name}: {e}")
192
- return None
193
 
194
  # --- Main API Function ---
195
- def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, progress=gr.Progress(track_tqdm=True)):
196
- if not files: raise gr.Error("Please upload at least one Markdown (.md) file.")
197
- if not output_formats: raise gr.Error("Please select at least one output format.")
 
198
 
199
  shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
200
  OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
201
- EMOJI_IMAGE_CACHE.clear()
202
 
203
- # Consolidate all markdown content, adding H1 headers between files
204
- md_contents = []
205
  for f in files:
206
- try:
207
- md_contents.append(Path(f.name).read_text(encoding='utf-8'))
208
- except Exception as e:
209
- print(f"Error reading {f.name}: {e}")
210
- md_content = "\n\n# ".join(md_contents)
211
- if not md_content.startswith("#"): md_content = "# " + md_content
212
 
213
- generated_files = []
214
- for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
215
- time_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
216
-
217
- try:
218
- if format_choice == "PDF":
219
- for layout_name in layouts:
220
- for font_name in fonts:
221
- pagesize = LAYOUTS.get(layout_name, {}).get("size", letter)
222
- pdf_buffer = create_pdf(md_content, font_name, pagesize, num_columns)
223
- filename = f"Document_{layout_name.replace(' ','-')}_{font_name}_{time_str}.pdf"
224
- output_path = OUTPUT_DIR / filename
225
- with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
226
- generated_files.append(output_path)
227
-
228
- elif format_choice == "DOCX":
229
- docx_doc = create_docx(md_content)
230
- filename = f"Document_{time_str}.docx"
231
- output_path = OUTPUT_DIR / filename
232
- docx_doc.save(output_path)
233
- generated_files.append(output_path)
234
-
235
- elif format_choice == "XLSX":
236
- xlsx_book = create_xlsx(md_content)
237
- filename = f"Outline_{time_str}.xlsx"
238
- output_path = OUTPUT_DIR / filename
239
- xlsx_book.save(output_path)
240
- generated_files.append(output_path)
241
- except Exception as e:
242
- print(f"Failed to generate {format_choice}: {e}")
243
- gr.Warning(f"Failed to generate {format_choice}. See console for details.")
244
 
245
- gallery_previews = [p for p in [create_pdf_preview(f) for f in generated_files if f.suffix == '.pdf'] if p]
246
- log_message = f"Generated {len(generated_files)} files." if generated_files else "Generation failed. Check logs."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
- return gallery_previews, log_message, [str(p) for p in generated_files]
249
 
250
  # --- Gradio UI Definition ---
251
- try:
252
- AVAILABLE_FONTS = setup_fonts()
253
- except FileNotFoundError as e:
254
- print(e)
255
- # If font setup fails, we can't run the app.
256
- # This prevents Gradio from starting with a fatal error.
257
- AVAILABLE_FONTS = []
258
-
259
- with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Format Document Generator") as demo:
260
- gr.Markdown("# πŸ“„ Multi-Format Document Generator (PDF, DOCX, XLSX)")
261
- gr.Markdown("Upload one or more Markdown files (`.md`). The tool will combine them and generate documents in your chosen formats. Emojis in PDFs are fully supported! πŸ₯³")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  with gr.Row():
264
  with gr.Column(scale=1):
265
  gr.Markdown("### βš™οΈ Generation Settings")
266
- uploaded_files = gr.File(label="Upload Markdown Files", file_count="multiple", file_types=[".md"])
267
- output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
268
 
269
- with gr.Accordion("PDF Customization", open=True):
270
- selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["Letter Portrait"])
271
- selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
272
- num_columns_slider = gr.Slider(label="Text Columns (for PDF)", minimum=1, maximum=4, step=1, value=1)
273
 
274
- generate_btn = gr.Button("πŸš€ Generate Documents", variant="primary")
 
 
 
275
 
276
  with gr.Column(scale=2):
277
- gr.Markdown("### πŸ–ΌοΈ Output Files")
278
- gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
279
- log_output = gr.Markdown(label="Generation Log", value="Ready...")
280
- downloadable_files_output = gr.Files(label="Download Generated Files")
281
 
282
- if not AVAILABLE_FONTS:
283
- gr.Warning("The application is in a degraded state. Required font files are missing. Please check the console log and add the necessary files.")
284
- else:
285
- generate_btn.click(
286
- fn=generate_outputs_api,
287
- inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider],
288
- outputs=[gallery_output, log_output, downloadable_files_output]
289
- )
290
 
291
  if __name__ == "__main__":
292
- if AVAILABLE_FONTS:
293
- demo.launch()
294
- else:
295
- print("\n" + "="*60)
296
- print("Application launch aborted due to missing font files.")
297
- print("Please ensure 'NotoColorEmoji-Regular.ttf' is in the project directory.")
298
- print("="*60)
 
6
  import shutil
7
  import fitz # PyMuPDF
8
  from PIL import Image
9
+ from collections import defaultdict
10
  import io
11
+ from pypdf import PdfWriter
 
 
 
12
 
13
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
14
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 
25
  "A4 Landscape": {"size": landscape(A4)},
26
  "Letter Portrait": {"size": letter},
27
  "Letter Landscape": {"size": landscape(letter)},
28
+ "Legal Portrait": {"size": legal},
29
+ "Legal Landscape": {"size": landscape(legal)},
30
  }
31
+ OUTPUT_DIR = CWD / "generated_pdfs"
32
  PREVIEW_DIR = CWD / "previews"
33
  FONT_DIR = CWD
34
 
 
36
  OUTPUT_DIR.mkdir(exist_ok=True)
37
  PREVIEW_DIR.mkdir(exist_ok=True)
38
 
39
+
40
  # --- Font & Emoji Handling ---
 
 
41
 
42
def register_local_fonts():
    """Register every ``.ttf`` file in ``FONT_DIR`` with ReportLab.

    Each font file is registered four times (plain, ``-Bold``, ``-Italic``,
    ``-BoldItalic``), all pointing at the same file, and tied together as a
    font family so that ``<b>``/``<i>`` markup resolves to a registered name.

    Returns:
        A ``(text_fonts, emoji_font)`` tuple: the sorted list of font names
        offered for body text, and the name of the emoji font (``None`` when
        no matching font was registered).
    """
    print("--- Font Registration Process Starting ---")
    selectable = []
    emoji_name = None

    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    candidates = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(candidates)} .ttf files: {[c.name for c in candidates]}")

    for ttf in candidates:
        base = ttf.stem
        # Family slot -> registered face name; insertion order is the
        # registration order (plain, bold, italic, bold-italic).
        faces = {
            "normal": base,
            "bold": f"{base}-Bold",
            "italic": f"{base}-Italic",
            "boldItalic": f"{base}-BoldItalic",
        }
        try:
            for face in faces.values():
                pdfmetrics.registerFont(TTFont(face, str(ttf)))
            pdfmetrics.registerFontFamily(base, **faces)
        except Exception as e:
            print(f"Could not register font {ttf.name}: {e}")
            continue

        lowered = base.lower()
        if "notocoloremoji-regular" in lowered:
            emoji_name = base
        elif "notoemoji" not in lowered:  # Exclude other symbol fonts from text selection
            selectable.append(base)

    if not selectable:
        print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
        selectable.append('Helvetica')

    print(f"Successfully registered user-selectable fonts: {selectable}")
    print(f"Emoji font set to: {emoji_name}")
    print("--- Font Registration Process Finished ---")
    return sorted(selectable), emoji_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
# Inclusive (first, last) code-point ranges treated as emoji.
_EMOJI_RANGES = (
    (0x1F600, 0x1F64F),  # Emoticons
    (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs
    (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
    (0x2600, 0x26FF),    # Miscellaneous Symbols
    (0x2700, 0x27BF),    # Dingbats
)
# Compiled once at import time; matches a run of one or more emoji characters.
_EMOJI_PATTERN = re.compile(
    "([" + "".join(f"{chr(first)}-{chr(last)}" for first, last in _EMOJI_RANGES) + "]+)"
)
# Matches a single HTML-like tag such as <b>, </i> or <font name="x">.
_TAG_PATTERN = re.compile(r"(<[^>]+>)")


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap runs of emoji in ``<font name="...">`` tags, leaving markup intact.

    The text is split on existing HTML-like tags so that only the plain-text
    segments between tags are rewritten; the tags themselves are copied through
    unchanged.

    Args:
        text: Paragraph text, possibly already containing ``<b>``/``<i>`` tags.
        emoji_font_name: Font name to put in the ``<font>`` tag. When falsy
            (``None`` or ``""``) the text is returned unchanged.

    Returns:
        The text with every emoji run wrapped in a ``<font>`` tag.
    """
    if not emoji_font_name:
        return text

    def wrap(match):
        # A callable replacement keeps backslashes in the font name literal
        # instead of having them parsed as regex template escapes.
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    pieces = []
    for part in _TAG_PATTERN.split(text):
        if _TAG_PATTERN.match(part):
            # It's a tag, keep it as is.
            pieces.append(part)
        else:
            # It's text, apply the emoji font to any emoji within this segment.
            pieces.append(_EMOJI_PATTERN.sub(wrap, part))
    return "".join(pieces)
106
+
107
+
108
+ # --- PDF Generation & Handling ---
109
+
110
# Inline markdown patterns, compiled once instead of on every line.
_BOLD_RE = re.compile(r'\*\*(.*?)\*\*')
_ITALIC_RE = re.compile(r'_(.*?)_')


def _markdown_table_to_flowable(table_data, emoji_font, header_style, body_style):
    """Build a styled ReportLab Table from parsed rows; the first row is the header."""
    # Pad short rows so every row has the same number of cells; a ragged
    # row would otherwise make Table construction fail.
    width = max(len(row) for row in table_data)

    def to_cells(row, style):
        padded = row + [''] * (width - len(row))
        return [
            Paragraph(apply_emoji_font(_BOLD_RE.sub(r'<b>\1</b>', cell), emoji_font), style)
            for cell in padded
        ]

    rows = [to_cells(table_data[0], header_style)]
    rows.extend(to_cells(row, body_style) for row in table_data[1:])

    table = Table(rows, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ]))
    return table


def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert a markdown string into a list of ReportLab flowables.

    Supported constructs: fenced code blocks, pipe tables, ``#``/``##``/``###``
    headings, ``- ``/``* `` bullets, ``**bold**`` and ``_italic_`` inline
    markup, and blank lines (rendered as vertical space). Every ``# `` heading
    after the first starts a new page.

    Args:
        markdown_text: The markdown source.
        font_name: Registered font used for body text and headings. A
            ``"<font_name>-Bold"`` face is used for table headers.
        emoji_font: Font name passed to ``apply_emoji_font`` for emoji runs.

    Returns:
        The list of flowables ("story") to hand to a document template.
    """
    styles = getSampleStyleSheet()
    # Define styles for various markdown elements
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
    # NOTE(review): `padding` does not appear to be a ParagraphStyle attribute
    # (the documented one is `borderPadding`), so it likely has no effect.
    # Left unchanged to avoid altering the rendered layout; confirm intent.
    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, padding=8, leading=12, fontSize=9)
    style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=f"{font_name}-Bold")

    story = []
    in_code_block = False
    code_lines = []   # escaped lines of the code block being collected
    table_data = []   # rows (lists of cell strings) of the table being collected
    first_heading = True

    def flush_table():
        # Emit the pending table, if any, followed by a spacer.
        if table_data:
            story.append(_markdown_table_to_flowable(table_data, emoji_font, style_table_header, style_normal))
            story.append(Spacer(1, 0.2 * inch))
            table_data.clear()

    def flush_code():
        # Emit the collected code lines as one paragraph, one <br/> per line.
        story.append(Paragraph("".join(f"{code_line}<br/>" for code_line in code_lines), style_code))
        story.append(Spacer(1, 0.1 * inch))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped_line = line.strip()

        if stripped_line.startswith("```"):
            if in_code_block:
                flush_code()
            else:
                # A table directly followed by a fence must be emitted before
                # the code block, not after it.
                flush_table()
            in_code_block = not in_code_block
            continue
        if in_code_block:
            # Escape markup characters so code is shown literally.
            code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
            continue

        if stripped_line.startswith('|'):
            # Skip the header separator row (e.g. |---|:--:|).
            if all(c in '-|: ' for c in stripped_line):
                continue
            table_data.append([cell.strip() for cell in stripped_line.strip('|').split('|')])
            continue
        # Any non-table line ends the table being collected.
        flush_table()

        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        # Default content is the whole stripped line
        content = stripped_line
        style = style_normal
        extra_args = {}

        # Detect structural elements and extract the raw content. The marker
        # is removed by slicing: str.lstrip('# ') strips a *set* of characters
        # and would also eat a leading '#' that belongs to the heading text.
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            content = stripped_line[2:].lstrip()
            style = style_h1
            first_heading = False
        elif stripped_line.startswith("## "):
            content = stripped_line[3:].lstrip()
            style = style_h2
        elif stripped_line.startswith("### "):
            content = stripped_line[4:].lstrip()
            style = style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content = stripped_line[2:]
            extra_args['bulletText'] = '\u2022'  # bullet character

        # Apply markdown formatting for bold/italic, then the emoji font.
        formatted_content = _ITALIC_RE.sub(r'<i>\1</i>', _BOLD_RE.sub(r'<b>\1</b>', content))
        final_content = apply_emoji_font(formatted_content, emoji_font)

        story.append(Paragraph(final_content, style, **extra_args))

    # Input may end inside a code fence or a table; emit what was collected
    # instead of silently dropping it.
    if in_code_block and code_lines:
        flush_code()
    flush_table()
    return story
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
  def create_pdf_preview(pdf_path: Path):
197
  preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
198
  try:
199
+ doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap()
200
+ pix.save(str(preview_path)); doc.close()
 
 
 
201
  return str(preview_path)
202
  except Exception as e:
203
+ print(f"Could not create preview for {pdf_path.name}: {e}"); return None
 
204
 
205
  # --- Main API Function ---
206
def _render_markdown_pdf(md_content, font_name, pagesize, num_columns):
    """Render markdown into an in-memory PDF laid out in ``num_columns`` columns."""
    buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, EMOJI_FONT_NAME)
    columns = int(num_columns)  # UI sliders may deliver the value as a float

    if columns > 1:
        doc = BaseDocTemplate(buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
        # Columns plus the gutters between them span exactly doc.width.
        gutter = 0.2 * inch
        frame_width = (doc.width - (columns - 1) * gutter) / columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(buffer, pagesize=pagesize)
    doc.build(story)
    return buffer


def _render_image_pdf(img_path):
    """Render one image as a single margin-free PDF page sized to the image."""
    with Image.open(img_path) as img:
        img_width, img_height = img.size
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
    return buffer


def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file group, layout, font) combination.

    Uploaded files are grouped by the part of their file name before the first
    underscore. Within a group, all markdown files are concatenated and rendered
    first, then each image is appended as its own page.

    Args:
        files: Uploaded files; each item is either a path string or an object
            with a ``name`` attribute holding the path.
        layouts: Names of entries in ``LAYOUTS`` to render.
        fonts: Registered font names to render with.
        num_columns: Number of text columns for markdown pages.
        page_w_mult: Multiplier applied to the layout's page width.
        page_h_mult: Multiplier applied to the layout's page height.
        progress: Gradio progress tracker.

    Returns:
        ``(preview_image_paths, log_text, pdf_paths)``.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files: raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")

    # Start from empty output folders. rmtree is best effort, so tolerate a
    # directory that could not be removed instead of failing in mkdir().
    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir(exist_ok=True)

    grouped_files = defaultdict(lambda: {'md': [], 'img': []})
    for upload in files:
        # Accept both file-like uploads (with .name) and plain path strings.
        file_path = Path(getattr(upload, "name", upload))
        stem = file_path.stem.split('_')[0]
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped_files[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped_files[stem]['img'].append(file_path)

    log_lines, generated_pdf_paths = [], []

    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        for layout_name in layouts:
            for font_name in fonts:
                merger = PdfWriter()

                if assets['md']:
                    md_content = "\n".join(p.read_text(encoding='utf-8') for p in assets['md'])
                    base_w, base_h = LAYOUTS[layout_name]["size"]
                    pagesize = (base_w * page_w_mult, base_h * page_h_mult)
                    merger.append(fileobj=_render_markdown_pdf(md_content, font_name, pagesize, num_columns))

                for img_path in assets['img']:
                    merger.append(fileobj=_render_image_pdf(img_path))

                if len(merger.pages) > 0:
                    time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                    filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as out_file:
                        merger.write(out_file)
                    generated_pdf_paths.append(output_path)
                    log_lines.append(f"Generated: {filename}\n")

    gallery_previews = [create_pdf_preview(p) for p in generated_pdf_paths]
    final_gallery = [g for g in gallery_previews if g is not None]

    return final_gallery, "".join(log_lines), [str(p) for p in generated_pdf_paths]
265
 
266
  # --- Gradio UI Definition ---
267
+ AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
268
+ SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
269
+
270
+ 1. πŸ“œ **Introduction**
271
+ - **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
272
+ - **Usage**: A guide for learning and storytelling across traditions. ️
273
+ - **Themes**: Justice βš–οΈ, faith πŸ™, hubris πŸ›οΈ, redemption ✨, cosmic order 🌌.
274
+
275
+ 2. πŸ› οΈ **Core Concepts of Divinity**
276
+ - **Powers**: Creation 🌍, omniscience πŸ‘οΈβ€πŸ—¨οΈ, shapeshifting πŸ¦‹ across entities.
277
+ - **Life Cycle**: Mortality ⏳, immortality ♾️, transitions like saints and avatars 🌟.
278
+ - **Communication**: Omens πŸ•ŠοΈ, visions πŸ‘οΈ, miracles ✨ from gods and spirits.
279
+
280
+ # βš”οΈ Arthurian Legends
281
+ - **Merlin, Morgan le Fay, Arthur**: Mentor πŸ§™, rival πŸ§™β€β™€οΈ, son πŸ‘‘.
282
+ - **Relation**: Family tests loyalty 🀝.
283
+ - **Lesson**: Honor πŸŽ–οΈ vs. betrayal πŸ—‘οΈ.
284
+
285
+ # πŸ›οΈ Greek Mythology
286
+ - **Zeus, Hera, Athena**: Father ⚑, mother πŸ‘‘, daughter πŸ¦‰.
287
+ - **Relation**: Family rules with tension 🌩️.
288
+ - **Lesson**: Hubris ΰ€…ΰ€Ήΰ€‚ΰ€•ΰ€Ύΰ€° meets wisdom 🧠.
289
+
290
+ # πŸ•‰οΈ Hindu Trimurti
291
+ - **Brahma, Vishnu, Shiva**: Creator Brahma, preserver Vishnu, destroyer Shiva.
292
+ - **Relation**: Divine trio cycles existence πŸ”„.
293
+ - **Lesson**: Balance βš–οΈ sustains life πŸ’–.
294
+ """
295
+ with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)
296
+
297
+ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
298
+ gr.Markdown("# πŸ“„ Advanced PDF Layout Engine")
299
+ gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")
300
 
301
  with gr.Row():
302
  with gr.Column(scale=1):
303
  gr.Markdown("### βš™οΈ Generation Settings")
304
+ uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg", ".jpeg"])
 
305
 
306
+ with gr.Row():
307
+ page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
308
+ page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
 
309
 
310
+ num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
311
+ selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Base Page Layout", value=["A4 Portrait"])
312
+ selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
313
+ generate_btn = gr.Button("πŸš€ Generate PDFs", variant="primary")
314
 
315
  with gr.Column(scale=2):
316
+ gr.Markdown("### πŸ–ΌοΈ PDF Preview Gallery")
317
+ gallery_output = gr.Gallery(label="Generated PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
318
+ log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
319
+ downloadable_files_output = gr.Files(label="Download Generated PDFs")
320
 
321
+ generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider], outputs=[gallery_output, log_output, downloadable_files_output])
 
 
 
 
 
 
 
322
 
323
  if __name__ == "__main__":
324
+ demo.launch()