awacke1 committed on
Commit
6d55168
·
verified ·
1 Parent(s): eb23ca7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -226
app.py CHANGED
@@ -10,6 +10,11 @@ from collections import defaultdict
10
  import io
11
  from pypdf import PdfWriter
12
 
 
 
 
 
 
13
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
14
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
  from reportlab.lib.pagesizes import letter, A4, legal, landscape
@@ -25,10 +30,8 @@ LAYOUTS = {
25
  "A4 Landscape": {"size": landscape(A4)},
26
  "Letter Portrait": {"size": letter},
27
  "Letter Landscape": {"size": landscape(letter)},
28
- "Legal Portrait": {"size": legal},
29
- "Legal Landscape": {"size": landscape(legal)},
30
  }
31
- OUTPUT_DIR = CWD / "generated_pdfs"
32
  PREVIEW_DIR = CWD / "previews"
33
  FONT_DIR = CWD
34
 
@@ -37,15 +40,11 @@ OUTPUT_DIR.mkdir(exist_ok=True)
37
  PREVIEW_DIR.mkdir(exist_ok=True)
38
 
39
 
40
- # --- Font & Emoji Handling ---
41
 
42
  def register_local_fonts():
43
  """Finds and registers all .ttf files from the application's base directory."""
44
- print("--- Font Registration Process Starting ---")
45
- text_font_names = []
46
- emoji_font_name = None
47
-
48
- print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
49
  font_files = list(FONT_DIR.glob("*.ttf"))
50
  print(f"Found {len(font_files)} .ttf files: {[f.name for f in font_files]}")
51
 
@@ -54,271 +53,196 @@ def register_local_fonts():
54
  font_name = font_path.stem
55
  pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
56
  pdfmetrics.registerFont(TTFont(f"{font_name}-Bold", str(font_path)))
57
- pdfmetrics.registerFont(TTFont(f"{font_name}-Italic", str(font_path)))
58
- pdfmetrics.registerFont(TTFont(f"{font_name}-BoldItalic", str(font_path)))
59
- pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=f"{font_name}-Bold", italic=f"{font_name}-Italic", boldItalic=f"{font_name}-BoldItalic")
60
-
61
  if "notocoloremoji-regular" in font_name.lower():
62
  emoji_font_name = font_name
63
- elif "notoemoji" not in font_name.lower(): # Exclude other symbol fonts from text selection
64
  text_font_names.append(font_name)
65
  except Exception as e:
66
  print(f"Could not register font {font_path.name}: {e}")
67
-
68
- if not text_font_names:
69
- print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
70
- text_font_names.append('Helvetica')
71
-
72
- print(f"Successfully registered user-selectable fonts: {text_font_names}")
73
- print(f"Emoji font set to: {emoji_font_name}")
74
- print("--- Font Registration Process Finished ---")
75
  return sorted(text_font_names), emoji_font_name
76
 
77
  def apply_emoji_font(text: str, emoji_font_name: str) -> str:
78
- """
79
- Intelligently wraps emoji characters in a <font> tag, preserving existing HTML-like tags.
80
- This prevents invalid nested tags like <b><font>...</font></b> which ReportLab handles poorly.
81
- """
82
- if not emoji_font_name:
83
- return text
84
-
85
- # Regex to find emojis
86
  emoji_pattern = re.compile(f"([{re.escape(''.join(map(chr, range(0x1f600, 0x1f650))))}"
87
- f"{re.escape(''.join(map(chr, range(0x1f300, 0x1f5ff))))}"
88
- f"{re.escape(''.join(map(chr, range(0x1f900, 0x1f9ff))))}"
89
- f"{re.escape(''.join(map(chr, range(0x2600, 0x26ff))))}"
90
- f"{re.escape(''.join(map(chr, range(0x2700, 0x27bf))))}]+)")
91
-
92
- # Regex to split the string by existing tags (<b>, <i>)
93
- tag_pattern = re.compile(r"(<[^>]+>)")
94
- parts = tag_pattern.split(text)
95
-
96
- result = []
97
- for part in parts:
98
- if tag_pattern.match(part):
99
- # It's a tag, append it as is
100
- result.append(part)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  else:
102
- # It's text, apply emoji font to any emojis within this segment
103
- result.append(emoji_pattern.sub(fr'<font name="{emoji_font_name}">\1</font>', part))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- return "".join(result)
106
-
107
-
108
- # --- PDF Generation & Handling ---
109
 
110
  def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
111
- """
112
- Converts markdown to a ReportLab story, with enhanced styling and page breaks.
113
- This version correctly separates structural parsing from content formatting.
114
- """
115
  styles = getSampleStyleSheet()
116
- # Define styles for various markdown elements
117
- style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
118
- style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
119
- style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
120
- style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
121
- style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, padding=8, leading=12, fontSize=9)
122
- style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=font_name + "-Bold" if font_name != 'Helvetica' else 'Helvetica-Bold')
123
-
124
- story = []
125
- lines = markdown_text.split('\n')
126
-
127
- in_code_block, in_table = False, False
128
- code_block_text, table_data = "", []
129
- first_heading = True
130
-
131
- for line in lines:
132
- stripped_line = line.strip()
133
-
134
- if stripped_line.startswith("```"):
135
- if in_code_block:
136
- story.append(Paragraph(code_block_text.replace('\n', '<br/>'), style_code)); story.append(Spacer(1, 0.1 * inch))
137
- in_code_block = False; code_block_text = ""
138
- else: in_code_block = True
139
- continue
140
- if in_code_block:
141
- code_block_text += line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + '\n'
142
- continue
143
-
144
- if stripped_line.startswith('|'):
145
- if not in_table: in_table = True
146
- if all(c in '-|: ' for c in stripped_line): continue
147
- cells = [cell.strip() for cell in stripped_line.strip('|').split('|')]
148
- table_data.append(cells)
149
- continue
150
- if in_table:
151
- in_table = False
152
- if table_data:
153
- header_content = [apply_emoji_font(re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', cell), emoji_font) for cell in table_data[0]]
154
- header = [Paragraph(cell, style_table_header) for cell in header_content]
155
-
156
- formatted_rows = []
157
- for row in table_data[1:]:
158
- formatted_cells = [apply_emoji_font(re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', cell), emoji_font) for cell in row]
159
- formatted_rows.append([Paragraph(cell, style_normal) for cell in formatted_cells])
160
-
161
- table = Table([header] + formatted_rows, hAlign='LEFT', repeatRows=1)
162
- table.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey), ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey), ('VALIGN', (0,0), (-1,-1), 'MIDDLE')]))
163
- story.append(table); story.append(Spacer(1, 0.2 * inch))
164
- table_data = []
165
-
166
- if not stripped_line:
167
- story.append(Spacer(1, 0.1 * inch))
168
- continue
169
-
170
- # Default content is the whole stripped line
171
- content = stripped_line
172
- style = style_normal
173
- extra_args = {}
174
-
175
- # Detect structural elements and extract the raw content
176
- if stripped_line.startswith("# "):
177
  if not first_heading: story.append(PageBreak())
178
- content = stripped_line.lstrip('# '); style = style_h1; first_heading = False
179
- elif stripped_line.startswith("## "):
180
- content = stripped_line.lstrip('## '); style = style_h2
181
- elif stripped_line.startswith("### "):
182
- content = stripped_line.lstrip('### '); style = style_h3
183
- elif stripped_line.startswith(("- ", "* ")):
184
- content = stripped_line[2:]; extra_args['bulletText'] = 'β€’'
185
-
186
- # Now, format the extracted content
187
- # Apply markdown formatting for bold/italic
188
- formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
189
- # Then apply emoji font to the already formatted line
190
  final_content = apply_emoji_font(formatted_content, emoji_font)
191
-
192
- story.append(Paragraph(final_content, style, **extra_args))
193
  return story
194
 
195
-
196
  def create_pdf_preview(pdf_path: Path):
197
  preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
198
  try:
199
  doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap()
200
  pix.save(str(preview_path)); doc.close()
201
  return str(preview_path)
202
- except Exception as e:
203
- print(f"Could not create preview for {pdf_path.name}: {e}"); return None
204
 
205
  # --- Main API Function ---
206
- def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
207
- if not files: raise gr.Error("Please upload at least one Markdown or Image file.")
208
- if not layouts: raise gr.Error("Please select at least one page layout.")
209
- if not fonts: raise gr.Error("Please select at least one font.")
210
 
211
  shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
212
  OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
213
 
214
- grouped_files = defaultdict(lambda: {'md': [], 'img': []})
215
- for f in files:
216
- file_path = Path(f.name)
217
- stem = file_path.stem.split('_')[0] if '_' in file_path.stem else file_path.stem
218
- if file_path.suffix.lower() == '.md': grouped_files[stem]['md'].append(file_path)
219
- elif file_path.suffix.lower() in ['.png', '.jpg', '.jpeg']: grouped_files[stem]['img'].append(file_path)
220
 
221
- log_updates, generated_pdf_paths = "", []
222
-
223
- for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
224
- for layout_name in layouts:
225
- for font_name in fonts:
226
- merger = PdfWriter()
227
-
228
- if assets['md']:
229
- md_content = "\n".join([p.read_text(encoding='utf-8') for p in assets['md']])
230
- md_buffer = io.BytesIO()
231
- story = markdown_to_story(md_content, font_name, EMOJI_FONT_NAME)
232
-
233
- base_w, base_h = LAYOUTS[layout_name]["size"]
234
- pagesize = (base_w * page_w_mult, base_h * page_h_mult)
235
 
236
- if num_columns > 1:
237
- doc = BaseDocTemplate(md_buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
238
- frame_width = (doc.width / num_columns) - (num_columns - 1) * 0.1*inch
239
- frames = [Frame(doc.leftMargin + i * (frame_width + 0.2*inch), doc.bottomMargin, frame_width, doc.height) for i in range(num_columns)]
240
- doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
241
- else:
242
- doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)
243
- doc.build(story)
244
- merger.append(fileobj=md_buffer)
245
-
246
- for img_path in assets['img']:
247
- with Image.open(img_path) as img: img_width, img_height = img.size
248
- img_buffer = io.BytesIO()
249
- doc = SimpleDocTemplate(img_buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
250
- doc.build([ReportLabImage(img_path, width=img_width, height=img_height)])
251
- merger.append(fileobj=img_buffer)
252
-
253
- if len(merger.pages) > 0:
254
- time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
255
- filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
256
  output_path = OUTPUT_DIR / filename
257
- with open(output_path, "wb") as f: merger.write(f)
258
- generated_pdf_paths.append(output_path)
259
- log_updates += f"Generated: {filename}\n"
260
-
261
- gallery_previews = [create_pdf_preview(p) for p in generated_pdf_paths]
262
- final_gallery = [g for g in gallery_previews if g is not None]
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
- return final_gallery, log_updates, [str(p) for p in generated_pdf_paths]
265
 
266
  # --- Gradio UI Definition ---
267
  AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
268
- SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
269
-
270
- 1. πŸ“œ **Introduction**
271
- - **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
272
- - **Usage**: A guide for learning and storytelling across traditions. ️
273
- - **Themes**: Justice βš–οΈ, faith πŸ™, hubris πŸ›οΈ, redemption ✨, cosmic order 🌌.
274
-
275
- 2. πŸ› οΈ **Core Concepts of Divinity**
276
- - **Powers**: Creation 🌍, omniscience πŸ‘οΈβ€πŸ—¨οΈ, shapeshifting πŸ¦‹ across entities.
277
- - **Life Cycle**: Mortality ⏳, immortality ♾️, transitions like saints and avatars 🌟.
278
- - **Communication**: Omens πŸ•ŠοΈ, visions πŸ‘οΈ, miracles ✨ from gods and spirits.
279
-
280
- # βš”οΈ Arthurian Legends
281
- - **Merlin, Morgan le Fay, Arthur**: Mentor πŸ§™, rival πŸ§™β€β™€οΈ, son πŸ‘‘.
282
- - **Relation**: Family tests loyalty 🀝.
283
- - **Lesson**: Honor πŸŽ–οΈ vs. betrayal πŸ—‘οΈ.
284
-
285
- # πŸ›οΈ Greek Mythology
286
- - **Zeus, Hera, Athena**: Father ⚑, mother πŸ‘‘, daughter πŸ¦‰.
287
- - **Relation**: Family rules with tension 🌩️.
288
- - **Lesson**: Hubris ΰ€…ΰ€Ήΰ€‚ΰ€•ΰ€Ύΰ€° meets wisdom 🧠.
289
-
290
- # πŸ•‰οΈ Hindu Trimurti
291
- - **Brahma, Vishnu, Shiva**: Creator Brahma, preserver Vishnu, destroyer Shiva.
292
- - **Relation**: Divine trio cycles existence πŸ”„.
293
- - **Lesson**: Balance βš–οΈ sustains life πŸ’–.
294
- """
295
  with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)
296
 
297
- with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
298
- gr.Markdown("# πŸ“„ Advanced PDF Layout Engine")
299
- gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")
300
 
301
  with gr.Row():
302
  with gr.Column(scale=1):
303
  gr.Markdown("### βš™οΈ Generation Settings")
304
- uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg", ".jpeg"])
 
 
305
 
306
- with gr.Row():
307
- page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
308
- page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
 
 
 
 
309
 
310
- num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
311
- selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Base Page Layout", value=["A4 Portrait"])
312
- selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
313
- generate_btn = gr.Button("πŸš€ Generate PDFs", variant="primary")
314
 
315
  with gr.Column(scale=2):
316
- gr.Markdown("### πŸ–ΌοΈ PDF Preview Gallery")
317
- gallery_output = gr.Gallery(label="Generated PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
318
- log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
319
- downloadable_files_output = gr.Files(label="Download Generated PDFs")
320
 
321
- generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider], outputs=[gallery_output, log_output, downloadable_files_output])
 
 
322
 
323
  if __name__ == "__main__":
324
  demo.launch()
 
10
  import io
11
  from pypdf import PdfWriter
12
 
13
+ # Imports for new formats
14
+ from docx import Document
15
+ from docx.shared import Inches
16
+ import openpyxl
17
+
18
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
19
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
20
  from reportlab.lib.pagesizes import letter, A4, legal, landscape
 
30
  "A4 Landscape": {"size": landscape(A4)},
31
  "Letter Portrait": {"size": letter},
32
  "Letter Landscape": {"size": landscape(letter)},
 
 
33
  }
34
+ OUTPUT_DIR = CWD / "generated_outputs"
35
  PREVIEW_DIR = CWD / "previews"
36
  FONT_DIR = CWD
37
 
 
40
  PREVIEW_DIR.mkdir(exist_ok=True)
41
 
42
 
43
+ # --- Font & Emoji Handling (for PDF) ---
44
 
45
  def register_local_fonts():
46
  """Finds and registers all .ttf files from the application's base directory."""
47
+ text_font_names, emoji_font_name = [], None
 
 
 
 
48
  font_files = list(FONT_DIR.glob("*.ttf"))
49
  print(f"Found {len(font_files)} .ttf files: {[f.name for f in font_files]}")
50
 
 
53
  font_name = font_path.stem
54
  pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
55
  pdfmetrics.registerFont(TTFont(f"{font_name}-Bold", str(font_path)))
56
+ pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=f"{font_name}-Bold")
 
 
 
57
  if "notocoloremoji-regular" in font_name.lower():
58
  emoji_font_name = font_name
59
+ else:
60
  text_font_names.append(font_name)
61
  except Exception as e:
62
  print(f"Could not register font {font_path.name}: {e}")
63
+ if not text_font_names: text_font_names.append('Helvetica')
 
 
 
 
 
 
 
64
  return sorted(text_font_names), emoji_font_name
65
 
# Code points rendered with the emoji font. Range ends are inclusive; compiled
# once at import time instead of on every call.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\u2600-\u26FF"          # Miscellaneous Symbols
    "\u2700-\u27BF"          # Dingbats
    "]+)"
)


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap each run of emoji characters in a ReportLab ``<font>`` tag.

    Args:
        text: Paragraph markup to scan.
        emoji_font_name: Name of the registered emoji font. When falsy
            (no emoji font was found) ``text`` is returned unchanged.

    Returns:
        ``text`` with every consecutive run of emoji wrapped as
        ``<font name="...">run</font>``.
    """
    if not emoji_font_name:
        return text

    def wrap(match):
        # A callable replacement keeps the font name literal; a template
        # string would interpret backslashes in the name as regex escapes.
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return _EMOJI_PATTERN.sub(wrap, text)
71
+
72
+
73
+ # --- Document Generation Engines ---
74
+
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
    """Generate a PDF from markdown content.

    Args:
        md_content: Markdown source text.
        font_name: Registered font used for the text.
        emoji_font: Registered emoji font name, or a falsy value for none.
        pagesize: ``(width, height)`` of the page in points.
        num_columns: Number of text columns; values above 1 use a
            multi-frame page template.

    Returns:
        An ``io.BytesIO`` holding the PDF, rewound to position 0.
    """
    md_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, emoji_font)

    # The UI slider may hand over a float; range() below needs an int.
    num_columns = int(num_columns)

    if num_columns > 1:
        doc = BaseDocTemplate(md_buffer, pagesize=pagesize, leftMargin=0.5 * inch, rightMargin=0.5 * inch)
        gutter = 0.2 * inch
        # Columns plus the gutters between them must add up to doc.width:
        #   num_columns * frame_width + (num_columns - 1) * gutter == doc.width
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)

    doc.build(story)
    # Rewind so callers that read() the buffer see the document;
    # getvalue() is unaffected by the position.
    md_buffer.seek(0)
    return md_buffer
88
+
# "# Title" .. "###### Title": group 1 is the marker, group 2 the heading text.
_DOCX_HEADING_RE = re.compile(r'^(#{1,6})\s+(.*)$')
# Inline **bold** and _italic_ spans; captured so re.split keeps them.
_DOCX_INLINE_RE = re.compile(r'(\*\*.+?\*\*|_.+?_)')


def _add_markdown_runs(paragraph, text):
    """Append ``text`` to ``paragraph``, turning **bold** / _italic_ into runs."""
    # With one capture group, re.split alternates plain text (even index)
    # and matched markup (odd index).
    for index, part in enumerate(_DOCX_INLINE_RE.split(text)):
        if not part:
            continue
        if index % 2 == 0:
            paragraph.add_run(part)
        elif part.startswith('**'):
            paragraph.add_run(part[2:-2]).bold = True
        else:
            paragraph.add_run(part[1:-1]).italic = True


def create_docx(md_content):
    """Generate a DOCX document from markdown content.

    Handles ``#``..``######`` headings, ``- `` / ``* `` bullets and plain
    paragraphs. Inline ``**bold**`` and ``_italic_`` are converted to runs
    in all three. Blank lines are skipped rather than emitted as empty
    paragraphs.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``docx`` document object (not yet saved).
    """
    document = Document()
    for raw_line in md_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        heading = _DOCX_HEADING_RE.match(line)
        if heading:
            paragraph = document.add_heading(level=len(heading.group(1)))
            _add_markdown_runs(paragraph, heading.group(2))
        elif line.startswith(('- ', '* ')):
            paragraph = document.add_paragraph(style='List Bullet')
            _add_markdown_runs(paragraph, line[2:])
        else:
            _add_markdown_runs(document.add_paragraph(), line)
    return document
111
+
def create_xlsx(md_content):
    """Generate an XLSX workbook with one column per H1 section.

    The text is split at lines starting with ``# ``. Each section's heading
    goes in row 1 of its column and its non-blank lines, stripped, fill the
    rows below. Text before the first H1 gets a column with an empty header.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``openpyxl`` workbook (not yet saved).
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # Normalise Windows newlines so "\r" does not end up inside cells.
    text = md_content.replace('\r\n', '\n')

    # The leading "\n" lets an H1 on the very first line match the separator.
    sections = re.split(r'\n# ', '\n' + text)
    # Drop a blank leading chunk (nothing, or only blank lines, before the
    # first H1) so it does not become an empty first column.
    if not sections[0].strip():
        sections.pop(0)

    for c_idx, section in enumerate(sections, 1):
        header, *body = section.split('\n')
        sheet.cell(row=1, column=c_idx, value=header.strip())
        rows = [entry.strip() for entry in body if entry.strip()]
        for r_idx, line_content in enumerate(rows, 2):
            sheet.cell(row=r_idx, column=c_idx, value=line_content)

    return workbook
 
 
 
138
 
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert markdown to a ReportLab story for PDF generation.

    Every input line becomes one ``Paragraph``. Lines starting with ``# ``
    are styled as headings, and each heading after the first is preceded by
    a ``PageBreak``. ``**bold**`` is converted to ``<b>`` markup and emoji
    runs are wrapped by ``apply_emoji_font``.

    Args:
        markdown_text: Markdown source text.
        font_name: Registered font used for body and heading text.
        emoji_font: Registered emoji font name, or a falsy value for none.

    Returns:
        A list of flowables ready for ``doc.build``.
    """
    # Leading is set explicitly: the ParagraphStyle default is sized for
    # small body text, which makes wrapped 24pt heading lines overlap.
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', fontName=font_name, spaceBefore=12, fontSize=24, leading=28)

    story, first_heading = [], True
    for line in markdown_text.split('\n'):
        content, style = line, style_normal
        if line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            # Slice off the "# " prefix. str.lstrip('# ') strips a character
            # set and would also eat a title's own leading '#' characters.
            content, style, first_heading = line[2:].strip(), style_h1, False
        formatted_content = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content)
        final_content = apply_emoji_font(formatted_content, emoji_font)
        story.append(Paragraph(final_content, style))
    return story
154
 
 
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Preview generation is best-effort: failures are reported on stdout and
    signalled with ``None`` instead of aborting the run.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview image path as a string, or ``None`` if it could not be
        created.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            pix = doc.load_page(0).get_pixmap()
            pix.save(str(preview_path))
        finally:
            # Release the file handle even when rendering fails.
            doc.close()
        return str(preview_path)
    except Exception as e:
        # Catch Exception, not a bare except, so Ctrl-C still propagates.
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
 
162
 
163
  # --- Main API Function ---
def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate the selected output formats from the uploaded Markdown files.

    All uploaded ``.md`` files are merged into one document. Each file's
    first line is an H1 heading, with ``# `` prepended when missing. The
    merged text is rendered once per selected format; PDF is rendered once
    per layout/font combination.

    Args:
        files: Uploaded file objects; only ``.name`` is read.
        output_formats: Any of ``"PDF"``, ``"DOCX"``, ``"XLSX"``.
        layouts: Keys of ``LAYOUTS`` to render (PDF only).
        fonts: Registered font names to render (PDF only).
        num_columns: Number of text columns (PDF only).
        page_w_mult: Multiplier applied to the layout's page width.
        page_h_mult: Multiplier applied to the layout's page height.
        progress: Gradio progress tracker.

    Returns:
        ``(preview image paths, status message, generated file paths)``.

    Raises:
        gr.Error: When a required selection is missing or no usable
            Markdown content was uploaded.
    """
    if not files: raise gr.Error("Please upload at least one file.")
    if not output_formats: raise gr.Error("Please select at least one output format.")
    if "PDF" in output_formats:
        # Without these the PDF loops run zero times and nothing is produced.
        if not layouts: raise gr.Error("Please select at least one page layout.")
        if not fonts: raise gr.Error("Please select at least one font.")

    # Consolidate all markdown content. Each file is normalised on its own so
    # a file that already starts with "# " does not end up as "# # Title".
    sections = []
    for uploaded in files:
        path = Path(uploaded.name)
        if path.suffix.lower() != '.md':
            continue
        text = path.read_text(encoding='utf-8').strip()
        if not text:
            continue
        if not text.startswith("# "):
            text = "# " + text
        sections.append(text)
    if not sections:
        raise gr.Error("Please upload at least one non-empty Markdown (.md) file.")
    md_content = "\n\n".join(sections)

    # Clear old outputs only after validation, so a rejected request keeps
    # the previous results available.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    # exist_ok: rmtree errors are ignored, so a directory may still be there.
    OUTPUT_DIR.mkdir(exist_ok=True); PREVIEW_DIR.mkdir(exist_ok=True)

    generated_files = []

    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()

        if format_choice == "PDF":
            for layout_name in layouts:
                for font_name in fonts:
                    pagesize = LAYOUTS[layout_name]["size"]
                    final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                    pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)

                    filename = f"Document_{time_str}_{layout_name.replace(' ','-')}_{font_name}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
                    generated_files.append(output_path)

        elif format_choice == "DOCX":
            docx_doc = create_docx(md_content)
            filename = f"Document_{time_str}.docx"
            output_path = OUTPUT_DIR / filename
            docx_doc.save(output_path)
            generated_files.append(output_path)

        elif format_choice == "XLSX":
            xlsx_book = create_xlsx(md_content)
            filename = f"Outline_{time_str}.xlsx"
            output_path = OUTPUT_DIR / filename
            xlsx_book.save(output_path)
            generated_files.append(output_path)

    # Only PDFs can be rasterised into gallery previews.
    gallery_previews = [create_pdf_preview(p) for p in generated_files if p.suffix == '.pdf']
    final_gallery = [g for g in gallery_previews if g]

    return final_gallery, f"Generated {len(generated_files)} files.", [str(p) for p in generated_files]
210
 
211
  # --- Gradio UI Definition ---
# Register fonts once at import time; the UI offers the text fonts as choices
# and the PDF engine uses the emoji font for emoji runs.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()

# Small sample document, written next to the app as an example upload.
SAMPLE_MARKDOWN = (
    "# Deities Guide\n\n"
    "- **Purpose**: Explore deities and their morals! \n"
    "- **Themes**: Justice ⚖️, faith 🙏\n\n"
    "# Arthurian Legends\n\n"
    " - **Merlin, Arthur**: Mentor 🧙, son 👑.\n"
    " - **Lesson**: Honor 🎖️ vs. betrayal 🗑️."
)
with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Document Generator") as demo:
    gr.Markdown("# 📄 Advanced Document Generator (PDF, DOCX, XLSX)")
    gr.Markdown("Upload Markdown files to generate documents in multiple formats. `# Headers` create columns in XLSX and page breaks in multi-page PDFs.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg"])

            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])

            # These settings only affect PDF output.
            with gr.Accordion("PDF Customization", open=True):
                with gr.Row():
                    page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                    page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
                num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["A4 Portrait"])
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])

            generate_btn = gr.Button("🚀 Generate Documents", variant="primary")

        # Right column: previews, status line and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ Output Files")
            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Ready...")
            downloadable_files_output = gr.Files(label="Download Generated Files")

    # Input order must match the parameter order of generate_outputs_api.
    generate_btn.click(fn=generate_outputs_api,
                       inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
                       outputs=[gallery_output, log_output, downloadable_files_output])

if __name__ == "__main__":
    demo.launch()