awacke1 committed on
Commit
96b5748
·
verified ·
1 Parent(s): 182d632

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -283
app.py CHANGED
@@ -4,344 +4,321 @@ import datetime
4
  import re
5
  import os
6
  import shutil
7
- import io
8
- import base64
9
- from collections import defaultdict
10
  from PIL import Image
11
- import json
12
-
13
- # Document Generation Libs
14
- from docx import Document
15
- import openpyxl
16
  from pypdf import PdfWriter
17
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, BaseDocTemplate, Frame, PageTemplate
 
18
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
19
- from reportlab.lib.pagesizes import letter, A4, landscape
20
  from reportlab.lib.units import inch
 
21
  from reportlab.pdfbase import pdfmetrics
22
  from reportlab.pdfbase.ttfonts import TTFont
23
 
24
- # Media Libs
25
- import fitz # PyMuPDF
26
-
27
  # --- Configuration & Setup ---
28
# Every working directory lives next to the process' current directory.
CWD = Path.cwd()
OUTPUT_DIR = CWD / "generated_outputs"
PREVIEW_DIR = CWD / "previews"
UPLOAD_DIR = CWD / "uploads"
FONT_DIR = CWD

# Create necessary directories
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
UPLOAD_DIR.mkdir(exist_ok=True)

# Display name -> ReportLab page size, in the order offered to the user.
LAYOUTS = {
    f"{paper} {orientation}": {"size": size if orientation == "Portrait" else landscape(size)}
    for paper, size in (("A4", A4), ("Letter", letter))
    for orientation in ("Portrait", "Landscape")
}
45
 
46
- # --- ✍️ Document Generation Engines ---
47
-
48
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
    """📄 Build a PDF from Markdown text using ReportLab.

    Args:
        md_content: Markdown source text.
        font_name: Font used for body text.
        emoji_font: Font used for emoji runs, or a falsy value for none.
        pagesize: ``(width, height)`` of the page in points.
        num_columns: Number of text columns; values above 1 use a
            multi-frame page template.

    Returns:
        An ``io.BytesIO`` holding the PDF, rewound to position 0.
    """
    pdf_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, emoji_font)
    num_columns = int(num_columns)  # a UI slider may hand over a float
    if num_columns > 1:
        doc = BaseDocTemplate(pdf_buffer, pagesize=pagesize, leftMargin=0.5 * inch, rightMargin=0.5 * inch)
        # Share the printable width between the columns after reserving the
        # gutters; the previous formula left unused space on the right for
        # three or more columns.
        gutter = 0.2 * inch
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(pdf_buffer, pagesize=pagesize)
    doc.build(story)
    pdf_buffer.seek(0)
    return pdf_buffer
62
-
63
def create_docx(md_content):
    """📝 Turn Markdown text into a python-docx ``Document``.

    ``# `` and ``## `` lines become level 1/2 headings, ``- ``/``* `` lines
    become bullet items, and every other line becomes a paragraph in which
    ``**text**`` spans are rendered bold.
    """
    document = Document()
    bold_splitter = re.compile(r'(\*\*.*?\*\*)')

    for raw_line in md_content.split('\n'):
        if raw_line.startswith('# '):
            document.add_heading(raw_line[2:], level=1)
            continue
        if raw_line.startswith('## '):
            document.add_heading(raw_line[3:], level=2)
            continue

        trimmed = raw_line.strip()
        if trimmed.startswith(('- ', '* ')):
            document.add_paragraph(trimmed[2:], style='List Bullet')
            continue

        paragraph = document.add_paragraph()
        for fragment in bold_splitter.split(raw_line):
            is_bold = fragment.startswith('**') and fragment.endswith('**')
            if is_bold:
                paragraph.add_run(fragment[2:-2]).bold = True
            else:
                paragraph.add_run(fragment)
    return document
77
-
78
def create_xlsx(md_content):
    """📊 Lay a Markdown outline out as spreadsheet columns.

    The text is split at every ``# `` heading; each section fills one
    column, with its first line in row 1 and its non-blank lines
    (stripped) below it.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    sections = re.split(r'\n# ', '\n' + md_content)
    if sections[:1] == ['']:
        del sections[0]

    for column, section in enumerate(sections, start=1):
        header, *body = section.split('\n')
        sheet.cell(row=1, column=column, value=header)
        entries = [text.strip() for text in body if text.strip()]
        for row, entry in enumerate(entries, start=2):
            sheet.cell(row=row, column=column, value=entry)
    return workbook
93
 
94
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """📜 Translate Markdown text into a list of ReportLab flowables.

    Handles ``#``/``##``/``###`` headings, ``- ``/``* `` bullets,
    ``**bold**`` and ``_italic_`` spans. Every level-1 heading after the
    first starts a new page. Blank lines become small spacers.

    Args:
        markdown_text: Markdown source.
        font_name: Body font; ``<font_name>-Bold`` is used for headings
            (``Helvetica-Bold`` for Helvetica).
        emoji_font: Font name passed to ``apply_emoji_font``.

    Returns:
        The list of flowables in document order.
    """
    # Imported here because this side of the file has no module-level import of it.
    from reportlab.lib import colors

    bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10, leading=14)
    # An explicit leading keeps large headings from overlapping what follows
    # (the ParagraphStyle default is 12pt).
    style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24, leading=28, textColor=colors.HexColor("#1E3A8A"))
    style_h2 = ParagraphStyle('h2', fontName=bold_font, spaceBefore=10, fontSize=18, leading=22, textColor=colors.HexColor("#374151"))
    style_h3 = ParagraphStyle('h3', fontName=bold_font, spaceBefore=8, fontSize=14, leading=18, textColor=colors.HexColor("#4B5563"))

    story, first_heading = [], True
    for line in markdown_text.split('\n'):
        stripped_line = line.strip()
        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        content, style, extra_args = stripped_line, style_normal, {}
        # Slice the marker off: str.lstrip('# ') strips a *character set*, so
        # it would also remove '#' characters that belong to the heading text.
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            content, style, first_heading = stripped_line[2:].lstrip(), style_h1, False
        elif stripped_line.startswith("## "):
            content, style = stripped_line[3:].lstrip(), style_h2
        elif stripped_line.startswith("### "):
            content, style = stripped_line[4:].lstrip(), style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content, extra_args = stripped_line[2:], {'bulletText': '•'}

        # Bold first, then italics, then wrap emoji runs in the emoji font.
        formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
        final_content = apply_emoji_font(formatted_content, emoji_font)

        story.append(Paragraph(final_content, style, **extra_args))
    return story
122
 
123
 
124
- # --- 🔮 Virtual AI Omni-Model Functions ---
125
-
126
def process_text_input(prompt):
    """💬 Return a canned Markdown "AI response" that quotes *prompt*."""
    response_lines = [
        "# Virtual AI Response",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Generated Content:**",
        "- This is a simulated response for your text input.",
        "- Here's an emoji: 😊",
    ]
    return "\n".join(response_lines)
129
-
130
def process_image_input(image_path, prompt):
    """🖼️ Return a canned Markdown "image analysis" naming the file and its suffix."""
    image = Path(image_path)
    report_lines = [
        f"# Virtual AI Image Analysis: {image.name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Generated Content:**",
        "1. Simulated analysis of the uploaded image.",
        f"2. File type appears to be `{image.suffix}`.",
    ]
    return "\n".join(report_lines)
133
-
134
def process_audio_input(audio_path, prompt):
    """🎤 Return a canned Markdown "transcription and summary" for an audio file."""
    clip_name = Path(audio_path).name
    summary_lines = [
        f"# Virtual AI Audio Summary: {clip_name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Simulated Transcription:**",
        '> "This is a test of the emergency broadcast system."',
        "",
        "**Generated Summary:**",
        "The audio is a test broadcast.",
    ]
    return "\n".join(summary_lines)
137
-
138
def process_pdf_input(pdf_path, prompt, progress):
    """📄 Return canned Markdown "OCR output" for a PDF, reporting progress.

    ``progress`` is called twice, as ``progress(fraction, desc=...)``: once
    at 0.5 before the text is built and once at 1.0 afterwards.
    """
    progress(0.5, desc="Simulating PDF page processing...")
    ocr_lines = [
        f"# Virtual AI OCR of: {Path(pdf_path).name}",
        "",
        "**Your Prompt:**",
        f"> {prompt}",
        "",
        "**Extracted Content (Simulated):**",
        "- **Page 1:** Simulated text from the first page.",
        "- **Page 2:** Simulated text from the second page.",
    ]
    ocr_text = "\n".join(ocr_lines)
    progress(1.0, desc="PDF OCR Simulation Complete!")
    return ocr_text
144
-
145
-
146
- # --- 🛠️ Helpers & Main API ---
147
-
148
def register_local_fonts():
    """✒️ Register every ``.ttf`` file in ``FONT_DIR`` with ReportLab.

    Each file is registered under its stem and again under ``<stem>-Bold``
    (the same file, so bold markup resolves to a known font name). A font
    whose name contains ``notocoloremoji-regular`` becomes the emoji font;
    all others are offered as text fonts.

    Returns:
        ``(sorted text font names, emoji font name or None)``. Falls back
        to ``['Helvetica']`` when no text font could be registered.
    """
    text_font_names, emoji_font_name = [], None
    for font_path in FONT_DIR.glob("*.ttf"):
        try:
            font_name = font_path.stem
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            pdfmetrics.registerFont(TTFont(f"{font_name}-Bold", str(font_path)))
            pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=f"{font_name}-Bold")
        except Exception as exc:
            # Skipping a broken font is intentional, but say so instead of
            # hiding it (a bare except would also swallow KeyboardInterrupt).
            print(f"Could not register font {font_path.name}: {exc}")
            continue
        if "notocoloremoji-regular" in font_name.lower():
            emoji_font_name = font_name
        else:
            text_font_names.append(font_name)
    if not text_font_names:
        text_font_names.append('Helvetica')
    return sorted(text_font_names), emoji_font_name
165
-
166
def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """😊 Wrap runs of emoji in ``<font name="...">`` tags for the PDF.

    Covers U+1F600-U+1F64F and U+1F300-U+1F5FF inclusive; the previous
    ``range()``-based class stopped one short and missed U+1F5FF.

    Args:
        text: Paragraph markup to scan.
        emoji_font_name: Registered emoji font; a falsy value returns
            *text* unchanged.
    """
    if not emoji_font_name:
        return text
    emoji_pattern = re.compile("([\U0001F600-\U0001F64F\U0001F300-\U0001F5FF]+)")
    # A callable replacement keeps backslashes in the font name from being
    # read as regex escapes.
    return emoji_pattern.sub(lambda match: f'<font name="{emoji_font_name}">{match.group(1)}</font>', text)
172
-
173
def create_pdf_preview(pdf_path: Path):
    """🏞️ Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Returns:
        The preview ``Path``, or ``None`` when the PDF cannot be opened or
        rendered.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        # The context manager closes the document even when rendering fails.
        with fitz.open(pdf_path) as doc:
            pix = doc.load_page(0).get_pixmap(dpi=96)
            pix.save(str(preview_path))
        return preview_path
    except Exception as exc:
        print(f"Could not create preview for {pdf_path.name}: {exc}")
        return None
 
181
 
182
def build_file_explorer_html(generated_files, pdf_files_for_gallery):
    """🗂️ Build the HTML/CSS/JS for the file explorer and PDF gallery.

    Args:
        generated_files: Paths of all generated files; each becomes a
            download card (PDF cards also get placeholder action buttons).
        pdf_files_for_gallery: PDF paths to preview; each successful preview
            is embedded as a base64 PNG thumbnail.

    Returns:
        One HTML string with an explorer tab and, when at least one preview
        exists, a gallery tab.
    """
    file_icons = {".pdf": "📄", ".docx": "📝", ".xlsx": "📊"}
    action_buttons = """
<div class="actions">
<button class="action-btn" onclick="event.preventDefault(); alert('QuizMe: Feature coming soon!')">🧠 QuizMe</button>
<button class="action-btn" onclick="event.preventDefault(); alert('Revise: Feature coming soon!')">✍️ Revise</button>
</div>
"""
    file_cards = []
    for file_path in generated_files:
        icon = file_icons.get(file_path.suffix, '📎')
        file_cards.append(f"""
<div class="file-item">
<a href="/file={file_path}" class="file-link" download="{file_path.name}">
<span class="file-icon">{icon}</span>
<span class="file-name">{file_path.name}</span>
</a>
{action_buttons if file_path.suffix == '.pdf' else ''}
</div>
""")
    file_explorer_html = "".join(file_cards)

    gallery_items = []
    for pdf_path in pdf_files_for_gallery:
        preview_path = create_pdf_preview(pdf_path)
        if preview_path:
            with open(preview_path, "rb") as f:
                img_base64 = base64.b64encode(f.read()).decode("utf-8")
            gallery_items.append({"preview_src": f"data:image/png;base64,{img_base64}", "filename": pdf_path.name})

    gallery_html = ""
    if gallery_items:
        thumbs_html = "".join([f'<img src="{item["preview_src"]}" class="thumbnail" onclick="selectThumbnail(this, \'{item["preview_src"]}\', \'{item["filename"]}\')">' for item in gallery_items])
        gallery_html = f"""
<div class="gallery-container">
<div class="main-view"><img id="main-image" src="{gallery_items[0]['preview_src']}" class="main-image"></div>
<div class="thumbnail-strip">{thumbs_html}</div>
</div>
<p id="main-filename">{gallery_items[0]['filename']}</p>
"""

    # Built outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError before Python 3.12.
    gallery_tab_button = (
        '<button class="tab-button" onclick="openTab(event, \'gallery\')">🖼️ GlimpsePDFs</button>'
        if gallery_items else ''
    )

    html = f"""
<style>
.tabs {{ display: flex; border-bottom: 2px solid #ccc; }}
.tab-button {{ padding: 10px 15px; cursor: pointer; background: #f1f1f1; border: none; font-size: 1.2em; }}
.tab-button.active {{ background: #fff; border-top: 2px solid #6366f1; border-left: 1px solid #ccc; border-right: 1px solid #ccc; position: relative; top: 1px;}}
.tab-content {{ display: none; padding: 15px; border: 1px solid #ccc; border-top: none; }}
.tab-content.active {{ display: block; }}
.file-explorer {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 15px; }}
.file-item {{ background: #f9f9f9; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
.file-link {{ display: flex; align-items: center; padding: 15px; text-decoration: none; color: #333; }}
.file-icon {{ font-size: 3em; margin-right: 15px; }}
.file-name {{ font-weight: bold; word-break: break-all; }}
.actions {{ display: flex; justify-content: space-around; padding: 5px 10px; border-top: 1px solid #eee; }}
.action-btn {{ background: none; border: none; cursor: pointer; font-size: 1.1em; }}
.gallery-container {{ display: flex; height: 500px; }}
.main-view {{ flex: 4; }} .main-image {{ width: 100%; height: 100%; object-fit: contain; }}
.thumbnail-strip {{ flex: 1; overflow-y: auto; }} .thumbnail {{ width: 100%; margin-bottom: 5px; cursor: pointer; border: 3px solid transparent; }}
.thumbnail.active {{ border-color: #6366f1; }}
#main-filename {{ text-align: center; font-weight: bold; margin-top: 10px; }}
</style>
<div class="tabs">
<button class="tab-button active" onclick="openTab(event, 'explorer')">🗂️ FileExplorer</button>
{gallery_tab_button}
</div>
<div id="explorer" class="tab-content active"><div class="file-explorer">{file_explorer_html}</div></div>
<div id="gallery" class="tab-content">{gallery_html}</div>
<script>
function openTab(evt, tabName) {{
let i, tabcontent = document.getElementsByClassName("tab-content"), tablinks = document.getElementsByClassName("tab-button");
for (i = 0; i < tabcontent.length; i++) tabcontent[i].style.display = "none";
for (i = 0; i < tablinks.length; i++) tablinks[i].className = tablinks[i].className.replace(" active", "");
document.getElementById(tabName).style.display = "block"; evt.currentTarget.className += " active";
}}
const mainImage = document.getElementById('main-image'), mainFilename = document.getElementById('main-filename'), thumbnails = document.querySelectorAll('.thumbnail');
if (thumbnails.length > 0) thumbnails[0].classList.add('active');
function selectThumbnail(thumb, src, name) {{
mainImage.src = src; mainFilename.textContent = name;
thumbnails.forEach(t => t.classList.remove('active')); thumb.classList.add('active');
}};
</script>
"""
    return html
266
 
267
def generate_outputs_api(omni_file, omni_prompt, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Produce source Markdown from the input, then render each requested format.

    Args:
        omni_file: Optional uploaded file object (its ``.name`` is the path).
        omni_prompt: Optional text prompt.
        output_formats: Any of ``"PDF"``, ``"DOCX"``, ``"XLSX"``.
        layouts: Keys of ``LAYOUTS`` to render PDFs for.
        fonts: Font names to render PDFs with.
        num_columns: Text columns per PDF page.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        ``(markdown source, file explorer HTML)``.

    Raises:
        gr.Error: When there is no input, no output format, or no source
            content could be produced.
    """
    if not omni_prompt and not omni_file:
        raise gr.Error("Please provide a prompt or upload at least one file.")
    if not output_formats:
        raise gr.Error("Please select at least one output format.")

    # Start every run from empty output and preview directories.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    OUTPUT_DIR.mkdir()
    PREVIEW_DIR.mkdir()

    md_content = ""
    if omni_file:
        temp_path = UPLOAD_DIR / Path(omni_file.name).name
        shutil.copyfile(omni_file.name, temp_path)
        file_ext = temp_path.suffix.lower()

        if file_ext == '.md':
            md_content = temp_path.read_text(encoding='utf-8')
        elif file_ext == '.pdf':
            md_content = process_pdf_input(temp_path, omni_prompt or "Extract text", progress)
        elif file_ext in ['.png', '.jpg', '.jpeg']:
            md_content = process_image_input(temp_path, omni_prompt or "Describe image")
        elif file_ext in ['.wav', '.mp3']:
            md_content = process_audio_input(temp_path, omni_prompt or "Summarize transcription")
    elif omni_prompt:
        md_content = process_text_input(omni_prompt)

    if not md_content:
        raise gr.Error("Failed to generate source content.")

    generated_files, pdf_files_for_gallery = [], []
    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
        if format_choice == "PDF":
            for layout_name in layouts:
                base_w, base_h = LAYOUTS[layout_name]["size"]
                final_pagesize = (base_w * page_w_mult, base_h * page_h_mult)
                for font_name in fonts:
                    pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
                    # Layout and font go into the name: with only the
                    # timestamp, every combination overwrote the same file.
                    filename = f"Document_{time_str}_{layout_name.replace(' ', '-')}_{font_name}.pdf"
                    output_path = OUTPUT_DIR / filename
                    output_path.write_bytes(pdf_buffer.getvalue())
                    generated_files.append(output_path)
                    pdf_files_for_gallery.append(output_path)
        elif format_choice == "DOCX":
            output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
            create_docx(md_content).save(output_path)
            generated_files.append(output_path)
        elif format_choice == "XLSX":
            output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
            create_xlsx(md_content).save(output_path)
            generated_files.append(output_path)

    final_html_output = build_file_explorer_html(generated_files, pdf_files_for_gallery)

    return md_content, final_html_output
311
 
312
- # --- 🎨 Gradio UI Definition ---
313
# Fonts are discovered once at import time; the UI below offers them as choices.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()

with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
    gr.Markdown("# 🧠 Omni-Model Document Generator")
    gr.Markdown("Provide a prompt or upload a file (MD, PDF, Image, Audio). A virtual AI will process it, and you can generate documents from the result.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Input & Settings")
            omni_prompt = gr.Textbox(
                label="Prompt",
                lines=3,
                placeholder="Ask a question or provide instructions...",
            )
            omni_file = gr.File(
                label="Upload File (Optional)",
                file_types=["image", ".wav", ".mp3", ".md", ".pdf"],
            )

            output_formats = gr.CheckboxGroup(
                choices=["PDF", "DOCX", "XLSX"],
                label="Select Output Formats",
                value=["PDF"],
            )

            with gr.Accordion("PDF Customization", open=False):
                num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
                selected_layouts = gr.CheckboxGroup(
                    choices=list(LAYOUTS),
                    label="Base Page Layout",
                    value=["A4 Portrait"],
                )
                selected_fonts = gr.CheckboxGroup(
                    choices=AVAILABLE_FONTS,
                    label="Text Font",
                    value=AVAILABLE_FONTS[:1],
                )

            generate_btn = gr.Button("🚀 Generate Documents", variant="primary")

        # Right column: the generated Markdown and the resulting files.
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 AI Response (Source for Documents)")
            ai_response_output = gr.Markdown(label="AI Generated Content")
            gr.Markdown("### 🗂️ Generated Files")
            file_explorer_output = gr.HTML(label="File Explorer & Gallery")

    generate_btn.click(
        fn=generate_outputs_api,
        inputs=[
            omni_file,
            omni_prompt,
            output_formats,
            selected_layouts,
            selected_fonts,
            num_columns_slider,
            page_w_mult_slider,
            page_h_mult_slider,
        ],
        outputs=[ai_response_output, file_explorer_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)
 
4
  import re
5
  import os
6
  import shutil
7
+ import fitz # PyMuPDF
 
 
8
  from PIL import Image
9
+ from collections import defaultdict
10
+ import io
 
 
 
11
  from pypdf import PdfWriter
12
+
13
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
14
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
15
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
16
  from reportlab.lib.units import inch
17
+ from reportlab.lib import colors
18
  from reportlab.pdfbase import pdfmetrics
19
  from reportlab.pdfbase.ttfonts import TTFont
20
 
 
 
 
21
  # --- Configuration & Setup ---
22
CWD = Path.cwd()

# Display name -> ReportLab page size, in the order offered to the user.
LAYOUTS = {
    f"{paper} {orientation}": {"size": size if orientation == "Portrait" else landscape(size)}
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Working directories all live next to the process' current directory.
OUTPUT_DIR = CWD / "generated_pdfs"
PREVIEW_DIR = CWD / "previews"
FONT_DIR = CWD

# Create necessary directories
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
 
38
 
 
 
 
 
 
 
39
 
40
+ # --- Font & Emoji Handling ---
41
+
42
def register_local_fonts():
    """Register every ``.ttf`` in ``FONT_DIR`` with ReportLab and classify it.

    Each file is registered four times (plain, ``-Bold``, ``-Italic``,
    ``-BoldItalic``), all pointing at the same file, and grouped into one
    font family. Fonts whose name contains ``notocoloremoji-regular``
    become the emoji font; other ``notoemoji`` fonts are registered but not
    offered for text.

    Returns:
        ``(sorted selectable font names, emoji font name or None)``.
    """
    print("--- Font Registration Process Starting ---")
    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    candidates = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(candidates)} .ttf files: {[f.name for f in candidates]}")

    selectable = []
    emoji_face = None
    for ttf_path in candidates:
        try:
            face = ttf_path.stem
            family = {
                "normal": face,
                "bold": f"{face}-Bold",
                "italic": f"{face}-Italic",
                "boldItalic": f"{face}-BoldItalic",
            }
            for alias in family.values():
                pdfmetrics.registerFont(TTFont(alias, str(ttf_path)))
            pdfmetrics.registerFontFamily(face, **family)

            lowered = face.lower()
            if "notocoloremoji-regular" in lowered:
                emoji_face = face
            elif "notoemoji" not in lowered:  # keep other symbol fonts out of the text choices
                selectable.append(face)
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")

    if not selectable:
        print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
        selectable.append('Helvetica')

    print(f"Successfully registered user-selectable fonts: {selectable}")
    print(f"Emoji font set to: {emoji_face}")
    print("--- Font Registration Process Finished ---")
    return sorted(selectable), emoji_face
76
+
77
def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap runs of emoji in ``<font name="...">`` tags, leaving markup tags alone.

    The text is split on ``<...>`` tags and only the text segments are
    scanned, so characters inside a tag (e.g. attribute values) are never
    rewritten. Emoji inside ``<b>``/``<i>`` still end up nested in them.

    Covered blocks (inclusive): U+1F300-U+1F5FF, U+1F600-U+1F64F,
    U+1F900-U+1F9FF, U+2600-U+26FF, U+2700-U+27BF. The previous
    ``range()``-based class dropped the last code point of each block.

    Args:
        text: Paragraph markup to scan.
        emoji_font_name: Registered emoji font; a falsy value returns
            *text* unchanged.
    """
    if not emoji_font_name:
        return text

    emoji_pattern = re.compile(
        "(["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F900-\U0001F9FF"
        "\u2600-\u26FF"
        "\u2700-\u27BF"
        "]+)"
    )
    tag_pattern = re.compile(r"(<[^>]+>)")

    def wrap(match):
        # A callable replacement keeps backslashes in the font name from
        # being read as regex escapes.
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return "".join(
        part if tag_pattern.match(part) else emoji_pattern.sub(wrap, part)
        for part in tag_pattern.split(text)
    )
106
+
107
+
108
+ # --- PDF Generation & Handling ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert Markdown text into a list of ReportLab flowables.

    Supports fenced code blocks, pipe tables, ``#``/``##``/``###`` headings,
    ``- ``/``* `` bullets, ``**bold**`` and ``_italic_`` spans. Every level-1
    heading after the first starts a new page.

    A table or unclosed code block at the very end of the input is still
    emitted, and a table directly followed by a code fence is emitted before
    the code. Short table rows are padded to the widest row.

    Args:
        markdown_text: Markdown source.
        font_name: Font for body text and headings.
        emoji_font: Font name passed to ``apply_emoji_font``.

    Returns:
        The list of flowables in document order.
    """
    styles = getSampleStyleSheet()
    bold_font = 'Helvetica-Bold' if font_name == 'Helvetica' else font_name + "-Bold"
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
    # The ParagraphStyle attribute is borderPadding; an unknown 'padding'
    # keyword is stored but never used when drawing.
    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, borderPadding=8, leading=12, fontSize=9)
    style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=bold_font)

    bold_re = re.compile(r'\*\*(.*?)\*\*')
    italic_re = re.compile(r'_(.*?)_')

    story = []
    code_lines, table_rows = [], []
    in_code_block = False
    first_heading = True

    def table_cell(cell, style):
        # Table cells get bold and emoji handling only (no italics).
        return Paragraph(apply_emoji_font(bold_re.sub(r'<b>\1</b>', cell), emoji_font), style)

    def flush_table():
        # Emit the pending table, if any, and reset the row buffer.
        if not table_rows:
            return
        width = max(len(row) for row in table_rows)
        padded = [row + [''] * (width - len(row)) for row in table_rows]
        header = [table_cell(cell, style_table_header) for cell in padded[0]]
        body = [[table_cell(cell, style_normal) for cell in row] for row in padded[1:]]
        table = Table([header] + body, hAlign='LEFT', repeatRows=1)
        table.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey), ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey), ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')]))
        story.append(table)
        story.append(Spacer(1, 0.2 * inch))
        table_rows.clear()

    def flush_code():
        # Emit the pending code block as one paragraph, one <br/> per line.
        story.append(Paragraph("".join(text + '<br/>' for text in code_lines), style_code))
        story.append(Spacer(1, 0.1 * inch))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped_line = line.strip()

        if stripped_line.startswith("```"):
            if in_code_block:
                flush_code()
            else:
                flush_table()  # keep a preceding table ahead of the code
            in_code_block = not in_code_block
            continue
        if in_code_block:
            # Escape markup characters so code is shown literally.
            code_lines.append(line.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
            continue

        if stripped_line.startswith('|'):
            # Separator rows such as |---|:--:| carry no content.
            if not all(c in '-|: ' for c in stripped_line):
                table_rows.append([cell.strip() for cell in stripped_line.strip('|').split('|')])
            continue
        flush_table()

        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        content, style, extra_args = stripped_line, style_normal, {}
        # Slice the marker off: str.lstrip('# ') strips a *character set*, so
        # it would also remove '#' characters that belong to the heading text.
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            content, style, first_heading = stripped_line[2:].lstrip(), style_h1, False
        elif stripped_line.startswith("## "):
            content, style = stripped_line[3:].lstrip(), style_h2
        elif stripped_line.startswith("### "):
            content, style = stripped_line[4:].lstrip(), style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content = stripped_line[2:]
            extra_args['bulletText'] = '•'

        # Bold first, then italics, then wrap emoji runs in the emoji font.
        formatted_content = italic_re.sub(r'<i>\1</i>', bold_re.sub(r'<b>\1</b>', content))
        story.append(Paragraph(apply_emoji_font(formatted_content, emoji_font), style, **extra_args))

    # Emit whatever is still buffered when the input ends.
    if in_code_block:
        flush_code()
    flush_table()
    return story
194
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Returns:
        The preview path as a string, or ``None`` (after printing the
        reason) when the PDF cannot be opened or rendered.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        # The context manager closes the document even when rendering fails.
        with fitz.open(pdf_path) as doc:
            pix = doc.load_page(0).get_pixmap()
            pix.save(str(preview_path))
        return str(preview_path)
    except Exception as e:
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
204
 
205
+ # --- Main API Function ---
206
def _group_uploads(files):
    """Group uploads by the part of the file stem before the first underscore."""
    grouped = defaultdict(lambda: {'md': [], 'img': []})
    for upload in files:
        # Uploads may be file-like objects with .name or plain path strings.
        file_path = Path(getattr(upload, "name", upload))
        stem = file_path.stem.split('_')[0] or file_path.stem
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped[stem]['img'].append(file_path)
    return grouped


def _render_markdown_pdf(md_paths, font_name, pagesize, num_columns):
    """Render the concatenated Markdown files into an in-memory PDF."""
    md_content = "\n".join(p.read_text(encoding='utf-8') for p in md_paths)
    buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, EMOJI_FONT_NAME)
    if num_columns > 1:
        doc = BaseDocTemplate(buffer, pagesize=pagesize, leftMargin=0.5 * inch, rightMargin=0.5 * inch, topMargin=0.5 * inch, bottomMargin=0.5 * inch)
        # Share the printable width between the columns after reserving the
        # gutters; the previous formula left unused space on the right for
        # three or more columns.
        gutter = 0.2 * inch
        frame_width = (doc.width - (num_columns - 1) * gutter) / num_columns
        frames = [
            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
            for i in range(num_columns)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(buffer, pagesize=pagesize)
    doc.build(story)
    buffer.seek(0)
    return buffer


def _render_image_pdf(img_path):
    """Render one image as a single PDF page sized to the image (1 px = 1 pt)."""
    with Image.open(img_path) as img:
        img_width, img_height = img.size
    buffer = io.BytesIO()
    doc = BaseDocTemplate(buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
    # Frames default to 6pt padding on every side, which leaves no room for
    # a page-sized image; use a zero-padding frame instead.
    frame = Frame(0, 0, img_width, img_height, leftPadding=0, bottomPadding=0, rightPadding=0, topPadding=0)
    doc.addPageTemplates([PageTemplate(id='FullBleed', frames=[frame])])
    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
    buffer.seek(0)
    return buffer


def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Build one PDF per (file group, layout, font) and preview each result.

    Uploads are grouped by the text before the first ``_`` in their stem.
    For each group the Markdown files are rendered first, then every image
    is appended as its own page.

    Args:
        files: Uploaded Markdown/image files.
        layouts: Keys of ``LAYOUTS`` to render.
        fonts: Font names to render with.
        num_columns: Text columns per page.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        ``(preview image paths, generation log text, generated PDF paths)``.

    Raises:
        gr.Error: When no files, layouts or fonts were supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Start every run from empty output and preview directories.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    OUTPUT_DIR.mkdir()
    PREVIEW_DIR.mkdir()

    grouped_files = _group_uploads(files)
    num_columns = int(num_columns)  # a UI slider may hand over a float

    log_lines, generated_pdf_paths = [], []
    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        for layout_name in layouts:
            base_w, base_h = LAYOUTS[layout_name]["size"]
            pagesize = (base_w * page_w_mult, base_h * page_h_mult)
            for font_name in fonts:
                merger = PdfWriter()
                if assets['md']:
                    merger.append(fileobj=_render_markdown_pdf(assets['md'], font_name, pagesize, num_columns))
                for img_path in assets['img']:
                    merger.append(fileobj=_render_image_pdf(img_path))

                if len(merger.pages) > 0:
                    time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                    filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as f:
                        merger.write(f)
                    generated_pdf_paths.append(output_path)
                    log_lines.append(f"Generated: {filename}\n")

    gallery_previews = [create_pdf_preview(p) for p in generated_pdf_paths]
    final_gallery = [g for g in gallery_previews if g is not None]

    return final_gallery, "".join(log_lines), [str(p) for p in generated_pdf_paths]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
+ # --- Gradio UI Definition ---
267
# Fonts are discovered once at import time; the UI below offers them as choices.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
# Demo document mixing headings, bullets, bold text and emoji.
SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons

1. 📜 **Introduction**
- **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
- **Usage**: A guide for learning and storytelling across traditions. ️
- **Themes**: Justice ⚖️, faith 🙏, hubris 🏛️, redemption ✨, cosmic order 🌌.

2. 🛠️ **Core Concepts of Divinity**
- **Powers**: Creation 🌍, omniscience 👁️‍🗨️, shapeshifting 🦋 across entities.
- **Life Cycle**: Mortality ⏳, immortality ♾️, transitions like saints and avatars 🌟.
- **Communication**: Omens 🕊️, visions 👁️, miracles ✨ from gods and spirits.

# ⚔️ Arthurian Legends
- **Merlin, Morgan le Fay, Arthur**: Mentor 🧙, rival 🧙‍♀️, son 👑.
- **Relation**: Family tests loyalty 🤝.
- **Lesson**: Honor 🎖️ vs. betrayal 🗡️.

# 🏛️ Greek Mythology
- **Zeus, Hera, Athena**: Father ⚡, mother 👑, daughter 🦉.
- **Relation**: Family rules with tension 🌩️.
- **Lesson**: Hubris अहंकार meets wisdom 🧠.

# 🕉️ Hindu Trimurti
- **Brahma, Vishnu, Shiva**: Creator Brahma, preserver Vishnu, destroyer Shiva.
- **Relation**: Divine trio cycles existence 🔄.
- **Lesson**: Balance ⚖️ sustains life 💖.
"""
# Import-time side effect: (re)writes sample.md in the working directory on every start.
with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)
296
+
297
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced PDF Layout Engine")
    gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")

    with gr.Row():
        # Left column: uploads and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(
                label="Upload Markdown & Image Files",
                file_count="multiple",
                file_types=[".md", ".png", ".jpg", ".jpeg"],
            )

            with gr.Row():
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)

            num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(
                choices=list(LAYOUTS),
                label="Select Base Page Layout",
                value=["A4 Portrait"],
            )
            selected_fonts = gr.CheckboxGroup(
                choices=AVAILABLE_FONTS,
                label="Select Text Font",
                value=AVAILABLE_FONTS[:1],
            )
            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")

        # Right column: previews, log and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ PDF Preview Gallery")
            gallery_output = gr.Gallery(
                label="Generated PDF Previews",
                show_label=False,
                elem_id="gallery",
                columns=3,
                height="auto",
                object_fit="contain",
            )
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            downloadable_files_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(
        fn=generate_pdfs_api,
        inputs=[
            uploaded_files,
            selected_layouts,
            selected_fonts,
            num_columns_slider,
            page_w_mult_slider,
            page_h_mult_slider,
        ],
        outputs=[gallery_output, log_output, downloadable_files_output],
    )

if __name__ == "__main__":
    demo.launch()