Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -4,344 +4,321 @@ import datetime
|
|
4 |
import re
|
5 |
import os
|
6 |
import shutil
|
7 |
-
import
|
8 |
-
import base64
|
9 |
-
from collections import defaultdict
|
10 |
from PIL import Image
|
11 |
-
import
|
12 |
-
|
13 |
-
# Document Generation Libs
|
14 |
-
from docx import Document
|
15 |
-
import openpyxl
|
16 |
from pypdf import PdfWriter
|
17 |
-
|
|
|
18 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
19 |
-
from reportlab.lib.pagesizes import letter, A4, landscape
|
20 |
from reportlab.lib.units import inch
|
|
|
21 |
from reportlab.pdfbase import pdfmetrics
|
22 |
from reportlab.pdfbase.ttfonts import TTFont
|
23 |
|
24 |
-
# Media Libs
|
25 |
-
import fitz # PyMuPDF
|
26 |
-
|
27 |
# --- Configuration & Setup ---
|
28 |
CWD = Path.cwd()
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
PREVIEW_DIR = CWD / "previews"
|
31 |
-
UPLOAD_DIR = CWD / "uploads"
|
32 |
FONT_DIR = CWD
|
33 |
|
34 |
# Create necessary directories
|
35 |
OUTPUT_DIR.mkdir(exist_ok=True)
|
36 |
PREVIEW_DIR.mkdir(exist_ok=True)
|
37 |
-
UPLOAD_DIR.mkdir(exist_ok=True)
|
38 |
|
39 |
-
LAYOUTS = {
|
40 |
-
"A4 Portrait": {"size": A4},
|
41 |
-
"A4 Landscape": {"size": landscape(A4)},
|
42 |
-
"Letter Portrait": {"size": letter},
|
43 |
-
"Letter Landscape": {"size": landscape(letter)},
|
44 |
-
}
|
45 |
|
46 |
-
# ---
|
47 |
-
|
48 |
-
def
|
49 |
-
"""
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
else:
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
def create_xlsx(md_content):
    """Lay out a Markdown outline as spreadsheet columns.

    The text is split on lines that begin with ``# ``. Each resulting section
    becomes one column: the heading text is written to row 1 and every
    non-blank line after it is written, stripped, to the rows below.

    Returns:
        The populated ``openpyxl`` workbook; saving it is left to the caller.
    """
    book = openpyxl.Workbook()
    ws = book.active

    # A leading newline lets a heading on the very first line split like any other.
    chunks = re.split(r'\n# ', '\n' + md_content)
    if chunks and chunks[0] == '':
        del chunks[0]

    for col_no, chunk in enumerate(chunks, start=1):
        title, *body = chunk.split('\n')
        ws.cell(row=1, column=col_no, value=title)
        row_no = 2
        for raw in body:
            text = raw.strip()
            if not text:
                continue
            ws.cell(row=row_no, column=col_no, value=text)
            row_no += 1
    return book
|
93 |
|
94 |
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
|
95 |
-
"""
|
|
|
|
|
|
|
96 |
styles = getSampleStyleSheet()
|
97 |
-
|
98 |
-
style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6,
|
99 |
-
style_h1 = ParagraphStyle('h1', fontName=
|
100 |
-
style_h2 = ParagraphStyle('h2',
|
101 |
-
style_h3 = ParagraphStyle('h3',
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
105 |
stripped_line = line.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
if not stripped_line:
|
107 |
-
story.append(Spacer(1, 0.1 * inch))
|
|
|
108 |
|
109 |
-
content
|
|
|
|
|
|
|
|
|
|
|
110 |
if stripped_line.startswith("# "):
|
111 |
if not first_heading: story.append(PageBreak())
|
112 |
-
content
|
113 |
-
elif stripped_line.startswith("## "):
|
114 |
-
|
115 |
-
elif stripped_line.startswith(
|
|
|
|
|
|
|
116 |
|
|
|
|
|
117 |
formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
|
|
|
118 |
final_content = apply_emoji_font(formatted_content, emoji_font)
|
119 |
|
120 |
story.append(Paragraph(final_content, style, **extra_args))
|
121 |
return story
|
122 |
|
123 |
|
124 |
-
# --- 🔮 Virtual AI Omni-Model Functions ---
|
125 |
-
|
126 |
-
def process_text_input(prompt):
    """Return a canned Markdown reply that echoes *prompt* (no model is called)."""
    reply = f"# Virtual AI Response\n\n**Your Prompt:**\n> {prompt}\n\n**Generated Content:**\n- This is a simulated response for your text input.\n- Here's an emoji: 😊"
    return reply
|
129 |
-
|
130 |
-
def process_image_input(image_path, prompt):
    """Return a canned Markdown "analysis" naming the image file and its suffix.

    Only the file name and extension of *image_path* are used; the file itself
    is never opened.
    """
    image = Path(image_path)
    file_name = image.name
    file_ext = image.suffix
    return f"# Virtual AI Image Analysis: {file_name}\n\n**Your Prompt:**\n> {prompt}\n\n**Generated Content:**\n1. Simulated analysis of the uploaded image.\n2. File type appears to be `{file_ext}`."
|
133 |
-
|
134 |
-
def process_audio_input(audio_path, prompt):
    """Return a canned Markdown transcription/summary for an audio file.

    Only the file name of *audio_path* appears in the output; the audio is
    never read.
    """
    clip_name = Path(audio_path).name
    return f"# Virtual AI Audio Summary: {clip_name}\n\n**Your Prompt:**\n> {prompt}\n\n**Simulated Transcription:**\n> \"This is a test of the emergency broadcast system.\"\n\n**Generated Summary:**\nThe audio is a test broadcast."
|
137 |
-
|
138 |
-
def process_pdf_input(pdf_path, prompt, progress):
    """Return canned Markdown "OCR" text for a PDF, reporting progress twice.

    Args:
        pdf_path: Path of the PDF; only its file name is used.
        prompt: User prompt echoed into the output.
        progress: Callable invoked as ``progress(fraction, desc=...)`` at the
            halfway point and again on completion.
    """
    progress(0.5, desc="Simulating PDF page processing...")
    document_name = Path(pdf_path).name
    simulated = f"# Virtual AI OCR of: {document_name}\n\n**Your Prompt:**\n> {prompt}\n\n**Extracted Content (Simulated):**\n- **Page 1:** Simulated text from the first page.\n- **Page 2:** Simulated text from the second page."
    progress(1.0, desc="PDF OCR Simulation Complete!")
    return simulated
|
144 |
-
|
145 |
-
|
146 |
-
# --- 🛠️ Helpers & Main API ---
|
147 |
-
|
148 |
-
def register_local_fonts():
    """Register every ``.ttf`` file found in ``FONT_DIR`` with ReportLab.

    Each font file is registered under its stem and again under a ``-Bold``
    alias (the same file), and the two are tied together as a font family so
    ``<b>`` markup resolves to a registered face.

    Returns:
        A ``(text_font_names, emoji_font_name)`` tuple. ``text_font_names`` is
        sorted and falls back to ``['Helvetica']`` when no text font could be
        registered; ``emoji_font_name`` is ``None`` unless a font whose name
        contains ``notocoloremoji-regular`` was registered.
    """
    text_font_names, emoji_font_name = [], None
    for font_path in FONT_DIR.glob("*.ttf"):
        font_name = font_path.stem
        try:
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            pdfmetrics.registerFont(TTFont(f"{font_name}-Bold", str(font_path)))
            pdfmetrics.registerFontFamily(font_name, normal=font_name, bold=f"{font_name}-Bold")
        except Exception as exc:
            # Stay best-effort (one bad font must not stop the app), but report
            # the failure instead of hiding it behind a bare ``except: pass``.
            print(f"Could not register font {font_path.name}: {exc}")
            continue

        if "notocoloremoji-regular" in font_name.lower():
            emoji_font_name = font_name
        else:
            text_font_names.append(font_name)

    if not text_font_names:
        text_font_names.append('Helvetica')
    return sorted(text_font_names), emoji_font_name
|
165 |
-
|
166 |
-
def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap runs of emoji in ``<font name="...">`` tags for ReportLab paragraphs.

    Covers the Emoticons block (U+1F600-U+1F64F) and the Miscellaneous Symbols
    and Pictographs block (U+1F300-U+1F5FF), both ends inclusive.

    Args:
        text: Paragraph markup to scan.
        emoji_font_name: Registered emoji font name; when falsy, *text* is
            returned unchanged.

    Returns:
        *text* with every consecutive run of covered emoji wrapped in one tag.
    """
    if not emoji_font_name:
        return text

    # Character-class ranges are inclusive, so the last code point of each block
    # (e.g. U+1F5FF) is matched; the previous range()-based class dropped it.
    emoji_pattern = re.compile("([\U0001F600-\U0001F64F\U0001F300-\U0001F5FF]+)")

    def wrap(match):
        # A callable replacement keeps the font name literal (no backslash/group
        # interpretation as happens with a replacement template string).
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return emoji_pattern.sub(wrap, text)
|
172 |
-
|
173 |
def create_pdf_preview(pdf_path: Path):
|
174 |
-
"""🏞️ Generates a PNG thumbnail for the first page of a PDF."""
|
175 |
preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
|
176 |
try:
|
177 |
-
doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap(
|
178 |
pix.save(str(preview_path)); doc.close()
|
179 |
-
return preview_path
|
180 |
-
except
|
|
|
181 |
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
action_buttons = """
|
188 |
-
<div class="actions">
|
189 |
-
<button class="action-btn" onclick="event.preventDefault(); alert('QuizMe: Feature coming soon!')">🧠 QuizMe</button>
|
190 |
-
<button class="action-btn" onclick="event.preventDefault(); alert('Revise: Feature coming soon!')">✍️ Revise</button>
|
191 |
-
</div>
|
192 |
-
"""
|
193 |
-
for file_path in generated_files:
|
194 |
-
icon = file_icons.get(file_path.suffix, '📎')
|
195 |
-
file_explorer_html += f"""
|
196 |
-
<div class="file-item">
|
197 |
-
<a href="/file={file_path}" class="file-link" download="{file_path.name}">
|
198 |
-
<span class="file-icon">{icon}</span>
|
199 |
-
<span class="file-name">{file_path.name}</span>
|
200 |
-
</a>
|
201 |
-
{action_buttons if file_path.suffix == '.pdf' else ''}
|
202 |
-
</div>
|
203 |
-
"""
|
204 |
-
|
205 |
-
gallery_items = []
|
206 |
-
for pdf_path in pdf_files_for_gallery:
|
207 |
-
preview_path = create_pdf_preview(pdf_path)
|
208 |
-
if preview_path:
|
209 |
-
with open(preview_path, "rb") as f:
|
210 |
-
img_base64 = base64.b64encode(f.read()).decode("utf-8")
|
211 |
-
gallery_items.append({"preview_src": f"data:image/png;base64,{img_base64}", "filename": pdf_path.name})
|
212 |
-
|
213 |
-
gallery_html = ""
|
214 |
-
if gallery_items:
|
215 |
-
thumbs_html = "".join([f'<img src="{item["preview_src"]}" class="thumbnail" onclick="selectThumbnail(this, \'{item["preview_src"]}\', \'{item["filename"]}\')">' for item in gallery_items])
|
216 |
-
gallery_html = f"""
|
217 |
-
<div class="gallery-container">
|
218 |
-
<div class="main-view"><img id="main-image" src="{gallery_items[0]['preview_src']}" class="main-image"></div>
|
219 |
-
<div class="thumbnail-strip">{thumbs_html}</div>
|
220 |
-
</div>
|
221 |
-
<p id="main-filename">{gallery_items[0]['filename']}</p>
|
222 |
-
"""
|
223 |
-
|
224 |
-
html = f"""
|
225 |
-
<style>
|
226 |
-
.tabs {{ display: flex; border-bottom: 2px solid #ccc; }}
|
227 |
-
.tab-button {{ padding: 10px 15px; cursor: pointer; background: #f1f1f1; border: none; font-size: 1.2em; }}
|
228 |
-
.tab-button.active {{ background: #fff; border-top: 2px solid #6366f1; border-left: 1px solid #ccc; border-right: 1px solid #ccc; position: relative; top: 1px;}}
|
229 |
-
.tab-content {{ display: none; padding: 15px; border: 1px solid #ccc; border-top: none; }}
|
230 |
-
.tab-content.active {{ display: block; }}
|
231 |
-
.file-explorer {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 15px; }}
|
232 |
-
.file-item {{ background: #f9f9f9; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
|
233 |
-
.file-link {{ display: flex; align-items: center; padding: 15px; text-decoration: none; color: #333; }}
|
234 |
-
.file-icon {{ font-size: 3em; margin-right: 15px; }}
|
235 |
-
.file-name {{ font-weight: bold; word-break: break-all; }}
|
236 |
-
.actions {{ display: flex; justify-content: space-around; padding: 5px 10px; border-top: 1px solid #eee; }}
|
237 |
-
.action-btn {{ background: none; border: none; cursor: pointer; font-size: 1.1em; }}
|
238 |
-
.gallery-container {{ display: flex; height: 500px; }}
|
239 |
-
.main-view {{ flex: 4; }} .main-image {{ width: 100%; height: 100%; object-fit: contain; }}
|
240 |
-
.thumbnail-strip {{ flex: 1; overflow-y: auto; }} .thumbnail {{ width: 100%; margin-bottom: 5px; cursor: pointer; border: 3px solid transparent; }}
|
241 |
-
.thumbnail.active {{ border-color: #6366f1; }}
|
242 |
-
#main-filename {{ text-align: center; font-weight: bold; margin-top: 10px; }}
|
243 |
-
</style>
|
244 |
-
<div class="tabs">
|
245 |
-
<button class="tab-button active" onclick="openTab(event, 'explorer')">🗂️ FileExplorer</button>
|
246 |
-
{'<button class="tab-button" onclick="openTab(event, \'gallery\')">🖼️ GlimpsePDFs</button>' if gallery_items else ''}
|
247 |
-
</div>
|
248 |
-
<div id="explorer" class="tab-content active"><div class="file-explorer">{file_explorer_html}</div></div>
|
249 |
-
<div id="gallery" class="tab-content">{gallery_html}</div>
|
250 |
-
<script>
|
251 |
-
function openTab(evt, tabName) {{
|
252 |
-
let i, tabcontent = document.getElementsByClassName("tab-content"), tablinks = document.getElementsByClassName("tab-button");
|
253 |
-
for (i = 0; i < tabcontent.length; i++) tabcontent[i].style.display = "none";
|
254 |
-
for (i = 0; i < tablinks.length; i++) tablinks[i].className = tablinks[i].className.replace(" active", "");
|
255 |
-
document.getElementById(tabName).style.display = "block"; evt.currentTarget.className += " active";
|
256 |
-
}}
|
257 |
-
const mainImage = document.getElementById('main-image'), mainFilename = document.getElementById('main-filename'), thumbnails = document.querySelectorAll('.thumbnail');
|
258 |
-
if (thumbnails.length > 0) thumbnails[0].classList.add('active');
|
259 |
-
function selectThumbnail(thumb, src, name) {{
|
260 |
-
mainImage.src = src; mainFilename.textContent = name;
|
261 |
-
thumbnails.forEach(t => t.classList.remove('active')); thumb.classList.add('active');
|
262 |
-
}};
|
263 |
-
</script>
|
264 |
-
"""
|
265 |
-
return html
|
266 |
|
267 |
-
def generate_outputs_api(omni_file, omni_prompt, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
|
268 |
-
if not omni_prompt and not omni_file: raise gr.Error("Please provide a prompt or upload at least one file.")
|
269 |
-
if not output_formats: raise gr.Error("Please select at least one output format.")
|
270 |
-
|
271 |
shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
|
272 |
OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
|
273 |
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
-
|
290 |
-
for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
|
291 |
-
time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
|
292 |
-
if format_choice == "PDF":
|
293 |
-
for layout_name in layouts:
|
294 |
-
for font_name in fonts:
|
295 |
-
pagesize = LAYOUTS[layout_name]["size"]
|
296 |
-
final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
|
297 |
-
pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
|
298 |
-
filename = f"Document_{time_str}.pdf"; output_path = OUTPUT_DIR / filename
|
299 |
-
with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
|
300 |
-
generated_files.append(output_path); pdf_files_for_gallery.append(output_path)
|
301 |
-
elif format_choice == "DOCX":
|
302 |
-
doc = create_docx(md_content); filename = f"Document_{time_str}.docx"
|
303 |
-
output_path = OUTPUT_DIR / filename; doc.save(output_path); generated_files.append(output_path)
|
304 |
-
elif format_choice == "XLSX":
|
305 |
-
book = create_xlsx(md_content); filename = f"Outline_{time_str}.xlsx"
|
306 |
-
output_path = OUTPUT_DIR / filename; book.save(output_path); generated_files.append(output_path)
|
307 |
-
|
308 |
-
final_html_output = build_file_explorer_html(generated_files, pdf_files_for_gallery)
|
309 |
-
|
310 |
-
return md_content, final_html_output
|
311 |
|
312 |
-
# ---
|
313 |
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
with gr.Row():
|
320 |
with gr.Column(scale=1):
|
321 |
-
gr.Markdown("### ⚙️
|
322 |
-
|
323 |
-
omni_file = gr.File(label="Upload File (Optional)", file_types=["image", ".wav", ".mp3", ".md", ".pdf"])
|
324 |
-
|
325 |
-
output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
|
326 |
|
327 |
-
with gr.
|
328 |
-
num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
|
329 |
page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
|
330 |
page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
|
331 |
-
selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["A4 Portrait"])
|
332 |
-
selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])
|
333 |
|
334 |
-
|
|
|
|
|
|
|
335 |
|
336 |
with gr.Column(scale=2):
|
337 |
-
gr.Markdown("###
|
338 |
-
|
339 |
-
gr.Markdown("
|
340 |
-
|
341 |
-
|
342 |
-
generate_btn.click(fn=
|
343 |
-
inputs=[omni_file, omni_prompt, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
|
344 |
-
outputs=[ai_response_output, file_explorer_output])
|
345 |
|
346 |
if __name__ == "__main__":
|
347 |
-
demo.launch(
|
|
|
4 |
import re
|
5 |
import os
|
6 |
import shutil
|
7 |
+
import fitz # PyMuPDF
|
|
|
|
|
8 |
from PIL import Image
|
9 |
+
from collections import defaultdict
|
10 |
+
import io
|
|
|
|
|
|
|
11 |
from pypdf import PdfWriter
|
12 |
+
|
13 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
|
14 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
15 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
16 |
from reportlab.lib.units import inch
|
17 |
+
from reportlab.lib import colors
|
18 |
from reportlab.pdfbase import pdfmetrics
|
19 |
from reportlab.pdfbase.ttfonts import TTFont
|
20 |
|
|
|
|
|
|
|
21 |
# --- Configuration & Setup ---
|
22 |
CWD = Path.cwd()
|
23 |
+
LAYOUTS = {
|
24 |
+
"A4 Portrait": {"size": A4},
|
25 |
+
"A4 Landscape": {"size": landscape(A4)},
|
26 |
+
"Letter Portrait": {"size": letter},
|
27 |
+
"Letter Landscape": {"size": landscape(letter)},
|
28 |
+
"Legal Portrait": {"size": legal},
|
29 |
+
"Legal Landscape": {"size": landscape(legal)},
|
30 |
+
}
|
31 |
+
OUTPUT_DIR = CWD / "generated_pdfs"
|
32 |
PREVIEW_DIR = CWD / "previews"
|
|
|
33 |
FONT_DIR = CWD
|
34 |
|
35 |
# Create necessary directories
|
36 |
OUTPUT_DIR.mkdir(exist_ok=True)
|
37 |
PREVIEW_DIR.mkdir(exist_ok=True)
|
|
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
+
# --- Font & Emoji Handling ---
|
41 |
+
|
42 |
+
def register_local_fonts():
    """Register all ``.ttf`` files in ``FONT_DIR`` with ReportLab and classify them.

    Every font file is registered four times (regular, ``-Bold``, ``-Italic``
    and ``-BoldItalic`` aliases of the same file) and grouped into a font
    family. A font whose name contains ``notocoloremoji-regular`` becomes the
    emoji font; any other font whose name contains ``notoemoji`` is registered
    but not offered for text; all remaining fonts are user-selectable.
    Progress and failures are printed to stdout.

    Returns:
        ``(sorted selectable font names, emoji font name or None)``. The list
        falls back to ``['Helvetica']`` when nothing selectable was registered.
    """
    print("--- Font Registration Process Starting ---")
    print(f"Scanning for fonts in: {FONT_DIR.absolute()}")
    candidates = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(candidates)} .ttf files: {[f.name for f in candidates]}")

    selectable = []
    emoji_face = None
    for ttf in candidates:
        try:
            base = ttf.stem
            # Insertion order fixes the registration order: regular first.
            family = {
                "normal": base,
                "bold": f"{base}-Bold",
                "italic": f"{base}-Italic",
                "boldItalic": f"{base}-BoldItalic",
            }
            for face in family.values():
                pdfmetrics.registerFont(TTFont(face, str(ttf)))
            pdfmetrics.registerFontFamily(base, **family)

            lowered = base.lower()
            if "notocoloremoji-regular" in lowered:
                emoji_face = base
            elif "notoemoji" not in lowered:  # other symbol fonts are not text choices
                selectable.append(base)
        except Exception as e:
            print(f"Could not register font {ttf.name}: {e}")

    if not selectable:
        print("WARNING: No text fonts found. Adding 'Helvetica' as a default.")
        selectable.append('Helvetica')

    print(f"Successfully registered user-selectable fonts: {selectable}")
    print(f"Emoji font set to: {emoji_face}")
    print("--- Font Registration Process Finished ---")
    return sorted(selectable), emoji_face
|
76 |
+
|
77 |
+
# Inclusive code-point ranges treated as emoji: Emoticons, Misc Symbols and
# Pictographs, Supplemental Symbols and Pictographs, Misc Symbols, Dingbats.
_EMOJI_RUN_RE = re.compile(
    "(["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F900-\U0001F9FF"
    "\u2600-\u26FF"
    "\u2700-\u27BF"
    "]+)"
)
# Any ``<...>`` markup tag; the capture group makes ``split`` keep the tags.
_MARKUP_TAG_RE = re.compile(r"(<[^>]+>)")


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap emoji runs in ``<font>`` tags without touching existing markup tags.

    The text is split on ``<...>`` tags; tags pass through verbatim and only
    the text between them is scanned, so an emoji run never swallows a tag.

    Args:
        text: Paragraph markup, possibly already containing ``<b>``/``<i>`` tags.
        emoji_font_name: Registered emoji font name; when falsy, *text* is
            returned unchanged.

    Returns:
        *text* with each consecutive emoji run wrapped in
        ``<font name="emoji_font_name">...</font>``.
    """
    if not emoji_font_name:
        return text

    def wrap(match):
        # Callable replacement: the font name is inserted literally.
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    pieces = _MARKUP_TAG_RE.split(text)
    # With one capture group, split() alternates text (even index) and tag (odd).
    return "".join(
        piece if index % 2 else _EMOJI_RUN_RE.sub(wrap, piece)
        for index, piece in enumerate(pieces)
    )
|
106 |
+
|
107 |
+
|
108 |
+
# --- PDF Generation & Handling ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
def _markdown_table_flowable(rows, header_style, body_style, emoji_font):
    """Build a styled ReportLab Table from parsed rows (first row is the header)."""
    def cell_markup(cell):
        # Table cells support **bold** and emoji only, as before.
        return apply_emoji_font(re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', cell), emoji_font)

    # Pad ragged rows so every row has the same number of cells.
    width = max(len(row) for row in rows)
    rows = [row + [''] * (width - len(row)) for row in rows]

    header = [Paragraph(cell_markup(cell), header_style) for cell in rows[0]]
    body = [[Paragraph(cell_markup(cell), body_style) for cell in row] for row in rows[1:]]
    table = Table([header] + body, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.darkgrey),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ]))
    return table


def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert a small Markdown subset into a list of ReportLab flowables.

    Supported constructs: fenced code blocks, pipe tables, ``#``/``##``/``###``
    headings, ``- ``/``* `` bullets, ``**bold**`` and ``_italic_`` inline
    markup, and blank lines (rendered as small spacers). Every ``# `` heading
    after the first starts a new page.

    Args:
        markdown_text: The Markdown source.
        font_name: Registered font used for body text and headings.
        emoji_font: Registered emoji font name, or a falsy value to leave
            emoji untouched.

    Returns:
        The story (list of flowables) in document order.
    """
    from html import escape  # local import: escapes code text for Paragraph markup

    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, leading=14, fontSize=10)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=24, leading=28, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, fontSize=18, leading=22, spaceBefore=10)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, fontSize=14, leading=18, spaceBefore=8)
    # ``borderPadding`` is the ParagraphStyle attribute; the earlier ``padding``
    # keyword was stored but never used when drawing the box.
    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, borderPadding=8, leading=12, fontSize=9)
    style_table_header = ParagraphStyle('TableHeader', parent=style_normal, fontName=font_name + "-Bold" if font_name != 'Helvetica' else 'Helvetica-Bold')

    story = []
    in_code_block = False
    code_lines = []
    table_rows = []
    first_heading = True

    def emit_code_block():
        # One Paragraph per block; each source line ends with an explicit break.
        markup = "".join(f"{code_line}<br/>" for code_line in code_lines)
        story.append(Paragraph(markup, style_code))
        story.append(Spacer(1, 0.1 * inch))
        code_lines.clear()

    def emit_table():
        story.append(_markdown_table_flowable(table_rows, style_table_header, style_normal, emoji_font))
        story.append(Spacer(1, 0.2 * inch))
        table_rows.clear()

    for line in markdown_text.split('\n'):
        stripped_line = line.strip()

        # --- fenced code blocks ---
        if stripped_line.startswith("```"):
            if in_code_block:
                emit_code_block()
            in_code_block = not in_code_block
            continue
        if in_code_block:
            # Escape &, < and > so code is shown literally instead of being
            # parsed as paragraph markup.
            code_lines.append(escape(line, quote=False))
            continue

        # --- pipe tables ---
        if stripped_line.startswith('|'):
            if all(c in '-|: ' for c in stripped_line):
                continue  # separator row such as |---|:--:|
            table_rows.append([cell.strip() for cell in stripped_line.strip('|').split('|')])
            continue
        if table_rows:
            # First non-table line after a table: flush what was collected.
            emit_table()

        if not stripped_line:
            story.append(Spacer(1, 0.1 * inch))
            continue

        # --- headings, bullets, plain paragraphs ---
        content = stripped_line
        style = style_normal
        extra_args = {}
        # Slice off the marker by length: ``lstrip('# ')`` strips a character
        # *set* and would also eat a heading's own leading '#' characters.
        if stripped_line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            content = stripped_line[2:].lstrip()
            style = style_h1
            first_heading = False
        elif stripped_line.startswith("## "):
            content = stripped_line[3:].lstrip()
            style = style_h2
        elif stripped_line.startswith("### "):
            content = stripped_line[4:].lstrip()
            style = style_h3
        elif stripped_line.startswith(("- ", "* ")):
            content = stripped_line[2:]
            extra_args['bulletText'] = '•'

        # Inline markup first, then emoji wrapping on the already-tagged text.
        formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
        final_content = apply_emoji_font(formatted_content, emoji_font)
        story.append(Paragraph(final_content, style, **extra_args))

    # Flush constructs still open at end of input (previously dropped silently).
    if in_code_block and code_lines:
        emit_code_block()
    if table_rows:
        emit_table()
    return story
|
194 |
|
195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
def create_pdf_preview(pdf_path: Path):
    """Render the first page of *pdf_path* to a PNG in ``PREVIEW_DIR``.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview path as a string, or ``None`` when the PDF could not be
        opened, rendered or saved (the error is printed).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        # The context manager closes the document even if rendering or saving
        # raises; previously a failure skipped ``doc.close()``.
        with fitz.open(pdf_path) as doc:
            pix = doc.load_page(0).get_pixmap()
            pix.save(str(preview_path))
        return str(preview_path)
    except Exception as e:
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
|
204 |
|
205 |
+
# --- Main API Function ---
|
206 |
+
def generate_pdfs_api(files, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file group, layout, font) combination.

    Uploaded files are grouped by the part of their stem before the first
    underscore. Within a group, all Markdown files are concatenated and laid
    out as text pages, then each image is appended as its own page sized to
    the image. ``OUTPUT_DIR`` and ``PREVIEW_DIR`` are wiped at the start of
    every call.

    Args:
        files: Uploaded file objects (anything with a ``.name`` path) or paths.
        layouts: Keys of ``LAYOUTS`` to render.
        fonts: Registered font names to render with.
        num_columns: Number of text columns per page.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        ``(preview image paths, log text, generated PDF paths as strings)``.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown or Image file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    # rmtree ignores errors, so a directory may survive; do not fail on that.
    OUTPUT_DIR.mkdir(exist_ok=True)
    PREVIEW_DIR.mkdir(exist_ok=True)

    grouped_files = defaultdict(lambda: {'md': [], 'img': []})
    for f in files:
        # Accept both upload objects exposing ``.name`` and plain path strings.
        file_path = Path(getattr(f, "name", f))
        stem = file_path.stem.split('_')[0]
        suffix = file_path.suffix.lower()
        if suffix == '.md':
            grouped_files[stem]['md'].append(file_path)
        elif suffix in ('.png', '.jpg', '.jpeg'):
            grouped_files[stem]['img'].append(file_path)

    n_cols = int(num_columns)
    gutter = 0.2 * inch  # horizontal gap between adjacent text columns
    log_lines, generated_pdf_paths = [], []

    for stem, assets in progress.tqdm(grouped_files.items(), desc="Processing File Groups"):
        # Read the Markdown once per group rather than once per layout/font pair.
        md_content = None
        if assets['md']:
            md_content = "\n".join(p.read_text(encoding='utf-8') for p in assets['md'])

        for layout_name in layouts:
            base_w, base_h = LAYOUTS[layout_name]["size"]
            pagesize = (base_w * page_w_mult, base_h * page_h_mult)

            for font_name in fonts:
                merger = PdfWriter()

                if md_content is not None:
                    md_buffer = io.BytesIO()
                    story = markdown_to_story(md_content, font_name, EMOJI_FONT_NAME)
                    if n_cols > 1:
                        doc = BaseDocTemplate(md_buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
                        # Columns plus gutters span exactly doc.width. The old
                        # formula matched this only for two columns and left
                        # unused space for three or more.
                        frame_width = (doc.width - (n_cols - 1) * gutter) / n_cols
                        frames = [
                            Frame(doc.leftMargin + i * (frame_width + gutter), doc.bottomMargin, frame_width, doc.height)
                            for i in range(n_cols)
                        ]
                        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
                    else:
                        doc = SimpleDocTemplate(md_buffer, pagesize=pagesize)
                    doc.build(story)
                    merger.append(fileobj=md_buffer)

                for img_path in assets['img']:
                    with Image.open(img_path) as img:
                        img_width, img_height = img.size
                    img_buffer = io.BytesIO()
                    # Frames carry 6pt padding per side by default, so an image
                    # exactly as large as the page cannot fit the default
                    # SimpleDocTemplate frame. Use a padding-free full-page frame.
                    doc = BaseDocTemplate(img_buffer, pagesize=(img_width, img_height), leftMargin=0, rightMargin=0, topMargin=0, bottomMargin=0)
                    full_page = Frame(0, 0, img_width, img_height, leftPadding=0, bottomPadding=0, rightPadding=0, topPadding=0)
                    doc.addPageTemplates([PageTemplate(id='FullBleed', frames=[full_page])])
                    doc.build([ReportLabImage(str(img_path), width=img_width, height=img_height)])
                    merger.append(fileobj=img_buffer)

                if len(merger.pages) > 0:
                    time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
                    filename = f"{stem}_{time_str}_{layout_name.replace(' ','-')}_{page_w_mult}x{page_h_mult}_{font_name}_Cols{num_columns}.pdf"
                    output_path = OUTPUT_DIR / filename
                    with open(output_path, "wb") as out_file:
                        merger.write(out_file)
                    generated_pdf_paths.append(output_path)
                    log_lines.append(f"Generated: {filename}\n")

    gallery_previews = (create_pdf_preview(p) for p in generated_pdf_paths)
    final_gallery = [preview for preview in gallery_previews if preview is not None]

    return final_gallery, "".join(log_lines), [str(p) for p in generated_pdf_paths]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
|
266 |
+
# --- Gradio UI Definition ---
|
267 |
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
|
268 |
+
SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
|
269 |
+
|
270 |
+
1. 📜 **Introduction**
|
271 |
+
- **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
|
272 |
+
- **Usage**: A guide for learning and storytelling across traditions. ️
|
273 |
+
- **Themes**: Justice ⚖️, faith 🙏, hubris 🏛️, redemption ✨, cosmic order 🌌.
|
274 |
+
|
275 |
+
2. 🛠️ **Core Concepts of Divinity**
|
276 |
+
- **Powers**: Creation 🌍, omniscience 👁️🗨️, shapeshifting 🦋 across entities.
|
277 |
+
- **Life Cycle**: Mortality ⏳, immortality ♾️, transitions like saints and avatars 🌟.
|
278 |
+
- **Communication**: Omens 🕊️, visions 👁️, miracles ✨ from gods and spirits.
|
279 |
+
|
280 |
+
# ⚔️ Arthurian Legends
|
281 |
+
- **Merlin, Morgan le Fay, Arthur**: Mentor 🧙, rival 🧙♀️, son 👑.
|
282 |
+
- **Relation**: Family tests loyalty 🤝.
|
283 |
+
- **Lesson**: Honor 🎖️ vs. betrayal 🗡️.
|
284 |
+
|
285 |
+
# 🏛️ Greek Mythology
|
286 |
+
- **Zeus, Hera, Athena**: Father ⚡, mother 👑, daughter 🦉.
|
287 |
+
- **Relation**: Family rules with tension 🌩️.
|
288 |
+
- **Lesson**: Hubris अहंकार meets wisdom 🧠.
|
289 |
+
|
290 |
+
# 🕉️ Hindu Trimurti
|
291 |
+
- **Brahma, Vishnu, Shiva**: Creator Brahma, preserver Vishnu, destroyer Shiva.
|
292 |
+
- **Relation**: Divine trio cycles existence 🔄.
|
293 |
+
- **Lesson**: Balance ⚖️ sustains life 💖.
|
294 |
+
"""
|
295 |
+
with open(CWD / "sample.md", "w", encoding="utf-8") as f: f.write(SAMPLE_MARKDOWN)
|
296 |
+
|
297 |
+
# NOTE(review): nesting below is reconstructed from a view that lost its
# indentation; in particular, confirm that only the two page-multiplier
# sliders belong inside the inner gr.Row().
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced PDF Layout Engine")
    gr.Markdown("Upload Markdown/Image files. The app finds local `.ttf` fonts. Group assets with a common name (e.g., `Doc_part1.md`, `Doc_img1.png`) to combine them. `# Headers` create automatic page breaks.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(
                label="Upload Markdown & Image Files",
                file_count="multiple",
                file_types=[".md", ".png", ".jpg", ".jpeg"],
            )

            with gr.Row():
                page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)

            num_columns_slider = gr.Slider(label="Number of Text Columns", minimum=1, maximum=4, step=1, value=1)
            selected_layouts = gr.CheckboxGroup(
                choices=list(LAYOUTS.keys()),
                label="Select Base Page Layout",
                value=["A4 Portrait"],
            )
            # Pre-select the first available font, if any.
            default_fonts = [AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else []
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Font", value=default_fonts)
            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")

        # Right column: previews, log and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ PDF Preview Gallery")
            gallery_output = gr.Gallery(
                label="Generated PDF Previews",
                show_label=False,
                elem_id="gallery",
                columns=3,
                height="auto",
                object_fit="contain",
            )
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            downloadable_files_output = gr.Files(label="Download Generated PDFs")

    # Input order must match the positional parameters of generate_pdfs_api.
    generate_btn.click(
        fn=generate_pdfs_api,
        inputs=[
            uploaded_files,
            selected_layouts,
            selected_fonts,
            num_columns_slider,
            page_w_mult_slider,
            page_h_mult_slider,
        ],
        outputs=[gallery_output, log_output, downloadable_files_output],
    )

if __name__ == "__main__":
    demo.launch()
|