awacke1 committed on
Commit
e13b6e9
·
verified ·
1 Parent(s): 9a6c15a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -0
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import datetime
4
+ import re
5
+ import os
6
+ import shutil
7
+ import fitz # PyMuPDF
8
+ from PIL import Image
9
+ from collections import defaultdict
10
+ import io
11
+ from pypdf import PdfWriter
12
+
13
+ # Imports for new formats
14
+ from docx import Document
15
+ from docx.shared import Inches
16
+ import openpyxl
17
+
18
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
19
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
20
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
21
+ from reportlab.lib.units import inch
22
+ from reportlab.lib import colors
23
+ from reportlab.pdfbase import pdfmetrics
24
+ from reportlab.pdfbase.ttfonts import TTFont
25
+
26
+ # --- Configuration & Setup ---
27
# Everything the app reads or writes lives under the process working directory.
CWD = Path.cwd()

# Page geometries offered in the UI, keyed by display label. Each paper size
# is listed in portrait first and then in landscape orientation.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": landscape(base_size) if orientation == "Landscape" else base_size
    }
    for paper, base_size in (("A4", A4), ("Letter", letter))
    for orientation in ("Portrait", "Landscape")
}

OUTPUT_DIR = CWD.joinpath("generated_outputs")
PREVIEW_DIR = CWD.joinpath("previews")
FONT_DIR = CWD

# Make sure the working directories exist before any request is served.
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
41
+
42
+
43
+ # --- Font & Emoji Handling (for PDF) ---
44
+
45
def register_local_fonts():
    """Register every ``.ttf`` file in ``FONT_DIR`` with ReportLab.

    Each font is registered twice from the same file: once under its file
    stem and once as ``"<stem>-Bold"``. The two names are then tied together
    as a font family so that ``<b>`` markup resolves to a registered font.

    Returns:
        A ``(text_font_names, emoji_font_name)`` tuple. ``text_font_names``
        is the sorted list of registered fonts whose name does not contain
        ``"notoemoji"`` (``["Helvetica"]`` when there are none), and
        ``emoji_font_name`` is the font whose name contains
        ``"notocoloremoji-regular"``, or ``None`` if no such font was found.
    """
    discovered = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(discovered)} .ttf files: {[f.name for f in discovered]}")

    text_fonts = []
    emoji_font = None
    for ttf_path in discovered:
        name = ttf_path.stem
        bold_name = f"{name}-Bold"
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
            pdfmetrics.registerFont(TTFont(bold_name, str(ttf_path)))
            pdfmetrics.registerFontFamily(name, normal=name, bold=bold_name)
        except Exception as e:
            # Best effort: one unreadable font must not block the others.
            print(f"Could not register font {ttf_path.name}: {e}")
            continue

        lowered = name.lower()
        if "notocoloremoji-regular" in lowered:
            emoji_font = name
        elif "notoemoji" not in lowered:
            text_fonts.append(name)

    return sorted(text_fonts) or ['Helvetica'], emoji_font
68
+
69
# Code-point ranges whose characters are rendered with the emoji font.
_EMOJI_BASE_RANGES = (
    "\u2600-\u27BF"          # Miscellaneous Symbols and Dingbats
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
)

# One or more emoji, each optionally followed by a variation selector
# (U+FE0F) or zero-width joiner (U+200D), so a whole emoji sequence ends up
# inside a single <font> tag. Compiled once instead of on every call.
_EMOJI_PATTERN = re.compile(f"((?:[{_EMOJI_BASE_RANGES}][\uFE0F\u200D]*)+)")


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap runs of emoji characters in a ReportLab ``<font>`` tag.

    Args:
        text: Paragraph text, possibly already containing ReportLab markup.
        emoji_font_name: Name of the registered emoji font. When falsy
            (``None`` or ``""``) the text is returned unchanged.

    Returns:
        ``text`` with every run of emoji wrapped as
        ``<font name="...">run</font>``.
    """
    if not emoji_font_name:
        return text

    # A callable replacement keeps the font name out of the regex template,
    # so a backslash in the name cannot be read as an escape or group
    # reference.
    def _wrap(match):
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return _EMOJI_PATTERN.sub(_wrap, text)
75
+
76
+
77
+ # --- Document Generation Engines ---
78
+
79
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
    """Render markdown content to a PDF held in memory.

    Args:
        md_content: Markdown source text.
        font_name: Name of the registered font used for the text.
        emoji_font: Name of the registered emoji font, or ``None``.
        pagesize: ``(width, height)`` of the page in points.
        num_columns: Number of text columns per page. Values of 1 or less
            produce a single-column document.

    Returns:
        An ``io.BytesIO`` containing the generated PDF.
    """
    pdf_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, emoji_font)

    # UI sliders may deliver a float; range() below needs an int.
    column_count = int(num_columns)
    if column_count > 1:
        gutter = 0.2 * inch
        doc = BaseDocTemplate(
            pdf_buffer,
            pagesize=pagesize,
            leftMargin=0.5 * inch,
            rightMargin=0.5 * inch,
        )
        # N columns are separated by N-1 gutters; share the rest of the
        # printable width equally so the frames span exactly doc.width.
        frame_width = (doc.width - (column_count - 1) * gutter) / column_count
        frames = [
            Frame(
                doc.leftMargin + index * (frame_width + gutter),
                doc.bottomMargin,
                frame_width,
                doc.height,
            )
            for index in range(column_count)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(pdf_buffer, pagesize=pagesize)

    doc.build(story)
    return pdf_buffer
92
+
93
def create_docx(md_content):
    """Build a DOCX document from markdown content.

    Handled line by line:

    * ``# ``, ``## `` and ``### `` lines become headings of level 1 to 3.
    * Lines that start with ``- `` or ``* `` after stripping become
      ``List Bullet`` paragraphs.
    * Every other line becomes a paragraph in which ``**text**`` spans are
      emitted as bold runs.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``Document``; the caller is responsible for saving it.
    """
    # Longest prefix first, so "### " is not mistaken for a shorter heading.
    heading_prefixes = (('### ', 3), ('## ', 2), ('# ', 1))

    document = Document()
    for line in md_content.split('\n'):
        heading = next(
            ((prefix, level) for prefix, level in heading_prefixes if line.startswith(prefix)),
            None,
        )
        if heading is not None:
            prefix, level = heading
            document.add_heading(line[len(prefix):], level=level)
            continue

        stripped = line.strip()
        if stripped.startswith(('- ', '* ')):
            document.add_paragraph(stripped[2:], style='List Bullet')
            continue

        paragraph = document.add_paragraph()
        for part in re.split(r'(\*\*.*?\*\*)', line):
            if not part:
                # re.split yields empty strings around adjacent matches.
                continue
            # A real bold span has both delimiters, i.e. at least 4 chars;
            # a lone "**" or "***" is literal text and must be kept as-is.
            is_bold = len(part) >= 4 and part.startswith('**') and part.endswith('**')
            if is_bold:
                paragraph.add_run(part[2:-2]).bold = True
            else:
                paragraph.add_run(part)
    return document
107
+
108
def create_xlsx(md_content):
    """Build an XLSX workbook with one column per top-level (``# ``) section.

    Row 1 of each column holds the section's heading text; the rows below
    hold the section's non-blank lines, stripped of surrounding whitespace.
    Text that precedes the first heading forms a column with an empty header.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``openpyxl`` workbook; the caller saves it.
    """
    book = openpyxl.Workbook()
    worksheet = book.active

    # The leading newline lets a heading on the very first line be split
    # off like any other heading.
    chunks = re.split(r'\n# ', '\n' + md_content)
    if chunks[0] == '':
        del chunks[0]

    for column, chunk in enumerate(chunks, start=1):
        title, *body = chunk.split('\n')
        worksheet.cell(row=1, column=column, value=title)
        non_blank = filter(None, (raw.strip() for raw in body))
        for row, text in enumerate(non_blank, start=2):
            worksheet.cell(row=row, column=column, value=text)

    return book
123
+
124
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert markdown text into a list of ReportLab flowables.

    Every input line becomes one ``Paragraph``. Lines that start with
    ``# ``, ``## `` or ``### `` use heading styles of decreasing size, and
    every level-1 heading except the first is preceded by a ``PageBreak``.
    ``**text**`` is rendered bold and emoji are wrapped in the emoji font.

    Args:
        markdown_text: Markdown source text.
        font_name: Registered font used for body text; headings use its
            ``-Bold`` variant (``Helvetica-Bold`` for ``Helvetica``).
        emoji_font: Registered emoji font name, or ``None`` to leave emoji
            untouched.

    Returns:
        The list of flowables (the "story") to pass to ``doc.build``.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"

    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10)
    style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24, leading=28)
    style_h2 = ParagraphStyle('h2', fontName=bold_font, spaceBefore=10, fontSize=18, leading=22)
    style_h3 = ParagraphStyle('h3', fontName=bold_font, spaceBefore=8, fontSize=14, leading=18)

    # Longest prefix first so each line matches exactly one heading level.
    heading_styles = (("### ", style_h3), ("## ", style_h2), ("# ", style_h1))

    story, first_heading = [], True
    for line in markdown_text.split('\n'):
        content, style = line, style_normal
        for prefix, heading_style in heading_styles:
            if line.startswith(prefix):
                # Slice the prefix off instead of str.lstrip('# '), which
                # strips a character *set* and would also eat leading '#'
                # characters that belong to the heading text itself.
                content, style = line[len(prefix):].lstrip(' '), heading_style
                break

        if style is style_h1:
            if not first_heading:
                story.append(PageBreak())
            first_heading = False

        # Paragraph parses its text as XML-like markup, so literal '&', '<'
        # and '>' from the markdown must be escaped before tags are added.
        safe_content = escape(content)
        formatted_content = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', safe_content)
        final_content = apply_emoji_font(formatted_content, emoji_font)
        story.append(Paragraph(final_content, style))

    return story
155
+
156
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in ``PREVIEW_DIR``.

    Args:
        pdf_path: Path of the PDF to preview.

    Returns:
        The preview image path as a string, or ``None`` when the PDF could
        not be opened or rendered (the reason is printed).
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            doc.load_page(0).get_pixmap().save(str(preview_path))
        finally:
            # Release the document even when rendering fails.
            doc.close()
    except Exception as e:
        # Best effort: a missing preview must not fail the whole request,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
    return str(preview_path)
163
+
164
+ # --- Main API Function ---
165
def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate documents in the requested formats from uploaded markdown.

    The text of all uploaded ``.md`` files is concatenated and rendered
    once per requested format. For PDF, one file is produced for every
    combination of selected layout and font. ``OUTPUT_DIR`` and
    ``PREVIEW_DIR`` are emptied at the start of each call.

    Args:
        files: Uploaded files, given either as objects with a ``name``
            attribute holding the path, or as plain path strings.
        output_formats: Any of ``"PDF"``, ``"DOCX"``, ``"XLSX"``.
        layouts: Keys of ``LAYOUTS`` to render PDFs with.
        fonts: Font names to render PDFs with.
        num_columns: Number of text columns in PDFs.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        ``(preview_image_paths, status_message, generated_file_paths)``.

    Raises:
        gr.Error: If no files were uploaded, no output format was selected,
            or none of the uploads is a markdown file.
    """
    if not files:
        raise gr.Error("Please upload at least one file.")
    if not output_formats:
        raise gr.Error("Please select at least one output format.")

    # Accept both file wrappers exposing ``.name`` and plain path strings.
    uploaded_paths = [Path(getattr(f, "name", f)) for f in files]
    md_paths = [p for p in uploaded_paths if p.suffix.lower() == '.md']
    if not md_paths:
        # Without this check the run would silently produce empty documents.
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Start from empty directories. exist_ok guards against an rmtree that
    # only partially succeeded (its errors are deliberately ignored).
    for directory in (OUTPUT_DIR, PREVIEW_DIR):
        shutil.rmtree(directory, ignore_errors=True)
        directory.mkdir(parents=True, exist_ok=True)

    # Consolidate all markdown content.
    md_content = "\n".join(p.read_text(encoding='utf-8') for p in md_paths)

    generated_files = []

    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()

        if format_choice == "PDF":
            for layout_name in layouts:
                for font_name in fonts:
                    pagesize = LAYOUTS[layout_name]["size"]
                    final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                    pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
                    filename = f"Document_{time_str}_{layout_name.replace(' ','-')}_{font_name}.pdf"
                    output_path = OUTPUT_DIR / filename
                    output_path.write_bytes(pdf_buffer.getvalue())
                    generated_files.append(output_path)

        elif format_choice == "DOCX":
            docx_doc = create_docx(md_content)
            output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
            docx_doc.save(output_path)
            generated_files.append(output_path)

        elif format_choice == "XLSX":
            xlsx_book = create_xlsx(md_content)
            output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
            xlsx_book.save(output_path)
            generated_files.append(output_path)

    # Only PDFs get a preview; failed previews (None) are left out.
    gallery_previews = [create_pdf_preview(p) for p in generated_files if p.suffix == '.pdf']
    final_gallery = [g for g in gallery_previews if g]

    return final_gallery, f"Generated {len(generated_files)} files.", [str(p) for p in generated_files]
209
+
210
+ # --- Gradio UI Definition ---
211
# Register the bundled fonts once at import time; the UI and the PDF
# generator both read these module-level results.
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()

# Sample document written next to the app so users have a file to try.
# The emoji were mis-decoded (UTF-8 bytes read in a single-byte code page)
# and are restored here.
SAMPLE_MARKDOWN = "# Deities Guide\n\n- **Purpose**: Explore deities and their morals! \n- **Themes**: Justice ⚖️, faith 🙏\n\n# Arthurian Legends\n\n - **Merlin, Arthur**: Mentor 🧙, son 👑.\n - **Lesson**: Honor 🎖️ vs. betrayal 🗡️."
(CWD / "sample.md").write_text(SAMPLE_MARKDOWN, encoding="utf-8")
214
+
215
# NOTE(review): the original indentation was lost, so the widget nesting
# below is reconstructed from the order of the statements — confirm the
# layout. The emoji in the labels were mis-decoded and are restored here.
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Document Generator") as demo:
    gr.Markdown("# 📄 Advanced Document Generator (PDF, DOCX, XLSX)")
    gr.Markdown("Upload Markdown files to generate documents in multiple formats. `# Headers` create columns in XLSX and page breaks in multi-page PDFs.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg"])

            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])

            with gr.Accordion("PDF Customization", open=True):
                with gr.Row():
                    page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                    page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
                num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["A4 Portrait"])
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])

            generate_btn = gr.Button("🚀 Generate Documents", variant="primary")

        # Right column: previews, status message and downloads.
        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ Output Files")
            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Ready...")
            downloadable_files_output = gr.Files(label="Download Generated Files")

    # The order of inputs must match the parameters of generate_outputs_api.
    generate_btn.click(
        fn=generate_outputs_api,
        inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
        outputs=[gallery_output, log_output, downloadable_files_output],
    )

if __name__ == "__main__":
    demo.launch()