awacke1 committed on
Commit
7ac0a4d
·
verified ·
1 Parent(s): 4365599

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +248 -0
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import datetime
4
+ import re
5
+ import os
6
+ import shutil
7
+ import fitz # PyMuPDF
8
+ from PIL import Image
9
+ from collections import defaultdict
10
+ import io
11
+ from pypdf import PdfWriter
12
+
13
+ # Imports for new formats
14
+ from docx import Document
15
+ from docx.shared import Inches
16
+ import openpyxl
17
+
18
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
19
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
20
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
21
+ from reportlab.lib.units import inch
22
+ from reportlab.lib import colors
23
+ from reportlab.pdfbase import pdfmetrics
24
+ from reportlab.pdfbase.ttfonts import TTFont
25
+
26
+ # --- Configuration & Setup ---
27
# All paths are resolved relative to the directory the app is started from.
CWD = Path.cwd()
FONT_DIR = CWD  # .ttf files are looked up next to the application
OUTPUT_DIR = CWD / "generated_outputs"
PREVIEW_DIR = CWD / "previews"

# Selectable base page layouts, in the order they are offered in the UI.
LAYOUTS = {
    layout_name: {"size": layout_size}
    for layout_name, layout_size in (
        ("A4 Portrait", A4),
        ("A4 Landscape", landscape(A4)),
        ("Letter Portrait", letter),
        ("Letter Landscape", landscape(letter)),
    )
}

# Make sure the working directories exist before anything writes to them.
OUTPUT_DIR.mkdir(exist_ok=True)
PREVIEW_DIR.mkdir(exist_ok=True)
41
+
42
+
43
+ # --- Font & Emoji Handling (for PDF) ---
44
+
45
def register_local_fonts():
    """Register every .ttf file found in FONT_DIR with ReportLab.

    Each font is registered twice (under its file stem and under
    "<stem>-Bold", both pointing at the same file) and tied together as a
    font family so bold markup resolves to a registered name.

    Returns:
        A tuple ``(text_font_names, emoji_font_name)``: the sorted list of
        usable text font names (falls back to ``['Helvetica']`` when none
        could be registered) and the name of the emoji font, or ``None``
        when no font whose stem contains "notocoloremoji-regular" was
        registered.
    """
    ttf_paths = list(FONT_DIR.glob("*.ttf"))
    print(f"Found {len(ttf_paths)} .ttf files: {[p.name for p in ttf_paths]}")

    text_fonts = []
    emoji_font = None
    for ttf_path in ttf_paths:
        try:
            regular_name = ttf_path.stem
            bold_name = f"{regular_name}-Bold"
            location = str(ttf_path)
            pdfmetrics.registerFont(TTFont(regular_name, location))
            pdfmetrics.registerFont(TTFont(bold_name, location))
            pdfmetrics.registerFontFamily(regular_name, normal=regular_name, bold=bold_name)
            if "notocoloremoji-regular" in regular_name.lower():
                emoji_font = regular_name
            else:
                text_fonts.append(regular_name)
        except Exception as e:
            # A font that cannot be loaded is reported and skipped.
            print(f"Could not register font {ttf_path.name}: {e}")

    if not text_fonts:
        text_fonts.append('Helvetica')
    return sorted(text_fonts), emoji_font
65
+
66
# Runs of emoji code points that should be rendered with the emoji font.
# Compiled once at import time instead of on every call.
_EMOJI_RUN = re.compile(
    "((?:["
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "][\uFE0F\u200D]*)+)"    # keep variation selectors / joiners with their emoji
)


def apply_emoji_font(text: str, emoji_font_name: str) -> str:
    """Wrap each run of emoji in ``<font name="...">`` markup.

    Args:
        text: Text that may contain emoji.
        emoji_font_name: Name of the font to use for emoji. When falsy
            (``None`` or ``""``) the text is returned unchanged.

    Returns:
        ``text`` with every consecutive run of emoji enclosed in a
        ``<font name="{emoji_font_name}">...</font>`` element.
    """
    if not emoji_font_name:
        return text

    def wrap(match):
        # Built with a function rather than a replacement template so that a
        # backslash in the font name is not interpreted as a regex escape.
        return f'<font name="{emoji_font_name}">{match.group(1)}</font>'

    return _EMOJI_RUN.sub(wrap, text)
71
+
72
+
73
+ # --- Document Generation Engines ---
74
+
75
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
    """Render markdown content to a PDF held in memory.

    Args:
        md_content: Markdown source text.
        font_name: Registered ReportLab font name used for the text.
        emoji_font: Registered emoji font name, or ``None``.
        pagesize: ``(width, height)`` of the page in points.
        num_columns: Number of text columns per page. Accepts a float such
            as a slider value; values below 1 are treated as 1.

    Returns:
        An ``io.BytesIO`` containing the generated PDF.
    """
    # UI sliders may deliver floats; range() below needs an int.
    column_count = max(1, int(num_columns))
    gutter = 0.2 * inch

    pdf_buffer = io.BytesIO()
    story = markdown_to_story(md_content, font_name, emoji_font)
    if column_count > 1:
        doc = BaseDocTemplate(pdf_buffer, pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch)
        # Split the printable width evenly after reserving one gutter between
        # each pair of neighbouring columns, so the columns span the full
        # width for any column count.
        frame_width = (doc.width - (column_count - 1) * gutter) / column_count
        frames = [
            Frame(
                doc.leftMargin + index * (frame_width + gutter),
                doc.bottomMargin,
                frame_width,
                doc.height,
            )
            for index in range(column_count)
        ]
        doc.addPageTemplates([PageTemplate(id='MultiCol', frames=frames)])
    else:
        doc = SimpleDocTemplate(pdf_buffer, pagesize=pagesize)
    doc.build(story)
    return pdf_buffer
88
+
89
# **bold** spans, or _italic_ spans whose underscores are not inside a word
# (so identifiers like snake_case_name are left alone).
_DOCX_INLINE_MARKUP = re.compile(r'\*\*(?P<bold>.*?)\*\*|(?<!\w)_(?P<italic>.*?)_(?!\w)')
# One to six '#' characters followed by a space, then the heading text.
_DOCX_HEADING = re.compile(r'(#{1,6}) (.*)')


def _add_inline_runs(paragraph, text):
    """Append text to a paragraph, styling **bold** and _italic_ spans as runs."""
    cursor = 0
    for match in _DOCX_INLINE_MARKUP.finditer(text):
        if match.start() > cursor:
            paragraph.add_run(text[cursor:match.start()])
        # Classify by the group that matched, so plain fragments such as a
        # lone "_" or "***" are never mistaken for markup.
        if match.group('bold') is not None:
            paragraph.add_run(match.group('bold')).bold = True
        else:
            paragraph.add_run(match.group('italic')).italic = True
        cursor = match.end()
    if cursor < len(text):
        paragraph.add_run(text[cursor:])


def create_docx(md_content):
    """Generate a DOCX document from markdown content.

    Lines are handled one at a time:
    - ``#`` through ``######`` followed by a space become headings of the
      matching level;
    - lines starting with ``- `` or ``* `` (after stripping) become bulleted
      list items;
    - every other line becomes a normal paragraph.

    ``**bold**`` and ``_italic_`` spans are converted to styled runs in both
    paragraphs and list items.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``docx`` document object (not yet saved).
    """
    document = Document()
    for line in md_content.split('\n'):
        heading = _DOCX_HEADING.match(line)
        stripped = line.strip()
        if heading:
            document.add_heading(heading.group(2), level=len(heading.group(1)))
        elif stripped.startswith(('- ', '* ')):
            _add_inline_runs(document.add_paragraph(style='List Bullet'), stripped[2:])
        else:
            _add_inline_runs(document.add_paragraph(), line)
    return document
111
+
112
def create_xlsx(md_content):
    """Generate an XLSX workbook with one column per H1 section.

    The markdown is split at every line starting with ``# ``. For each
    section the header text goes into row 1 and each non-blank line of the
    section body goes into the following rows of the same column. Text
    that appears before the first H1 header ends up in a column with an
    empty header.

    Args:
        md_content: Markdown source text.

    Returns:
        The populated ``openpyxl`` workbook (not yet saved).
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # Prepend a newline so a header on the very first line is split too.
    sections = re.split(r'\n# ', '\n' + md_content)
    # Drop the leading fragment when it only holds blank lines; otherwise it
    # would occupy an empty first column and shift every section right.
    if len(sections) > 1 and not sections[0].strip():
        sections.pop(0)

    for column_index, section in enumerate(sections, 1):
        header, *body_lines = section.split('\n')
        sheet.cell(row=1, column=column_index, value=header.strip())
        entries = [entry.strip() for entry in body_lines if entry.strip()]
        for row_index, entry in enumerate(entries, 2):
            sheet.cell(row=row_index, column=column_index, value=entry)

    return workbook
138
+
139
def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
    """Convert markdown to a ReportLab story (list of flowables) for a PDF.

    Every input line becomes one ``Paragraph``. Lines starting with ``# ``
    are rendered in the heading style and, except for the first one, are
    preceded by a ``PageBreak``. ``**bold**`` spans become ``<b>`` markup and
    emoji are wrapped in the emoji font via ``apply_emoji_font``.

    Args:
        markdown_text: Markdown source text.
        font_name: Registered ReportLab font name for body and headings.
        emoji_font: Registered emoji font name, or ``None``.

    Returns:
        A list of ReportLab flowables.
    """
    from xml.sax.saxutils import escape  # stdlib; local so the block is self-contained

    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10)
    # Leading must exceed the font size, otherwise the 24pt heading is laid
    # out in a 12pt-high line and overlaps neighbouring content.
    style_h1 = ParagraphStyle('h1', fontName=font_name, spaceBefore=12, fontSize=24, leading=24 * 1.2)

    story, first_heading = [], True
    for line in markdown_text.split('\n'):
        content, style = line, style_normal
        if line.startswith("# "):
            if not first_heading:
                story.append(PageBreak())
            # Remove the heading marker (tolerating a repeated "# # " prefix)
            # without eating a '#' that belongs to the title, e.g. "#1 priority".
            content = re.sub(r'^(?:#\s+)+', '', line)
            style, first_heading = style_h1, False
        # Paragraph parses its text as XML-like markup, so literal '&', '<'
        # and '>' from the markdown must be escaped before tags are added.
        escaped_content = escape(content)
        formatted_content = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', escaped_content)
        final_content = apply_emoji_font(formatted_content, emoji_font)
        story.append(Paragraph(final_content, style))
    return story
154
+
155
def create_pdf_preview(pdf_path: Path):
    """Render the first page of a PDF to a PNG in PREVIEW_DIR.

    Preview creation is best-effort: a failure is reported on stdout and
    does not abort document generation.

    Args:
        pdf_path: Path of the PDF file to preview.

    Returns:
        The preview image path as a string, or ``None`` when the preview
        could not be created.
    """
    preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
    try:
        doc = fitz.open(pdf_path)
        try:
            pix = doc.load_page(0).get_pixmap()
            pix.save(str(preview_path))
        finally:
            # Release the document even when rendering or saving fails.
            doc.close()
    except Exception as e:
        print(f"Could not create preview for {pdf_path.name}: {e}")
        return None
    return str(preview_path)
162
+
163
+ # --- Main API Function ---
164
def generate_outputs_api(files, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
    """Generate documents in the requested formats from uploaded markdown.

    All uploaded ``.md`` files are concatenated into one markdown document
    (each file starts a new ``# `` section). Previously generated outputs
    and previews are deleted before new files are written.

    Args:
        files: Uploaded files, either path strings or objects exposing the
            path as ``.name``.
        output_formats: Any of "PDF", "DOCX", "XLSX".
        layouts: Keys of ``LAYOUTS`` to render PDFs in.
        fonts: Registered font names to render PDFs with.
        num_columns: Number of text columns for PDFs.
        page_w_mult: Multiplier applied to the base page width.
        page_h_mult: Multiplier applied to the base page height.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(preview_image_paths, status_message, generated_file_paths)``.

    Raises:
        gr.Error: If no files, no markdown files, or no output formats were
            provided.
    """
    if not files: raise gr.Error("Please upload at least one file.")
    if not output_formats: raise gr.Error("Please select at least one output format.")

    # Accept both plain path strings and file objects carrying `.name`.
    uploaded_paths = [Path(getattr(f, "name", f)) for f in files]
    md_paths = [p for p in uploaded_paths if p.suffix.lower() == '.md']
    if not md_paths:
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Consolidate all markdown content. A file that already opens with an H1
    # is used as-is; only files without one get a "# " prefix, so headings
    # are never doubled into "# # Title".
    sections = []
    for md_path in md_paths:
        # utf-8-sig also reads plain UTF-8 and drops a byte-order mark if present.
        text = md_path.read_text(encoding='utf-8-sig').strip()
        sections.append(text if text.startswith("# ") else "# " + text)
    md_content = "\n\n".join(sections)

    # Start from empty output directories (only after the input is validated).
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
    # rmtree is best-effort, so the directories may still exist.
    OUTPUT_DIR.mkdir(exist_ok=True)
    PREVIEW_DIR.mkdir(exist_ok=True)

    generated_files = []
    column_count = int(num_columns)  # sliders may deliver floats

    for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
        time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()

        if format_choice == "PDF":
            # One PDF per selected layout/font combination.
            for layout_name in layouts:
                for font_name in fonts:
                    pagesize = LAYOUTS[layout_name]["size"]
                    final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                    pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, column_count)

                    filename = f"Document_{time_str}_{layout_name.replace(' ','-')}_{font_name}.pdf"
                    output_path = OUTPUT_DIR / filename
                    output_path.write_bytes(pdf_buffer.getvalue())
                    generated_files.append(output_path)

        elif format_choice == "DOCX":
            docx_doc = create_docx(md_content)
            output_path = OUTPUT_DIR / f"Document_{time_str}.docx"
            docx_doc.save(str(output_path))
            generated_files.append(output_path)

        elif format_choice == "XLSX":
            xlsx_book = create_xlsx(md_content)
            output_path = OUTPUT_DIR / f"Outline_{time_str}.xlsx"
            xlsx_book.save(str(output_path))
            generated_files.append(output_path)

    # Only PDFs get a preview image; failed previews (None) are dropped.
    gallery_previews = [create_pdf_preview(p) for p in generated_files if p.suffix == '.pdf']
    final_gallery = [g for g in gallery_previews if g]

    return final_gallery, f"Generated {len(generated_files)} files.", [str(p) for p in generated_files]
210
+
211
+ # --- Gradio UI Definition ---
212
# Register the bundled fonts once at import time; the UI offers the text
# fonts and the PDF engine uses the emoji font (if any).
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()

# Example input written next to the app so users have a file to try.
# The emoji are stored as real Unicode characters (the previous text was
# mis-decoded UTF-8).
SAMPLE_MARKDOWN = "# Deities Guide\n\n- **Purpose**: Explore deities and their morals! \n- **Themes**: Justice ⚖️, faith 🙏\n\n# Arthurian Legends\n\n - **Merlin, Arthur**: Mentor 🧙, son 👑.\n - **Lesson**: Honor 🎖️ vs. betrayal 🗡️."
(CWD / "sample.md").write_text(SAMPLE_MARKDOWN, encoding="utf-8")
215
+
216
# Two-column layout: generation settings on the left, results on the right.
# Emoji in the labels are real Unicode characters (the previous text was
# mis-decoded UTF-8).
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Document Generator") as demo:
    gr.Markdown("# 📄 Advanced Document Generator (PDF, DOCX, XLSX)")
    gr.Markdown("Upload Markdown files to generate documents in multiple formats. `# Headers` create columns in XLSX and page breaks in multi-page PDFs.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown & Image Files", file_count="multiple", file_types=[".md", ".png", ".jpg"])

            output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])

            # These options only affect PDF output.
            with gr.Accordion("PDF Customization", open=True):
                with gr.Row():
                    page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                    page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
                num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Base Page Layout", value=["A4 Portrait"])
                selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Text Font", value=[AVAILABLE_FONTS[0]] if AVAILABLE_FONTS else [])

            generate_btn = gr.Button("🚀 Generate Documents", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 🖼️ Output Files")
            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
            log_output = gr.Markdown(label="Generation Log", value="Ready...")
            downloadable_files_output = gr.Files(label="Download Generated Files")

    # Input order must match the positional parameters of generate_outputs_api;
    # output order must match its returned tuple.
    generate_btn.click(fn=generate_outputs_api,
                       inputs=[uploaded_files, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
                       outputs=[gallery_output, log_output, downloadable_files_output])

if __name__ == "__main__":
    demo.launch()