awacke1 committed on
Commit
6098452
·
verified ·
1 Parent(s): c597c90

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -0
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import datetime
4
+ import re
5
+ import requests
6
+ import os
7
+ import shutil
8
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
9
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
10
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
11
+ from reportlab.lib.units import inch
12
+ from reportlab.lib import colors
13
+ from reportlab.pdfbase import pdfmetrics
14
+ from reportlab.pdfbase.ttfonts import TTFont
15
+
16
+ # --- Configuration & Setup ---
17
+ # These settings define the available layouts and directories for fonts and outputs.
18
+
19
# Maps each "<paper> <orientation>" label shown in the UI to its ReportLab
# page size. Landscape entries are the portrait size passed through landscape().
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": landscape(page_size) if orientation == "Landscape" else page_size
    }
    for paper, page_size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Working directories, relative to the current working directory; both are
# created at import time if they do not exist yet.
OUTPUT_DIR, FONT_DIR = Path("generated_pdfs"), Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
32
+
33
+ # --- Font Handling ---
34
+
35
def download_default_font():
    """Make sure DejaVuSans.ttf is present in FONT_DIR, downloading it if missing.

    Only the presence of ``DejaVuSans.ttf`` is checked; other fonts in the
    directory are not considered here.

    Returns:
        True if the font file already exists or was downloaded and stored
        successfully, False if the download or the write failed.
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    # The ".part" name does not match the "*.ttf" glob used for font discovery,
    # so an interrupted download can never be mistaken for a usable font.
    partial_path = dejavu_path.with_name(dejavu_path.name + ".part")
    try:
        # requests never times out on its own; without an explicit timeout a
        # stalled connection would block the caller indefinitely.
        r = requests.get(url, allow_redirects=True, timeout=30)
        r.raise_for_status()
        partial_path.write_bytes(r.content)
        # Move into place only once the file is complete.
        partial_path.replace(dejavu_path)
    except Exception as e:
        # Deliberately best-effort: the caller treats False as "no font
        # available" and carries on, so any failure is reported, not raised.
        print(f"Failed to download default font: {e}")
        try:
            partial_path.unlink()
        except OSError:
            pass  # Nothing was written, or the leftover cannot be removed.
        return False

    print("Default font downloaded successfully.")
    return True
52
+
53
def discover_and_register_fonts():
    """Register every ``*.ttf`` file in FONT_DIR with ReportLab.

    If the directory holds no ``.ttf`` file, the default font download is
    attempted first. Each font is registered under its file stem; files that
    fail to register are reported on stdout and skipped.

    Returns:
        The alphabetically sorted names of the fonts that were registered, or
        an empty list when no font exists and the download failed.
    """
    def _ttf_files():
        return list(FONT_DIR.glob("*.ttf"))

    candidates = _ttf_files()
    if not candidates:
        if not download_default_font():
            return []  # Nothing on disk and nothing could be fetched.
        candidates = _ttf_files()

    registered = []
    for ttf in candidates:
        name = ttf.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf)))
        except Exception as e:
            print(f"Could not register font {ttf.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered
70
+
71
+ # --- ReportLab PDF Generation (Core Logic) ---
72
+
73
_BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
# The look-arounds keep underscores inside identifiers (snake_case_name)
# from being read as emphasis markers.
_ITALIC_RE = re.compile(r'(?<!\w)_(.+?)_(?!\w)')


def _escape_markup(text):
    """Escape the characters ReportLab's paragraph parser treats as markup."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _inline_markup(text):
    """Escape *text*, then convert **bold** and _italic_ spans to <b>/<i> tags."""
    return _ITALIC_RE.sub(r'<i>\1</i>', _BOLD_RE.sub(r'<b>\1</b>', _escape_markup(text)))


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a Markdown string into a list of ReportLab flowables (a "story").

    Supported constructs: ``#``/``##``/``###`` headings, ``*``/``-`` bullet
    items, ``1.`` numbered items, pipe tables, fenced code blocks, blank-line
    spacing, and ``**bold**`` / ``_italic_`` inline emphasis. Everything else
    becomes a body paragraph.

    Args:
        markdown_text: The Markdown source.
        font_name: Name of a font already registered with ReportLab; used for
            every style, including code blocks.

    Returns:
        A list of flowables in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # ParagraphStyle has no 'padding' attribute; 'borderPadding' is the one
    # that insets the background and border from the text.
    style_code = ParagraphStyle(
        'Code', parent=styles['Code'], fontName=font_name,
        backColor=colors.whitesmoke, borderColor=colors.lightgrey,
        borderWidth=1, borderPadding=5,
    )

    story = []
    table_rows = []
    code_lines = []
    in_code_block = False

    def flush_table():
        """Emit the pending table rows (if any) as a Table plus a spacer."""
        if not table_rows:
            return
        # Table requires every row to have the same number of cells, so
        # short rows are padded with empty cells up to the widest row.
        column_count = max(len(row) for row in table_rows)
        data = [row + [""] * (column_count - len(row)) for row in table_rows]
        table = Table(data, hAlign='LEFT', colWidths=[1.5 * inch] * column_count)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        story.append(table)
        story.append(Spacer(1, 0.2 * inch))
        table_rows.clear()

    def flush_code():
        """Emit the collected code lines as one paragraph with explicit breaks."""
        story.append(Paragraph('<br/>'.join(code_lines), style_code))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # Fences are handled before tables so that pipe-delimited text inside
        # a code block stays code instead of being turned into a table.
        if stripped.startswith("```"):
            if in_code_block:
                flush_code()
            else:
                flush_table()
            in_code_block = not in_code_block
            continue
        if in_code_block:
            code_lines.append(_escape_markup(line))
            continue

        if stripped.startswith('|') and stripped.endswith('|'):
            # Header separator rows such as |---|:---:| carry no content.
            if all(c in '-|: ' for c in stripped):
                continue
            cells = [cell.strip() for cell in stripped.strip('|').split('|')]
            table_rows.append([Paragraph(_inline_markup(cell), style_normal) for cell in cells])
            continue

        # Any non-table line ends the table in progress.
        flush_table()

        if line.startswith("# "):
            story.append(Paragraph(_inline_markup(line[2:]), style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(_inline_markup(line[3:]), style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(_inline_markup(line[4:]), style_h3))
        elif stripped.startswith(("* ", "- ")):
            story.append(Paragraph(_inline_markup(stripped[2:]), style_normal, bulletText='•'))
        elif re.match(r'^\d+\.\s', stripped):
            story.append(Paragraph(_inline_markup(stripped), style_normal))
        elif not stripped:
            story.append(Spacer(1, 0.1 * inch))
        else:
            story.append(Paragraph(_inline_markup(line), style_normal))

    # Input may end while a table or an unterminated code fence is still
    # open; emit what was collected rather than dropping it.
    flush_table()
    if in_code_block and code_lines:
        flush_code()

    return story
126
+
127
+ # --- Gradio API Function ---
128
+
129
def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file, layout, font) combination for the Gradio UI.

    The output directory is emptied of files first. A failure to read one
    upload or to build one PDF is written to the log and does not stop the
    remaining combinations.

    Args:
        files: Uploaded Markdown files from the file component.
        layouts: Selected layout names; each must be a key of LAYOUTS.
        fonts: Selected font names.
        progress: Gradio progress tracker used to wrap the file loop.

    Returns:
        A tuple ``(pdf_paths, log_text)``: the sorted paths of the PDFs found
        in the output directory, and the Markdown-formatted log.

    Raises:
        gr.Error: If ``files``, ``layouts`` or ``fonts`` is empty.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Start from an empty output directory, recreating it if it was removed
    # after import.
    OUTPUT_DIR.mkdir(exist_ok=True)
    for stale in OUTPUT_DIR.glob('*'):
        if stale.is_file():
            stale.unlink()

    log = ["Starting PDF generation...\n"]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # NOTE(review): presumably the upload arrives either as an object
        # exposing its path as `.name` or as a plain path string, depending
        # on the Gradio version; both are accepted here.
        original_md_path = Path(getattr(md_file_obj, "name", md_file_obj))
        log.append(f"\nProcessing: **{original_md_path.name}**\n")

        try:
            # utf-8-sig also reads plain UTF-8 and strips a leading BOM,
            # which would otherwise hide a heading on the first line.
            md_content = original_md_path.read_text(encoding='utf-8-sig')
        except (OSError, UnicodeDecodeError) as e:
            log.append(f" - **ERROR**: Could not read {original_md_path.name}: {e}\n")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                log.append(f" - Generating: {layout_name}, {font_name}\n")
                try:
                    output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                    output_path = OUTPUT_DIR / output_filename

                    doc = SimpleDocTemplate(
                        str(output_path),
                        pagesize=LAYOUTS[layout_name]["size"],
                        rightMargin=inch, leftMargin=inch,
                        topMargin=inch, bottomMargin=inch,
                    )
                    doc.build(markdown_to_story(md_content, font_name))
                except Exception as e:
                    # Best-effort per combination: report it and keep going.
                    log.append(f" - **ERROR**: Failed to process with font {font_name}: {e}\n")

    log.append("\n✅ PDF generation complete!")

    generated_files = sorted(str(f) for f in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "".join(log)
170
+
171
+ # --- Gradio UI Definition ---
172
+
173
# Fonts are discovered (and registered) once at import time; the resulting
# list feeds the font selector below.
AVAILABLE_FONTS = discover_and_register_fonts()
SAMPLE_MARKDOWN = "# Sample Document\n\nThis is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n### Features\n- Item 1\n- Item 2\n\n| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1 | Cell 2 |\n\n```python\ndef hello():\n print(\"Hello, PDF!\")\n```\n"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)

            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=list(LAYOUTS.keys())[0])

            # With no registered font, offer "Helvetica" as the only choice
            # so the selector is never empty.
            if not AVAILABLE_FONTS:
                gr.Warning("No fonts found in 'fonts' directory. Using default.")
                AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default

            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Fonts to Use", value=AVAILABLE_FONTS[0] if AVAILABLE_FONTS else None)

            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")

        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### 📄 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()