Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
import datetime
|
4 |
+
import re
|
5 |
+
import requests
|
6 |
+
import os
|
7 |
+
import shutil
|
8 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
9 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
10 |
+
from reportlab.lib.pagesizes import letter, A4, legal, landscape
|
11 |
+
from reportlab.lib.units import inch
|
12 |
+
from reportlab.lib import colors
|
13 |
+
from reportlab.pdfbase import pdfmetrics
|
14 |
+
from reportlab.pdfbase.ttfonts import TTFont
|
15 |
+
|
16 |
+
# --- Configuration & Setup ---
|
17 |
+
# These settings define the available layouts and directories for fonts and outputs.
|
18 |
+
|
19 |
+
# Every supported paper size is offered in both orientations. Keys are the
# labels shown in the UI; values carry the ReportLab page size tuple.
LAYOUTS = {
    f"{paper} {orientation}": {
        "size": size if orientation == "Portrait" else landscape(size)
    }
    for paper, size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation in ("Portrait", "Landscape")
}

# Working directories (relative to the current working directory); both are
# created at import time so later code can assume they exist.
OUTPUT_DIR = Path("generated_pdfs")
FONT_DIR = Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
|
32 |
+
|
33 |
+
# --- Font Handling ---
|
34 |
+
|
35 |
+
def download_default_font():
    """Ensure DejaVuSans.ttf is present in the font directory.

    If the file is missing it is downloaded from the DejaVu GitHub
    repository. The download is best-effort: network and filesystem
    failures are reported on stdout and signalled through the return value
    rather than raised.

    Returns:
        True if the font file exists when the function returns, False if it
        was missing and could not be downloaded.
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    # Download into a side file first so an interrupted write can never leave
    # a truncated DejaVuSans.ttf that later runs would treat as valid. The
    # ".part" suffix also keeps it out of the "*.ttf" font discovery glob.
    partial_path = dejavu_path.with_name(dejavu_path.name + ".part")
    try:
        # Without a timeout a stalled connection would block startup forever.
        response = requests.get(url, allow_redirects=True, timeout=30)
        response.raise_for_status()
        partial_path.write_bytes(response.content)
        partial_path.replace(dejavu_path)
    except (requests.RequestException, OSError) as e:
        print(f"Failed to download default font: {e}")
        try:
            if partial_path.exists():
                partial_path.unlink()
        except OSError:
            # Cleanup is best-effort; the failure was already reported above.
            pass
        return False

    print("Default font downloaded successfully.")
    return True
|
52 |
+
|
53 |
+
def discover_and_register_fonts():
    """Register every .ttf file in the font directory with ReportLab.

    When the directory holds no .ttf files the default font is downloaded
    first. Each font is registered under its file stem; fonts that fail to
    register are reported on stdout and skipped.

    Returns:
        The registered font names in sorted order, or an empty list when no
        font is available and the default download failed.
    """
    ttf_paths = list(FONT_DIR.glob("*.ttf"))
    if not ttf_paths:
        if not download_default_font():
            return []  # Nothing on disk and nothing could be fetched
        ttf_paths = list(FONT_DIR.glob("*.ttf"))

    registered = []
    for ttf_path in ttf_paths:
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered
|
70 |
+
|
71 |
+
# --- ReportLab PDF Generation (Core Logic) ---
|
72 |
+
|
73 |
+
# Inline markdown patterns, compiled once because they run for every line.
_BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
# Underscores glued to word characters (snake_case) are not emphasis markers.
_ITALIC_RE = re.compile(r'(?<!\w)_(.+?)_(?!\w)')
_ORDERED_ITEM_RE = re.compile(r'^\d+\.\s')


def _escape_markup(text):
    """Escape the characters ReportLab's paragraph parser treats as markup."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _inline_markup(text):
    """Escape *text*, then convert **bold** and _italic_ spans to ReportLab tags."""
    with_bold = _BOLD_RE.sub(r'<b>\1</b>', _escape_markup(text))
    return _ITALIC_RE.sub(r'<i>\1</i>', with_bold)


def _code_line_markup(line):
    """Escape one code line and keep its leading indentation visible."""
    expanded = line.expandtabs(4)
    body = expanded.lstrip(' ')
    # Paragraph collapses ordinary leading spaces, so indentation is emitted
    # as non-breaking spaces instead.
    return '&nbsp;' * (len(expanded) - len(body)) + _escape_markup(body)


def _pad_rows(rows):
    """Return a copy of *rows* with short rows padded with '' to the widest row."""
    width = max((len(row) for row in rows), default=0)
    return [row + [''] * (width - len(row)) for row in rows]


def _build_table(rows):
    """Build the styled ReportLab Table for a list of rows of cell flowables."""
    # Table rejects ragged data, so every row is padded to the same width.
    data = _pad_rows(rows)
    table = Table(data, hAlign='LEFT', colWidths=[1.5 * inch] * len(data[0]))
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return table


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a markdown string into a list of ReportLab flowables.

    Supported constructs: '#', '##' and '###' headings, '*' / '-' bullet
    items, numbered items, pipe tables, fenced code blocks, blank lines, and
    **bold** / _italic_ inline emphasis. Anything else becomes a body
    paragraph. Literal '&', '<' and '>' in the input are escaped so they are
    rendered as text instead of being parsed as paragraph markup.

    Args:
        markdown_text: The markdown source.
        font_name: Name of a font already registered with ReportLab; used
            for every paragraph style.

    Returns:
        The list of flowables ('story') in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # ParagraphStyle's padding attribute is 'borderPadding'; an unknown
    # keyword such as 'padding' is stored but never read when drawing.
    style_code = ParagraphStyle(
        'Code', parent=styles['Code'], fontName=font_name,
        backColor=colors.whitesmoke, borderColor=colors.lightgrey,
        borderWidth=1, borderPadding=5,
    )

    story = []
    code_lines = []
    table_rows = []
    in_code_block = False

    def flush_table():
        # Emit the table collected so far (if any) followed by a small gap.
        if table_rows:
            story.append(_build_table(table_rows))
            story.append(Spacer(1, 0.2 * inch))
            table_rows.clear()

    def flush_code():
        story.append(Paragraph('<br/>'.join(code_lines), style_code))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # Fences are checked first so that '|...|' lines inside a code block
        # stay code instead of being parsed as table rows.
        if stripped.startswith("```"):
            if in_code_block:
                flush_code()
            else:
                flush_table()
            in_code_block = not in_code_block
            continue
        if in_code_block:
            code_lines.append(_code_line_markup(line))
            continue

        if stripped.startswith('|') and stripped.endswith('|'):
            # Rows made only of '-', '|', ':' and spaces are header rules.
            if all(c in '-|: ' for c in stripped):
                continue
            cells = [cell.strip() for cell in stripped.strip('|').split('|')]
            table_rows.append([Paragraph(_inline_markup(cell), style_normal) for cell in cells])
            continue

        # Any non-table line ends the table in progress.
        flush_table()

        if line.startswith("# "):
            story.append(Paragraph(_inline_markup(line[2:]), style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(_inline_markup(line[3:]), style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(_inline_markup(line[4:]), style_h3))
        elif stripped.startswith(("* ", "- ")):
            story.append(Paragraph(_inline_markup(stripped[2:]), style_normal, bulletText='•'))
        elif _ORDERED_ITEM_RE.match(stripped):
            story.append(Paragraph(_inline_markup(stripped), style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            story.append(Paragraph(_inline_markup(line), style_normal))

    # Input that ends inside a table or an unclosed fence must not lose it.
    flush_table()
    if in_code_block and code_lines:
        flush_code()

    return story
|
126 |
+
|
127 |
+
# --- Gradio API Function ---
|
128 |
+
|
129 |
+
def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file, layout, font) combination for the Gradio UI.

    The output directory is emptied first. Each uploaded markdown file is
    then rendered once for every selected layout and font. A file that
    cannot be read, or a combination that fails to render, is recorded in
    the log and skipped so the rest of the batch still runs.

    Args:
        files: Uploaded markdown files; each item is either a path string
            or an object whose ``name`` attribute is the path.
        layouts: Selected layout names (keys of ``LAYOUTS``).
        fonts: Selected font names registered with ReportLab.
        progress: Gradio progress tracker used to wrap the file loop.

    Returns:
        A ``(generated_files, log)`` tuple: the paths of the PDFs that were
        built successfully, and a markdown-formatted log of the run.

    Raises:
        gr.Error: If no file, layout or font was supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Clean output directory before generation (recreate it if it vanished).
    OUTPUT_DIR.mkdir(exist_ok=True)
    for stale in list(OUTPUT_DIR.glob('*')):
        if stale.is_file():
            stale.unlink()

    log_lines = ["Starting PDF generation..."]
    generated_files = []
    # One date stamp per run keeps all file names of a batch consistent.
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # Uploads arrive either as plain paths or as objects exposing `.name`.
        original_md_path = Path(getattr(md_file_obj, "name", md_file_obj))
        log_lines.append(f"\nProcessing: **{original_md_path.name}**")

        try:
            # 'utf-8-sig' drops a leading BOM, which would otherwise hide a
            # heading marker on the first line.
            md_content = original_md_path.read_text(encoding='utf-8-sig')
        except (OSError, UnicodeDecodeError) as e:
            log_lines.append(f" - **ERROR**: Could not read file: {e}")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                log_lines.append(f" - Generating: {layout_name}, {font_name}")

                output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                output_path = OUTPUT_DIR / output_filename

                try:
                    doc = SimpleDocTemplate(str(output_path), pagesize=LAYOUTS[layout_name]["size"], rightMargin=inch, leftMargin=inch, topMargin=inch, bottomMargin=inch)
                    doc.build(markdown_to_story(md_content, font_name))
                except Exception as e:
                    # UI boundary: report the failure and carry on with the
                    # remaining combinations instead of aborting the batch.
                    log_lines.append(f" - **ERROR**: Failed to process with font {font_name}: {e}")
                    if output_path.exists():
                        output_path.unlink()
                else:
                    generated_files.append(str(output_path))

    log_lines.append("\n✅ PDF generation complete!")

    # Uploads sharing a stem map to the same output name; list each path once.
    generated_files = list(dict.fromkeys(generated_files))
    return generated_files, "\n".join(log_lines)
|
170 |
+
|
171 |
+
# --- Gradio UI Definition ---
|
172 |
+
|
173 |
+
# Fonts are discovered once at import time. The fallback is resolved here,
# before any component is built, so the UI code below can rely on a
# non-empty list.
AVAILABLE_FONTS = discover_and_register_fonts()
if not AVAILABLE_FONTS:
    gr.Warning("No fonts found in 'fonts' directory. Using default.")
    AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default

# Reference document shown in the UI; it exercises headings, emphasis,
# bullets, a table and a fenced code block.
SAMPLE_MARKDOWN = "# Sample Document\n\nThis is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n### Features\n- Item 1\n- Item 2\n\n| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1 | Cell 2 |\n\n```python\ndef hello():\n print(\"Hello, PDF!\")\n```\n"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)

            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            # CheckboxGroup values are lists of selected choices, so the
            # defaults are passed as one-element lists.
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=[next(iter(LAYOUTS))])
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Fonts to Use", value=[AVAILABLE_FONTS[0]])

            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")

        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()
|