File size: 9,607 Bytes
6098452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import gradio as gr
from pathlib import Path
import datetime
import re
import requests
import os
import shutil
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter, A4, legal, landscape
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# --- Configuration & Setup ---
# These settings define the available layouts and directories for fonts and outputs.

# Page layouts offered in the UI, keyed by display name. Each entry's "size"
# value is what gets passed as `pagesize` when a document is built.
LAYOUTS = {
    layout_label: {"size": page_size}
    for layout_label, page_size in (
        ("A4 Portrait", A4),
        ("A4 Landscape", landscape(A4)),
        ("Letter Portrait", letter),
        ("Letter Landscape", landscape(letter)),
        ("Legal Portrait", legal),
        ("Legal Landscape", landscape(legal)),
    )
}

# Working directories, relative to the process's current directory.
# Both are created at import time when they do not exist yet.
OUTPUT_DIR = Path("generated_pdfs")
FONT_DIR = Path("fonts")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)

# --- Font Handling ---

def download_default_font():
    """Ensure DejaVuSans.ttf exists in the font directory, downloading it if needed.

    Returns:
        True if the font file is present when the function returns (either it
        already existed or the download succeeded), False if the download or
        the write to disk failed. Failures are reported on stdout, never raised.
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    # Download into a side file first: its name does not end in ".ttf", so an
    # interrupted write can never be picked up later as a (corrupt) font.
    partial_path = dejavu_path.with_name(dejavu_path.name + ".part")
    try:
        # Without a timeout a stalled connection would block forever, and this
        # function is reached while the module is being imported.
        r = requests.get(url, allow_redirects=True, timeout=30)
        r.raise_for_status()
        partial_path.write_bytes(r.content)
        partial_path.replace(dejavu_path)
    except (requests.RequestException, OSError) as e:
        print(f"Failed to download default font: {e}")
        return False

    print("Default font downloaded successfully.")
    return True

def discover_and_register_fonts():
    """Register every .ttf file in the font directory with ReportLab.

    If the directory holds no .ttf files, the default font download is
    attempted first; when that fails an empty list is returned. Each font is
    registered under its file stem. Fonts that fail to register are reported
    on stdout and skipped.

    Returns:
        The names of the successfully registered fonts, sorted.
    """
    ttf_paths = list(FONT_DIR.glob("*.ttf"))
    if not ttf_paths:
        downloaded = download_default_font()
        if not downloaded:
            return []  # nothing on disk and nothing could be fetched
        ttf_paths = list(FONT_DIR.glob("*.ttf"))

    registered = []
    for ttf_path in ttf_paths:
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered

# --- ReportLab PDF Generation (Core Logic) ---

# Inline emphasis patterns, compiled once instead of on every line.
_BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
# Underscores only count as emphasis at word edges, so snake_case is left alone.
_ITALIC_RE = re.compile(r'(?<!\w)_(.+?)_(?!\w)')
_ORDERED_ITEM_RE = re.compile(r'^\d+\.\s')


def _escape_markup(text: str) -> str:
    """Escape the characters that Paragraph markup treats as special (&, <, >)."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _inline_markup(text: str) -> str:
    """Escape *text*, then convert **bold** and _italic_ spans to <b>/<i> tags."""
    escaped = _escape_markup(text)
    return _ITALIC_RE.sub(r'<i>\1</i>', _BOLD_RE.sub(r'<b>\1</b>', escaped))


def _build_table(rows, cell_style):
    """Build a grid-styled Table from rows of raw cell strings.

    Short rows are padded with empty cells so that every row has as many
    cells as the widest one and the column-width list matches the data.
    """
    column_count = max(len(row) for row in rows)
    data = [
        [
            Paragraph(_inline_markup(cell), cell_style)
            for cell in row + [''] * (column_count - len(row))
        ]
        for row in rows
    ]
    table = Table(data, hAlign='LEFT', colWidths=[1.5 * inch] * column_count)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),  # first row acts as the header
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return table


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a markdown string into a list of ReportLab flowables (a 'story').

    Supported constructs: `#`/`##`/`###` headings, `*`/`-` bullets, numbered
    items, pipe tables, triple-backtick fenced code blocks, blank lines (as
    spacers) and plain paragraphs. `**bold**` and `_italic_` are honoured in
    all text except code blocks. All text is escaped before being handed to
    Paragraph, so literal `<`, `>` and `&` are rendered as-is.

    Args:
        markdown_text: The markdown source.
        font_name: Font name applied to every paragraph style created here.

    Returns:
        The flowables in document order. A table or code block that is still
        open when the input ends is emitted rather than dropped.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # `borderPadding` is the ParagraphStyle attribute for inner padding; the
    # previous `padding=` keyword is not a style attribute.
    style_code = ParagraphStyle(
        'Code', parent=styles['Code'], fontName=font_name,
        backColor=colors.whitesmoke, borderColor=colors.lightgrey,
        borderWidth=1, borderPadding=5,
    )

    story = []
    code_lines = []   # escaped lines of the code block currently being read
    table_rows = []   # raw cell strings of the table currently being read
    in_code_block = False

    def flush_table():
        # Emit the pending table, if any, followed by a little vertical space.
        if table_rows:
            story.append(_build_table(table_rows, style_normal))
            story.append(Spacer(1, 0.2 * inch))
            table_rows.clear()

    def flush_code():
        story.append(Paragraph('<br/>'.join(code_lines), style_code))
        code_lines.clear()

    # splitlines() also copes with "\r\n" line endings.
    for line in markdown_text.splitlines():
        stripped = line.strip()

        # Code fences are handled first so that nothing inside a fence is
        # mistaken for a table row or any other markdown construct.
        if in_code_block:
            if stripped.startswith("```"):
                flush_code()
                in_code_block = False
            else:
                code_lines.append(_escape_markup(line))
            continue

        if stripped.startswith('|') and stripped.endswith('|'):
            # Skip the header separator row (e.g. |---|:--:|).
            if '-' in stripped and all(c in '-|: ' for c in stripped):
                continue
            table_rows.append([cell.strip() for cell in stripped.strip('|').split('|')])
            continue

        # Any other line ends a table that was in progress.
        flush_table()

        if stripped.startswith("```"):
            in_code_block = True
            continue

        # Markdown elements to ReportLab flowables
        if line.startswith("# "):
            story.append(Paragraph(_inline_markup(line[2:]), style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(_inline_markup(line[3:]), style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(_inline_markup(line[4:]), style_h3))
        elif stripped.startswith(("* ", "- ")):
            # "\u2022" is the bullet character.
            story.append(Paragraph(_inline_markup(stripped[2:]), style_normal, bulletText='\u2022'))
        elif _ORDERED_ITEM_RE.match(stripped):
            story.append(Paragraph(_inline_markup(stripped), style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            story.append(Paragraph(_inline_markup(line), style_normal))

    # Input may end while a table or an unterminated code fence is still open.
    flush_table()
    if in_code_block:
        flush_code()

    return story

# --- Gradio API Function ---

def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (markdown file, layout, font) combination.

    Args:
        files: Uploaded markdown files, either objects exposing a `.name`
            path attribute or plain path strings.
        layouts: Selected layout names (keys of LAYOUTS).
        fonts: Selected font names.
        progress: Gradio progress tracker used to wrap the file loop.

    Returns:
        A tuple `(generated_files, log_text)`: the paths of the PDFs that were
        built successfully, in generation order, and a markdown-formatted log.

    Raises:
        gr.Error: If no files, layouts or fonts were supplied.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Start from an empty output directory (recreating it if it was removed
    # after start-up) so the result only contains files from this run.
    OUTPUT_DIR.mkdir(exist_ok=True)
    for stale in OUTPUT_DIR.glob('*'):
        if stale.is_file():
            stale.unlink()

    log_parts = ["Starting PDF generation...\n"]
    generated_files = []
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # Accept both file-like objects with a `.name` path and plain path strings.
        original_md_path = Path(getattr(md_file_obj, "name", md_file_obj))
        log_parts.append(f"\nProcessing: **{original_md_path.name}**\n")

        try:
            md_content = original_md_path.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable upload should not abort the remaining files.
            log_parts.append(f"  - **ERROR**: Could not read file: {e}\n")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                log_parts.append(f"  - Generating: {layout_name}, {font_name}\n")

                output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                output_path = OUTPUT_DIR / output_filename
                try:
                    doc = SimpleDocTemplate(str(output_path), pagesize=LAYOUTS[layout_name]["size"], rightMargin=inch, leftMargin=inch, topMargin=inch, bottomMargin=inch)
                    story = markdown_to_story(md_content, font_name)
                    doc.build(story)
                except Exception as e:
                    # Boundary catch: any failure for one combination is logged
                    # and the remaining combinations are still attempted.
                    log_parts.append(f"    - **ERROR**: Failed to process with font {font_name}: {e}\n")
                else:
                    # Two uploads with the same stem write to the same path;
                    # list it only once.
                    if str(output_path) not in generated_files:
                        generated_files.append(str(output_path))

    # "\u2705" is the check-mark emoji.
    log_parts.append("\n\u2705 PDF generation complete!")

    return generated_files, "".join(log_parts)

# --- Gradio UI Definition ---

AVAILABLE_FONTS = discover_and_register_fonts()
if not AVAILABLE_FONTS:
    # Reported on the console: this runs while the module is loading, not
    # inside an event handler, so a UI toast cannot be shown here
    # (NOTE(review): based on Gradio's documented in-handler use of gr.Warning).
    print("No fonts found in 'fonts' directory. Using default.")
    AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default

# Example document shown in the UI for reference; it exercises headings,
# bold text, bullets, a table and a fenced code block.
SAMPLE_MARKDOWN = (
    "# Sample Document\n\n"
    "This is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n"
    "### Features\n- Item 1\n- Item 2\n\n"
    "| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1   | Cell 2   |\n\n"
    "```python\ndef hello():\n    print(\"Hello, PDF!\")\n```\n"
)

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    # "\U0001F4C4" is the page emoji.
    gr.Markdown("# \U0001F4C4 Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")

    with gr.Row():
        with gr.Column(scale=1):
            # "\u2699\ufe0f" is the gear emoji.
            gr.Markdown("### \u2699\ufe0f Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)

            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            # A checkbox group's value is a list of selected choices; preselect
            # the first layout and the first font.
            selected_layouts = gr.CheckboxGroup(choices=list(LAYOUTS.keys()), label="Select Page Layouts", value=[next(iter(LAYOUTS))])
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Fonts to Use", value=[AVAILABLE_FONTS[0]])

            # "\U0001F680" is the rocket emoji.
            generate_btn = gr.Button("\U0001F680 Generate PDFs", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### \U0001F4C4 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()