Spaces:
Build error
Build error
import gradio as gr | |
from pathlib import Path | |
import datetime | |
import re | |
import requests | |
import os | |
import shutil | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter, A4, legal, landscape | |
from reportlab.lib.units import inch | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# --- Configuration & Setup ---
# These settings define the available layouts and directories for fonts and outputs.
# Directory that receives generated PDFs, and directory scanned for .ttf fonts.
OUTPUT_DIR = Path("generated_pdfs")
FONT_DIR = Path("fonts")

# Layout label shown in the UI -> options dict; "size" holds the ReportLab
# page size used for that layout.
LAYOUTS = {
    "A4 Portrait": dict(size=A4),
    "A4 Landscape": dict(size=landscape(A4)),
    "Letter Portrait": dict(size=letter),
    "Letter Landscape": dict(size=landscape(letter)),
    "Legal Portrait": dict(size=legal),
    "Legal Landscape": dict(size=landscape(legal)),
}

# Both directories are created at import time if they do not exist yet.
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
# --- Font Handling ---
def download_default_font():
    """Ensure the fallback font DejaVuSans.ttf exists in the font directory.

    If ``FONT_DIR / "DejaVuSans.ttf"`` is missing, the font is downloaded from
    the DejaVu GitHub repository. The payload is first written to a ``.part``
    file and then moved into place, so an interrupted download never leaves a
    truncated ``.ttf`` that would later be mistaken for a usable font.

    Returns:
        bool: True if the font already existed or was downloaded and saved,
        False if the download or the write failed (the error is printed).
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    # The ".part" suffix keeps the temporary file out of "*.ttf" globs.
    partial_path = dejavu_path.with_name(dejavu_path.name + ".part")
    try:
        # A timeout is required: without one a stalled connection would block
        # application start-up indefinitely.
        response = requests.get(url, allow_redirects=True, timeout=30)
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            f.write(response.content)
        os.replace(partial_path, dejavu_path)
    except (requests.RequestException, OSError) as e:
        print(f"Failed to download default font: {e}")
        # Best-effort cleanup of a half-written temporary file.
        try:
            if partial_path.exists():
                partial_path.unlink()
        except OSError:
            pass
        return False

    print("Default font downloaded successfully.")
    return True
def discover_and_register_fonts():
    """Register every .ttf file in the font directory with ReportLab.

    When the directory holds no .ttf files, the default font is downloaded
    first; if that download fails an empty list is returned. Each font is
    registered under its file stem. Fonts that fail to register are reported
    on stdout and skipped.

    Returns:
        list[str]: the names of the successfully registered fonts, sorted.
    """
    def _scan_ttf_files():
        return list(FONT_DIR.glob("*.ttf"))

    candidates = _scan_ttf_files()
    if not candidates:
        if not download_default_font():
            return []  # Return empty if download fails
        candidates = _scan_ttf_files()

    registered = []
    for ttf_path in candidates:
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            print(f"Could not register font {ttf_path.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered
# --- ReportLab PDF Generation (Core Logic) ---
def _escape_markup(text):
    """Escape &, < and > so ReportLab's paragraph markup parser shows them literally."""
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _build_table_flowables(rows, cell_style):
    """Turn rows of cell strings into a styled Table followed by a Spacer."""
    # Pad short rows so every row has as many cells as there are column widths.
    column_count = max(len(row) for row in rows)
    data = [
        [Paragraph(cell, cell_style) for cell in row + [''] * (column_count - len(row))]
        for row in rows
    ]
    table = Table(data, hAlign='LEFT', colWidths=[1.5 * inch] * column_count)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a Markdown string into a list of ReportLab flowables (a "story").

    The input is processed line by line. Supported constructs:

    * ``#``, ``##`` and ``###`` headings,
    * ``* `` / ``- `` bullet items and ``1. `` style numbered items,
    * pipe tables (``| a | b |``); separator rows made only of ``-|: `` are
      skipped and the first remaining row gets a grey background,
    * fenced code blocks delimited by lines starting with three backticks,
    * ``**bold**`` and ``_italic_`` inside ordinary paragraphs,
    * blank lines, which become a small vertical spacer.

    Args:
        markdown_text: The Markdown source.
        font_name: Font name applied to every paragraph style; it must be a
            font ReportLab can resolve.

    Returns:
        list: Flowables (Paragraph, Table, Spacer) in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # ``borderPadding`` is the ParagraphStyle attribute for inner padding; an
    # unknown ``padding`` keyword is stored on the style but has no effect.
    style_code = ParagraphStyle(
        'Code', parent=styles['Code'], fontName=font_name,
        backColor=colors.whitesmoke, borderColor=colors.lightgrey,
        borderWidth=1, borderPadding=5,
    )

    story = []
    in_code_block = False
    code_lines = []   # escaped lines of the code block currently being read
    table_rows = []   # rows (lists of cell strings) of the table being read

    for line in markdown_text.split('\n'):
        stripped = line.strip()
        is_fence = stripped.startswith("```")

        # Inside a fenced block everything is literal text, including lines
        # that would otherwise look like table rows.
        if in_code_block and not is_fence:
            code_lines.append(_escape_markup(line))
            continue

        if stripped.startswith('|') and stripped.endswith('|'):  # Table handler
            if not all(c in '-|: ' for c in stripped):  # skip separator rows
                table_rows.append([cell.strip() for cell in stripped.strip('|').split('|')])
            continue

        # Any non-table line ends the table that was being collected.
        if table_rows:
            story.extend(_build_table_flowables(table_rows, style_normal))
            table_rows = []

        if is_fence:  # Code block handler
            if in_code_block:
                story.append(Paragraph('<br/>'.join(code_lines), style_code))
                code_lines = []
            in_code_block = not in_code_block
            continue

        # Markdown elements to ReportLab Flowables
        if line.startswith("# "):
            story.append(Paragraph(line[2:], style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(line[3:], style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(line[4:], style_h3))
        elif stripped.startswith(("* ", "- ")):
            story.append(Paragraph(stripped[2:], style_normal, bulletText='\u2022'))
        elif re.match(r'^\d+\.\s', stripped):
            story.append(Paragraph(stripped, style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            with_bold = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
            story.append(Paragraph(re.sub(r'_(.*?)_', r'<i>\1</i>', with_bold), style_normal))

    # Flush constructs that were still open when the input ended, so a
    # trailing table or an unclosed code fence is not silently dropped.
    if table_rows:
        story.extend(_build_table_flowables(table_rows, style_normal))
    if in_code_block and code_lines:
        story.append(Paragraph('<br/>'.join(code_lines), style_code))

    return story
# --- Gradio API Function ---
def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (uploaded file, layout, font) combination.

    The output directory is emptied first, then every uploaded Markdown file
    is rendered once for each selected layout and font. A failure on one file
    or one combination is recorded in the log and does not stop the others.

    Args:
        files: Uploaded Markdown files from the Gradio file component. Each
            item may be a path (str / PathLike) or an object exposing the path
            as ``.name``; which one Gradio passes depends on its version and
            the component's ``type`` setting.
        layouts: Selected layout labels; each must be a key of ``LAYOUTS``.
        fonts: Selected font names.
        progress: Gradio progress tracker used to wrap the file loop.

    Returns:
        tuple[list[str], str]: Paths of all PDFs in the output directory
        (sorted), and the Markdown-formatted generation log.

    Raises:
        gr.Error: If no files, no layouts or no fonts were provided.
    """
    if not files: raise gr.Error("Please upload at least one Markdown file.")
    if not layouts: raise gr.Error("Please select at least one page layout.")
    if not fonts: raise gr.Error("Please select at least one font.")

    # Clean output directory before generation (recreating it if it was
    # removed since start-up) so only this run's PDFs are returned.
    OUTPUT_DIR.mkdir(exist_ok=True)
    for stale in OUTPUT_DIR.glob('*'):
        if stale.is_file():
            os.remove(stale)

    log_parts = ["Starting PDF generation...\n"]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # Accept both plain paths and temp-file style objects with ``.name``.
        if isinstance(md_file_obj, (str, os.PathLike)):
            original_md_path = Path(md_file_obj)
        else:
            original_md_path = Path(md_file_obj.name)
        log_parts.append(f"\nProcessing: **{original_md_path.name}**\n")

        try:
            with open(original_md_path, 'r', encoding='utf-8') as f:
                md_content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable upload must not abort the remaining files.
            log_parts.append(f" - **ERROR**: Could not read {original_md_path.name}: {e}\n")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                log_parts.append(f" - Generating: {layout_name}, {font_name}" + "\n")
                try:
                    output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                    output_path = OUTPUT_DIR / output_filename
                    doc = SimpleDocTemplate(str(output_path), pagesize=LAYOUTS[layout_name]["size"], rightMargin=inch, leftMargin=inch, topMargin=inch, bottomMargin=inch)
                    story = markdown_to_story(md_content, font_name)
                    doc.build(story)
                except Exception as e:
                    # Deliberately broad: this is the per-PDF boundary, and a
                    # rendering failure should be logged without stopping
                    # the remaining combinations.
                    log_parts.append(f" - **ERROR**: Failed to process with font {font_name}: {e}\n")

    log_parts.append("\n\u2705 PDF generation complete!")
    generated_files = sorted(str(pdf) for pdf in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "".join(log_parts)
# --- Gradio UI Definition ---
# Fonts offered in the UI: everything that could be registered from the font
# directory, or ReportLab's built-in Helvetica when nothing is available.
AVAILABLE_FONTS = discover_and_register_fonts()
if not AVAILABLE_FONTS:
    # Reported with print (like the font helpers) because this runs at
    # build time, outside any Gradio event.
    print("No fonts found in 'fonts' directory. Using default.")
    AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default

# Example document shown in the UI for reference only; it is not used as input.
SAMPLE_MARKDOWN = "# Sample Document\n\nThis is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n### Features\n- Item 1\n- Item 2\n\n| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1 | Cell 2 |\n\n```python\ndef hello():\n print(\"Hello, PDF!\")\n```\n"

# NOTE(review): the emoji in the labels below were mis-encoded in the source
# this was recovered from; they are best-guess restorations -- confirm.
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# \U0001F4C4 Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### \u2699\ufe0f Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            # CheckboxGroup values are lists of selected choices, so the
            # defaults are passed as one-element lists rather than strings.
            layout_choices = list(LAYOUTS.keys())
            selected_layouts = gr.CheckboxGroup(choices=layout_choices, label="Select Page Layouts", value=layout_choices[:1])
            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Fonts to Use", value=AVAILABLE_FONTS[:1])
            generate_btn = gr.Button("\U0001F680 Generate PDFs", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("### \U0001F4C4 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")
    # Outputs are ordered to match generate_pdfs_api's (files, log) return.
    generate_btn.click(fn=generate_pdfs_api, inputs=[uploaded_files, selected_layouts, selected_fonts], outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()