# Page-header text captured together with this file (not part of the program):
#   Spaces:
#   Build error
#   Build error
import datetime
import html
import os
import re
import shutil
from pathlib import Path

import gradio as gr
import requests
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4, legal, landscape
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
# --- Configuration & Setup ---
# Page layouts offered to the user, keyed by display label. Each value is a
# dict whose "size" entry is the ReportLab page size for that layout.
LAYOUTS = {
    label: {"size": page_size}
    for label, page_size in (
        ("A4 Portrait", A4),
        ("A4 Landscape", landscape(A4)),
        ("Letter Portrait", letter),
        ("Letter Landscape", landscape(letter)),
        ("Legal Portrait", legal),
        ("Legal Landscape", landscape(legal)),
    )
}

# Working directories, relative to the current working directory. Both are
# created at import time if they do not exist yet.
OUTPUT_DIR = Path("generated_pdfs")  # generated PDFs are written here
FONT_DIR = Path("fonts")             # .ttf files are looked up here
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR.mkdir(exist_ok=True)
# --- Font Handling ---
def download_default_font():
    """Ensure ``DejaVuSans.ttf`` exists in ``FONT_DIR``, downloading it if needed.

    Returns:
        True if the font file is present (already there, or downloaded now),
        False if the download or the write to disk failed. Failures are
        printed rather than raised so the caller can fall back to another font.
    """
    dejavu_path = FONT_DIR / "DejaVuSans.ttf"
    if dejavu_path.exists():
        return True

    print("No fonts found. Downloading default font (DejaVuSans)...")
    url = "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true"
    # Stage the download under a non-.ttf name so an interrupted write can
    # never leave a truncated file that a later "*.ttf" scan would pick up.
    partial_path = dejavu_path.with_name(dejavu_path.name + ".part")
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would block application start-up.
        response = requests.get(url, allow_redirects=True, timeout=30)
        response.raise_for_status()
        partial_path.write_bytes(response.content)
        partial_path.replace(dejavu_path)
    except (requests.RequestException, OSError) as e:
        # Network/HTTP problems and filesystem problems are the expected
        # failure modes; anything else is a programming error and propagates.
        print(f"Failed to download default font: {e}")
        return False

    print("Default font downloaded successfully.")
    return True
def discover_and_register_fonts():
    """Register every ``*.ttf`` file in ``FONT_DIR`` with ReportLab.

    If the directory holds no ``.ttf`` files, ``download_default_font()`` is
    tried first; when that fails an empty list is returned.

    Returns:
        Alphabetically sorted list of the font names that registered
        successfully. Each name is the file name without its extension.
    """
    ttf_paths = list(FONT_DIR.glob("*.ttf"))
    if not ttf_paths:
        if not download_default_font():
            return []  # Return empty if download fails
        ttf_paths = list(FONT_DIR.glob("*.ttf"))

    registered = []
    for ttf_path in ttf_paths:
        name = ttf_path.stem
        try:
            pdfmetrics.registerFont(TTFont(name, str(ttf_path)))
        except Exception as e:
            # A font that fails to register is reported and skipped so the
            # remaining fonts still become available.
            print(f"Could not register font {ttf_path.name}: {e}")
        else:
            registered.append(name)

    registered.sort()
    return registered
# --- ReportLab PDF Generation (Core Logic) ---
def _inline_markup(text):
    """Turn ``**bold**`` and ``_italic_`` spans into ReportLab <b>/<i> tags."""
    text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
    return re.sub(r'_(.*?)_', r'<i>\1</i>', text)


def _table_flowables(rows):
    """Build the styled Table (plus trailing Spacer) for accumulated table rows."""
    table = Table(rows, hAlign='LEFT', colWidths=[1.5 * inch] * len(rows[0]))
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return [table, Spacer(1, 0.2 * inch)]


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert a markdown string into a list of ReportLab flowables (a "story").

    The input is processed line by line. Recognised constructs:

    * pipe tables (lines that start and end with ``|``; separator rows made
      only of ``-``, ``|``, ``:`` and spaces are skipped),
    * fenced code blocks delimited by lines starting with three backticks,
    * ``#``, ``##`` and ``###`` headings,
    * ``* `` / ``- `` bullet items and ``1. `` style numbered items,
    * blank lines (emitted as a small Spacer),
    * everything else as a body paragraph with ``**bold**`` / ``_italic_``
      converted to markup.

    Args:
        markdown_text: The markdown source.
        font_name: Name of a font already registered with ReportLab; applied
            to every paragraph style used here.

    Returns:
        List of flowables in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name)
    # borderPadding is the ParagraphStyle property that insets text from the
    # border/background box; a "padding" keyword is not one of its properties.
    style_code = ParagraphStyle(
        'Code', parent=styles['Code'], fontName=font_name,
        backColor=colors.whitesmoke, borderColor=colors.lightgrey,
        borderWidth=1, borderPadding=5,
    )

    story = []
    in_code_block = False
    code_lines = []   # escaped lines of the code block currently being read
    table_rows = []   # rows of the table currently being read

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # Table handler. Skipped inside a code fence so that pipe-delimited
        # lines in code samples stay part of the code block.
        if not in_code_block and stripped.startswith('|') and stripped.endswith('|'):
            if all(c in '-|: ' for c in stripped):
                continue  # header/body separator row carries no data
            cells = [cell.strip() for cell in stripped.strip('|').split('|')]
            table_rows.append([Paragraph(cell, style_normal) for cell in cells])
            continue

        # First non-table line after a table: emit the table collected so far.
        if table_rows:
            story.extend(_table_flowables(table_rows))
            table_rows = []

        # Code block handler: a fence line toggles the state; the closing
        # fence emits the collected text as one paragraph.
        if stripped.startswith("```"):
            if in_code_block:
                story.append(Paragraph('<br/>'.join(code_lines), style_code))
                code_lines = []
            in_code_block = not in_code_block
            continue
        if in_code_block:
            # Paragraph text is parsed as XML-like markup, so &, < and > in
            # code must be escaped to be rendered literally.
            code_lines.append(html.escape(line, quote=False))
            continue

        # Markdown elements to ReportLab flowables
        if line.startswith("# "):
            story.append(Paragraph(line[2:], style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(line[3:], style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(line[4:], style_h3))
        elif stripped.startswith(("* ", "- ")):
            # U+2022 BULLET as the list marker.
            story.append(Paragraph(stripped[2:], style_normal, bulletText='\u2022'))
        elif re.match(r'^\d+\.\s', stripped):
            story.append(Paragraph(stripped, style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.1 * inch))
        else:
            story.append(Paragraph(_inline_markup(line), style_normal))

    # Flush constructs that were still open when the input ended; otherwise a
    # table on the last line or an unterminated code fence would be dropped.
    if table_rows:
        story.extend(_table_flowables(table_rows))
    if in_code_block and code_lines:
        story.append(Paragraph('<br/>'.join(code_lines), style_code))

    return story
# --- Gradio API Function ---
def generate_pdfs_api(files, layouts, fonts, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (file, layout, font) combination for the Gradio UI.

    All regular files in ``OUTPUT_DIR`` are deleted first, so the returned
    list only contains PDFs produced by this call.

    Args:
        files: Uploaded markdown files. Each item may be an object exposing
            the file path as ``.name`` or a plain path string (which form is
            passed presumably depends on the Gradio version; both are accepted).
        layouts: Selected layout labels; each must be a key of ``LAYOUTS``.
        fonts: Selected font names registered with ReportLab.
        progress: Gradio progress tracker used to wrap the file loop.

    Returns:
        Tuple ``(generated_files, log)``: a sorted list of PDF paths found in
        ``OUTPUT_DIR`` after generation and a markdown-formatted log string.

    Raises:
        gr.Error: If ``files``, ``layouts`` or ``fonts`` is empty.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # Clean output directory before generation
    if OUTPUT_DIR.exists():
        for stale in OUTPUT_DIR.glob('*'):
            if stale.is_file():
                stale.unlink()

    log_parts = ["Starting PDF generation...\n"]
    date_str = datetime.datetime.now().strftime("%Y-%m-%d")

    for md_file_obj in progress.tqdm(files, desc="Processing Files"):
        # Accept both file-like upload objects (.name holds the path) and
        # plain path strings.
        original_md_path = Path(getattr(md_file_obj, "name", md_file_obj))
        log_parts.append(f"\nProcessing: **{original_md_path.name}**\n")

        try:
            with open(original_md_path, 'r', encoding='utf-8') as f:
                md_content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # One unreadable upload is logged and skipped so the remaining
            # files are still processed.
            log_parts.append(f" - **ERROR**: Could not read {original_md_path.name}: {e}\n")
            continue

        for layout_name in layouts:
            for font_name in fonts:
                status_text = f" - Generating: {layout_name}, {font_name}"
                log_parts.append(status_text + "\n")
                try:
                    output_filename = f"{original_md_path.stem}_{layout_name.replace(' ', '-')}_{font_name}_{date_str}.pdf"
                    output_path = OUTPUT_DIR / output_filename
                    doc = SimpleDocTemplate(
                        str(output_path),
                        pagesize=LAYOUTS[layout_name]["size"],
                        rightMargin=inch, leftMargin=inch,
                        topMargin=inch, bottomMargin=inch,
                    )
                    story = markdown_to_story(md_content, font_name)
                    doc.build(story)
                except Exception as e:
                    # Best effort per combination: record the failure and
                    # carry on with the next layout/font.
                    log_parts.append(f" - **ERROR**: Failed to process with font {font_name}: {e}\n")

    # NOTE(review): the leading glyph in the message below looks like a
    # mis-decoded symbol (possibly a check-mark emoji) - confirm and restore.
    log_parts.append("\nβ PDF generation complete!")

    # Sorted so the download list has a stable order across runs.
    generated_files = sorted(str(pdf) for pdf in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "".join(log_parts)
# --- Gradio UI Definition ---
# Fonts are discovered (and registered with ReportLab) once, at import time.
AVAILABLE_FONTS = discover_and_register_fonts()

# Reference markdown shown read-only-style in the UI as an example of the
# constructs the converter understands.
SAMPLE_MARKDOWN = (
    "# Sample Document\n\n"
    "This is a sample markdown file. **ReportLab** is now creating the PDF with dynamic fonts and layouts.\n\n"
    "### Features\n"
    "- Item 1\n"
    "- Item 2\n\n"
    "| Header 1 | Header 2 |\n"
    "|----------|----------|\n"
    "| Cell 1 | Cell 2 |\n\n"
    "```python\n"
    "def hello():\n"
    " print(\"Hello, PDF!\")\n"
    "```\n"
)

# NOTE(review): the leading glyphs in several headings/labels below ("π",
# "βοΈ") look like mis-decoded emoji; they are kept as found - confirm the
# intended characters and restore them.
with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# π Advanced Markdown to PDF Generator")
    gr.Markdown("Upload Markdown files, select layouts and fonts, and generate multiple PDF variations.")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### βοΈ Generation Settings")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10)
            uploaded_files = gr.File(
                label="Upload Markdown Files (.md)",
                file_count="multiple",
                file_types=[".md"],
            )
            # The first layout is pre-selected.
            selected_layouts = gr.CheckboxGroup(
                choices=list(LAYOUTS),
                label="Select Page Layouts",
                value=next(iter(LAYOUTS)),
            )
            # With no registered fonts, fall back to a built-in font name so
            # the font selector is never empty.
            if not AVAILABLE_FONTS:
                gr.Warning("No fonts found in 'fonts' directory. Using default.")
                AVAILABLE_FONTS = ["Helvetica"]  # ReportLab's default
            selected_fonts = gr.CheckboxGroup(
                choices=AVAILABLE_FONTS,
                label="Select Fonts to Use",
                value=AVAILABLE_FONTS[0] if AVAILABLE_FONTS else None,
            )
            generate_btn = gr.Button("π Generate PDFs", variant="primary")

        # Right column: generation log and downloadable results.
        with gr.Column(scale=2):
            gr.Markdown("### π Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    # Clicking the button runs the generator and fills the file list and log.
    generate_btn.click(
        fn=generate_pdfs_api,
        inputs=[uploaded_files, selected_layouts, selected_fonts],
        outputs=[file_output, log_output],
    )

if __name__ == "__main__":
    demo.launch()