"""Gradio app that converts uploaded Markdown files into PDFs with ReportLab.

Supports several page layouts, user-supplied TrueType fonts (from ``fonts/``),
an optional dedicated emoji font, multi-column pages, and combining several
Markdown files into a single PDF separated by page breaks.
"""

import datetime
import html
import os
import re
import shutil
from pathlib import Path

import gradio as gr
import requests
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape, legal, letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import (
    BaseDocTemplate,
    Frame,
    PageBreak,
    PageTemplate,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)

# --- Configuration & Setup ---
LAYOUTS = {
    "A4 Portrait": {"size": A4},
    "A4 Landscape": {"size": landscape(A4)},
    "Letter Portrait": {"size": letter},
    "Letter Landscape": {"size": landscape(letter)},
    "Legal Portrait": {"size": legal},
    "Legal Landscape": {"size": landscape(legal)},
}
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)
FONT_DIR = Path("fonts")
FONT_DIR.mkdir(exist_ok=True)
EMOJI_FONT_NAME = "NotoColorEmoji"

# Marker used both to join combined documents and to split a document into pages.
PAGE_BREAK_MARKER = "\n\n---PAGE_BREAK---\n\n"
# Horizontal gap between adjacent columns in multi-column layouts.
COLUMN_GUTTER = 0.2 * inch
# Upper bound on how long a single font download may block start-up.
DOWNLOAD_TIMEOUT_SECONDS = 30

# Compiled once at import time; the bounds of each range are inclusive.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F680-\U0001F6FF"  # transport & map symbols (e.g. the rocket)
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\u2600-\u26FF"          # miscellaneous symbols
    "\u2700-\u27BF"          # dingbats
    "]+)"
)
_BOLD_PATTERN = re.compile(r"\*\*(.*?)\*\*")
# The word-boundary guards keep snake_case identifiers from being italicised.
_ITALIC_PATTERN = re.compile(r"(?<!\w)_(.+?)_(?!\w)")
_ORDERED_ITEM_PATTERN = re.compile(r"^\d+\.\s")


# --- Font & Emoji Handling ---
def download_fonts():
    """Download DejaVuSans (text) and NotoColorEmoji (emojis) if they are missing.

    Failures are reported on stdout and otherwise ignored, so the app can still
    start without network access.
    """
    fonts_to_check = {
        "DejaVuSans.ttf": "https://github.com/dejavu-fonts/dejavu-fonts/blob/main/ttf/DejaVuSans.ttf?raw=true",
        "NotoColorEmoji.ttf": "https://github.com/googlefonts/noto-emoji/blob/main/fonts/NotoColorEmoji.ttf?raw=true",
    }
    for font_filename, url in fonts_to_check.items():
        font_path = FONT_DIR / font_filename
        if font_path.exists():
            continue
        print(f"Downloading {font_filename}...")
        # Write to a side file first so an interrupted write never leaves a
        # truncated .ttf that later runs would mistake for a valid font.
        partial_path = font_path.with_name(font_path.name + ".part")
        try:
            response = requests.get(
                url, allow_redirects=True, timeout=DOWNLOAD_TIMEOUT_SECONDS
            )
            response.raise_for_status()
            partial_path.write_bytes(response.content)
            partial_path.replace(font_path)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download {font_filename}: {e}")
        else:
            print(f"{font_filename} downloaded successfully.")


def discover_and_register_fonts():
    """Register every ``.ttf`` in ``FONT_DIR`` with ReportLab.

    Returns:
        Sorted list of registered font names that are offered to the user.
        Fonts whose name contains "emoji" are registered but not listed.
    """
    download_fonts()
    font_names = []
    for font_path in FONT_DIR.glob("*.ttf"):
        font_name = font_path.stem
        try:
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
        except Exception as e:
            # Best effort: one unreadable font file must not prevent start-up.
            print(f"Could not register font {font_path.name}: {e}")
            continue
        # Don't add the emoji font to the user-selectable list
        if "emoji" not in font_name.lower():
            font_names.append(font_name)
    return sorted(font_names)


def apply_emoji_font(text: str) -> str:
    """Wrap runs of emoji characters in a ``<font>`` tag naming the emoji font.

    The text is returned unchanged when the emoji font is not registered,
    because a ``<font>`` tag naming an unknown font would make the paragraph
    fail to build.
    """
    # NOTE(review): whether ReportLab can actually embed a colour-bitmap font
    # such as NotoColorEmoji is not established by this file - confirm.
    if EMOJI_FONT_NAME not in pdfmetrics.getRegisteredFontNames():
        return text
    return _EMOJI_PATTERN.sub(rf'<font name="{EMOJI_FONT_NAME}">\1</font>', text)


# --- ReportLab PDF Generation (Core Logic) ---
def _format_inline(text: str) -> str:
    """Turn one piece of raw Markdown text into ReportLab paragraph markup."""
    # Escape first so literal '&', '<' and '>' in the source text cannot be
    # mistaken for markup; the tags added below are the only markup present.
    marked_up = apply_emoji_font(html.escape(text, quote=False))
    marked_up = _BOLD_PATTERN.sub(r"<b>\1</b>", marked_up)
    return _ITALIC_PATTERN.sub(r"<i>\1</i>", marked_up)


def _flush_table(story, table_rows, cell_style):
    """Append the pending table (if any) to *story* and clear *table_rows*."""
    if not table_rows:
        return
    # Pad short rows so every row has the same number of cells.
    column_count = max(len(row) for row in table_rows)
    data = [
        row + [Paragraph("", cell_style) for _ in range(column_count - len(row))]
        for row in table_rows
    ]
    table = Table(data, hAlign='LEFT', repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    story.append(table)
    story.append(Spacer(1, 0.2 * inch))
    # Reset in place so rows never leak into the next table.
    table_rows.clear()


def markdown_to_story(markdown_text: str, font_name: str):
    """Convert Markdown text to a list of ReportLab flowables (a "story").

    Handles ``#``/``##``/``###`` headings, ``*``/``-`` bullets, numbered items,
    pipe tables, fenced code blocks, ``**bold**`` / ``_italic_`` spans, emojis,
    and the ``---PAGE_BREAK---`` marker.

    Args:
        markdown_text: The Markdown source; pages are separated by
            ``PAGE_BREAK_MARKER``.
        font_name: Name of a registered font used for headings and body text.

    Returns:
        The list of flowables, ready to pass to ``doc.build``.
    """
    styles = getSampleStyleSheet()
    style_normal = ParagraphStyle('BodyText', parent=styles['BodyText'], fontName=font_name, spaceAfter=6, leading=14)
    style_h1 = ParagraphStyle('h1', parent=styles['h1'], fontName=font_name, spaceBefore=12, fontSize=20, leading=24)
    style_h2 = ParagraphStyle('h2', parent=styles['h2'], fontName=font_name, spaceBefore=10, fontSize=16, leading=20)
    style_h3 = ParagraphStyle('h3', parent=styles['h3'], fontName=font_name, spaceBefore=8, fontSize=14, leading=18)
    # 'borderPadding' is the ParagraphStyle attribute for the space between the
    # text and its border; an unknown keyword such as 'padding' has no effect.
    style_code = ParagraphStyle('Code', parent=styles['Code'], fontName='Courier', backColor=colors.whitesmoke,
                                borderColor=colors.lightgrey, borderWidth=1, borderPadding=5)

    story = []
    # Split by our custom page break marker or process as a single block
    pages = markdown_text.split(PAGE_BREAK_MARKER)
    last_page_index = len(pages) - 1

    for page_index, page_content in enumerate(pages):
        in_code_block = False
        code_lines = []
        table_rows = []

        for line in page_content.split('\n'):
            stripped = line.strip()

            # Code fences are checked first so that everything between them,
            # including lines that look like table rows, stays verbatim.
            if stripped.startswith("```"):
                if in_code_block:
                    story.append(Paragraph('<br/>'.join(code_lines), style_code))
                    code_lines = []
                else:
                    _flush_table(story, table_rows, style_normal)
                in_code_block = not in_code_block
                continue
            if in_code_block:
                code_lines.append(html.escape(line, quote=False))
                continue

            # Table rows are collected until the first non-table line.
            if stripped.startswith('|') and stripped.endswith('|'):
                if all(c in '-|: ' for c in stripped):
                    continue  # header separator row such as |---|:--:|
                cells = stripped.strip('|').split('|')
                table_rows.append(
                    [Paragraph(_format_inline(cell.strip()), style_normal) for cell in cells]
                )
                continue
            _flush_table(story, table_rows, style_normal)

            # Markdown elements to Flowables
            if line.startswith("# "):
                story.append(Paragraph(_format_inline(line[2:]), style_h1))
            elif line.startswith("## "):
                story.append(Paragraph(_format_inline(line[3:]), style_h2))
            elif line.startswith("### "):
                story.append(Paragraph(_format_inline(line[4:]), style_h3))
            elif stripped.startswith(("* ", "- ")):
                story.append(Paragraph(_format_inline(stripped[2:]), style_normal, bulletText='•'))
            elif _ORDERED_ITEM_PATTERN.match(stripped):
                story.append(Paragraph(_format_inline(stripped), style_normal))
            elif stripped == "":
                story.append(Spacer(1, 0.1 * inch))
            else:
                story.append(Paragraph(_format_inline(line), style_normal))

        # Emit whatever is still pending when the page ends: a table on the
        # last line, or the content of a fence that was never closed.
        _flush_table(story, table_rows, style_normal)
        if in_code_block and code_lines:
            story.append(Paragraph('<br/>'.join(code_lines), style_code))

        if page_index < last_page_index:
            story.append(PageBreak())
    return story


# --- Gradio API Function ---
def generate_pdfs_api(files, layouts, fonts, combine_files, num_columns, progress=gr.Progress(track_tqdm=True)):
    """Generate one PDF per (document, layout, font) combination.

    Args:
        files: Uploaded Markdown files, either objects exposing the path as
            ``.name`` or plain path strings.
        layouts: Selected keys of ``LAYOUTS``.
        fonts: Selected registered font names.
        combine_files: When true, all files are merged into one document with
            a page break between them.
        num_columns: Number of text columns per page (values below 1 are
            treated as 1).
        progress: Gradio progress tracker.

    Returns:
        ``(generated_files, log)``: the paths of the PDFs now in ``OUTPUT_DIR``
        and a Markdown log of what was generated or failed.

    Raises:
        gr.Error: If no files, layouts, or fonts were provided.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown file.")
    if not layouts:
        raise gr.Error("Please select at least one page layout.")
    if not fonts:
        raise gr.Error("Please select at least one font.")

    # The slider may deliver a float; range() and the filename need an int.
    num_columns = max(1, int(num_columns))

    # Start every run from an empty output directory.
    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

    log_lines = ["Starting PDF generation..."]

    documents = []
    for md_file_obj in files:
        md_path = Path(getattr(md_file_obj, "name", md_file_obj))
        documents.append((md_path.stem, md_path.read_text(encoding='utf-8')))

    if combine_files:
        combined_content = PAGE_BREAK_MARKER.join(content for _, content in documents)
        documents = [("Combined_Document", combined_content)]

    tasks = [
        {"content": content, "layout": layout_name, "font": font_name, "filename_stem": filename_stem}
        for filename_stem, content in documents
        for layout_name in layouts
        for font_name in fonts
    ]

    date_str = datetime.datetime.now().strftime("%Y-%m-%d")
    for task in progress.tqdm(tasks, desc="Generating PDFs"):
        try:
            output_filename = f"{task['filename_stem']}_{task['layout'].replace(' ', '-')}_{task['font']}_Cols{num_columns}_{date_str}.pdf"
            output_path = OUTPUT_DIR / output_filename
            log_lines.append(f" - Generating: {output_filename}")

            story = markdown_to_story(task['content'], task['font'])
            pagesize = LAYOUTS[task['layout']]["size"]

            if num_columns > 1:
                doc = BaseDocTemplate(str(output_path), pagesize=pagesize, leftMargin=0.5*inch, rightMargin=0.5*inch,
                                      topMargin=0.5*inch, bottomMargin=0.5*inch)
                # n columns share the printable width minus the n-1 gutters
                # between them, so the last column ends at the right margin.
                frame_width = (doc.width - (num_columns - 1) * COLUMN_GUTTER) / num_columns
                frames = [
                    Frame(doc.leftMargin + col * (frame_width + COLUMN_GUTTER), doc.bottomMargin,
                          frame_width, doc.height, id=f'col{col}')
                    for col in range(num_columns)
                ]
                doc.addPageTemplates([PageTemplate(id='TwoCol', frames=frames)])
            else:
                doc = SimpleDocTemplate(str(output_path), pagesize=pagesize, leftMargin=inch, rightMargin=inch,
                                        topMargin=inch, bottomMargin=inch)
            doc.build(story)
        except Exception as e:
            # One failing combination is logged; the remaining tasks still run.
            log_lines.append(f" - **ERROR**: {e}")

    log_lines.append("")
    log_lines.append("✅ PDF generation complete!")
    generated_files = sorted(str(f) for f in OUTPUT_DIR.glob("*.pdf"))
    return generated_files, "\n".join(log_lines)


# --- Gradio UI Definition ---
AVAILABLE_FONTS = discover_and_register_fonts()
SAMPLE_MARKDOWN = "# Sample Document 🚀\n\nThis document shows **bold text**, _italic text_, and emojis like 😊 and 💻.\n\n### A Table\n| Flavor | Rating |\n|-------------|------------|\n| Chocolate | 10/10 |\n| Vanilla | 9/10 |"

with gr.Blocks(theme=gr.themes.Soft(), title="Advanced PDF Generator") as demo:
    gr.Markdown("# 📄 Advanced PDF Generator with Emojis & Columns")
    gr.Markdown("Upload Markdown files, combine them, and generate multi-column PDFs with custom fonts and layouts.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Generation Settings")
            uploaded_files = gr.File(label="Upload Markdown Files (.md)", file_count="multiple", file_types=[".md"])
            combine_files_check = gr.Checkbox(label="Combine uploaded files into a single PDF", value=False)
            num_columns_slider = gr.Slider(label="Number of Columns", minimum=1, maximum=4, step=1, value=1)

            layout_choices = list(LAYOUTS.keys())
            # CheckboxGroup values are lists of selected choices.
            selected_layouts = gr.CheckboxGroup(choices=layout_choices, label="Select Page Layouts",
                                                value=layout_choices[:1])

            if not AVAILABLE_FONTS:
                gr.Warning("No text fonts found in 'fonts' directory. Using defaults.")
                AVAILABLE_FONTS = ["Helvetica"]

            selected_fonts = gr.CheckboxGroup(choices=AVAILABLE_FONTS, label="Select Text Fonts to Use",
                                              value=AVAILABLE_FONTS[:1])

            generate_btn = gr.Button("🚀 Generate PDFs", variant="primary")
            gr.Textbox(value=SAMPLE_MARKDOWN, label="Sample Markdown (for reference)", lines=10, interactive=False)

        with gr.Column(scale=2):
            gr.Markdown("### 📥 Results")
            log_output = gr.Markdown(label="Generation Log", value="Logs will appear here...")
            file_output = gr.Files(label="Download Generated PDFs")

    generate_btn.click(fn=generate_pdfs_api,
                       inputs=[uploaded_files, selected_layouts, selected_fonts, combine_files_check, num_columns_slider],
                       outputs=[file_output, log_output])

if __name__ == "__main__":
    demo.launch()