"""Gradio app that converts Markdown files to PDF, rendering emojis as images."""

import datetime
import io
import os
import re
import shutil
from pathlib import Path
from xml.sax.saxutils import escape as xml_escape

import fitz  # PyMuPDF
import gradio as gr
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import (
    BaseDocTemplate,
    Frame,
    Image as ReportLabImage,
    PageBreak,
    PageTemplate,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)

# --- Configuration ---
CWD = Path.cwd()
# Create dedicated directories for clarity
INPUT_DIR = CWD / "uploaded_files"
OUTPUT_DIR = CWD / "output_pdfs"
TEMP_DIR = CWD / "temp_emoji_images"
# Fonts are looked up in the current working directory (not necessarily the
# directory that contains this script).
FONT_DIR = CWD

# Ensure all directories exist
for d in [INPUT_DIR, OUTPUT_DIR, TEMP_DIR]:
    d.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
EMOJI_FONT_PATH = None
# Maps (emoji_char, size_pt) -> PNG bytes of the rendered emoji.
EMOJI_IMAGE_CACHE = {}

# One or more consecutive code points from the emoji blocks handled here:
# Emoticons, Misc Symbols & Pictographs, Supplemental Symbols & Pictographs,
# and Miscellaneous Symbols. The capture group makes `split()` keep the emoji
# runs in its output.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F900-\U0001F9FF"
    "\u2600-\u26FF"
    "]+)"
)


def setup_fonts():
    """Locate the emoji font and register the body font with ReportLab.

    Sets the module-level ``EMOJI_FONT_PATH`` when the emoji font is found.
    A missing DejaVuSans font is only a warning; callers fall back to
    Helvetica in that case.

    Raises:
        FileNotFoundError: If 'NotoColorEmoji-Regular.ttf' is not in FONT_DIR.
    """
    global EMOJI_FONT_PATH
    print("--- Setting up fonts ---")

    # Locate the essential NotoColorEmoji font
    noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not noto_emoji_path.exists():
        print("CRITICAL ERROR: 'NotoColorEmoji-Regular.ttf' not found in the application directory.")
        print("This file is required to render emojis as images. Please add it to the directory.")
        # Raise an error to stop the app from running in a broken state
        raise FileNotFoundError("Could not find NotoColorEmoji-Regular.ttf. The application cannot proceed.")

    EMOJI_FONT_PATH = str(noto_emoji_path)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    # Register a basic font for ReportLab. Best-effort: any failure here is
    # reported and the app continues with the built-in Helvetica font.
    try:
        pdfmetrics.registerFont(TTFont('DejaVuSans', 'DejaVuSans.ttf'))
        pdfmetrics.registerFontFamily(
            'DejaVuSans',
            normal='DejaVuSans',
            bold='DejaVuSans-Bold',
            italic='DejaVuSans-Oblique',
            boldItalic='DejaVuSans-BoldOblique',
        )
    except Exception as exc:
        print("Warning: DejaVuSans font not found. Falling back to Helvetica. Please add DejaVuSans.ttf for better Unicode support.")
        print(f"Reason: {exc}")


def _rasterize_emoji(emoji_char, size_pt):
    """Draw one emoji on a scratch PyMuPDF page and return it as PNG bytes."""
    rect = fitz.Rect(0, 0, size_pt * 1.5, size_pt * 1.5)
    doc = fitz.open()
    try:
        page = doc.new_page(width=rect.width, height=rect.height)
        # Load the color emoji font
        page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
        # Insert the emoji character. The vertical alignment may need tweaking.
        page.insert_text(
            fitz.Point(0, size_pt * 1.1),
            emoji_char,
            fontname="emoji",
            fontsize=size_pt,
        )
        # High-resolution PNG with an alpha channel
        pix = page.get_pixmap(alpha=True, dpi=300)
        return pix.tobytes("png")
    finally:
        # Close the scratch document even when rendering fails.
        doc.close()


def render_emoji_as_image(emoji_char, size_pt):
    """Render a single emoji character as an in-memory PNG.

    Args:
        emoji_char: The character to render.
        size_pt: Font size in points used for rendering.

    Returns:
        A new ``io.BytesIO`` positioned at 0 containing the PNG data, or
        ``None`` if the emoji font is not configured or rendering fails.
    """
    if not EMOJI_FONT_PATH:
        print("Cannot render emoji: Emoji font path not set.")
        return None

    # Cache the PNG bytes rather than a buffer object: every caller gets its
    # own BytesIO, so several image flowables never share one file position.
    cache_key = (emoji_char, size_pt)
    png_bytes = EMOJI_IMAGE_CACHE.get(cache_key)
    if png_bytes is None:
        try:
            png_bytes = _rasterize_emoji(emoji_char, size_pt)
        except Exception as e:
            print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
            return None
        EMOJI_IMAGE_CACHE[cache_key] = png_bytes
    return io.BytesIO(png_bytes)


# --- PDF Generation ---
def _text_to_flowables(text, style):
    """Turn one line of text into a single-row Table of Paragraphs and Images.

    Emoji runs are replaced by one image per code point; everything else
    becomes a Paragraph in ``style``. Returns ``None`` when nothing could be
    produced (empty text, or only emojis that failed to render).
    """
    cells = []
    for part in _EMOJI_PATTERN.split(text):
        if not part:
            continue
        if _EMOJI_PATTERN.match(part):
            # This part is an emoji or a sequence of them
            for emoji_char in part:
                img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                if img_buffer:
                    # Slightly larger than the text for better spacing
                    cells.append(
                        ReportLabImage(
                            img_buffer,
                            height=style.fontSize * 1.2,
                            width=style.fontSize * 1.2,
                        )
                    )
        else:
            # Plain text. Paragraph parses its input as XML-like markup, so
            # '&', '<' and '>' must be escaped or the build can fail.
            cells.append(Paragraph(xml_escape(part), style))

    if not cells:
        return None
    # A one-row Table keeps all parts on the same line.
    return Table(
        [cells],
        colWidths=[None] * len(cells),
        style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')],
    )


def create_pdf_from_markdown(md_filepath: Path):
    """Convert a single Markdown file into a PDF in OUTPUT_DIR.

    Only a minimal Markdown subset is handled: '# ' and '## ' headings,
    blank lines (rendered as vertical space) and plain text lines.

    Args:
        md_filepath: Path of the Markdown file to read (UTF-8).

    Returns:
        Path of the generated PDF, or ``None`` if the file could not be read
        or the PDF could not be built (the reason is printed).
    """
    print(f"--- Starting PDF conversion for: {md_filepath.name} ---")

    # Use a font that supports a wide range of characters, if available
    body_font = 'DejaVuSans' if 'DejaVuSans' in pdfmetrics.getRegisteredFontNames() else 'Helvetica'
    style_body = ParagraphStyle('Body', fontName=body_font, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('H1', fontName=body_font, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('H2', fontName=body_font, fontSize=18, leading=22, spaceAfter=10)

    # Read the markdown file
    try:
        with open(md_filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"ERROR: Could not read file {md_filepath.name}: {e}")
        return None

    h1_prefix = '# '
    h2_prefix = '## '

    # The "story" is ReportLab's list of things to draw in the PDF
    story = []
    for line in lines:
        stripped_line = line.strip()
        # Simple Markdown parsing; strip exactly the heading marker.
        if stripped_line.startswith(h1_prefix):
            flowable = _text_to_flowables(stripped_line[len(h1_prefix):], style_h1)
        elif stripped_line.startswith(h2_prefix):
            flowable = _text_to_flowables(stripped_line[len(h2_prefix):], style_h2)
        elif stripped_line:
            flowable = _text_to_flowables(stripped_line, style_body)
        else:
            flowable = Spacer(1, 0.2 * inch)
        if flowable:
            story.append(flowable)

    # Timestamped output name (minute resolution, so a second conversion of
    # the same file within one minute overwrites the first).
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
    output_filename = f"{md_filepath.stem}_{timestamp}.pdf"
    output_filepath = OUTPUT_DIR / output_filename

    # Build the PDF document
    doc = SimpleDocTemplate(str(output_filepath), pagesize=letter)
    try:
        doc.build(story)
    except Exception as e:
        print(f"ERROR: Failed to build PDF for {md_filepath.name}. Reason: {e}")
        return None
    print(f"SUCCESS: Successfully created PDF: {output_filename}")
    return output_filepath


# --- Gradio UI and Logic ---
def refresh_gallery():
    """Scans the output directory and returns a list of all PDFs found."""
    return sorted([str(p) for p in OUTPUT_DIR.glob("*.pdf")], reverse=True)


def process_uploads(files):
    """Convert every uploaded Markdown file to a PDF.

    Args:
        files: Uploaded files from the Gradio File component; each item is
            either a path string or an object with a ``name`` attribute
            holding the path.

    Returns:
        A tuple ``(log_text, pdf_paths)``: the newline-joined processing log
        and the paths of all PDFs currently in OUTPUT_DIR, newest name first.

    Raises:
        gr.Error: If no files were uploaded.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Clear caches and temp directories for a clean run
    EMOJI_IMAGE_CACHE.clear()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    TEMP_DIR.mkdir(exist_ok=True)

    log_messages = []
    for file_obj in files:
        # Accept both plain path strings and tempfile-like wrappers.
        input_path = Path(getattr(file_obj, "name", file_obj))
        log_messages.append(f"Processing '{input_path.name}'...")

        # Core PDF creation step
        output_path = create_pdf_from_markdown(input_path)
        if output_path:
            log_messages.append(f"✅ Success! PDF saved to '{output_path.name}'.")
        else:
            log_messages.append(f"❌ Failed to process '{input_path.name}'. Check console for errors.")

    # After processing, list every PDF in the output directory for the gallery
    return "\n".join(log_messages), refresh_gallery()


# Main execution block
if __name__ == "__main__":
    # This must run once at startup to check for the required font
    try:
        setup_fonts()
    except FileNotFoundError as e:
        # If the font is missing, we stop the app from launching.
        print("\n" + "=" * 60)
        print(e)
        print("The application cannot start without this font file.")
        print("Please add 'NotoColorEmoji-Regular.ttf' and 'DejaVuSans.ttf' to your project directory.")
        print("=" * 60)
        # Exit with a non-zero status so callers can detect the failure.
        raise SystemExit(1)

    # Define the Gradio Interface
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Markdown-to-PDF Alchemist") as demo:
        gr.Markdown("# 📜 Markdown-to-PDF Alchemist")
        gr.Markdown("Upload one or more `.md` files. This tool will convert them into PDFs, preserving emojis by rendering them as high-quality images. All generated PDFs will appear in the library below.")

        with gr.Row():
            with gr.Column(scale=1):
                upload_button = gr.File(
                    label="Upload Markdown Files (.md)",
                    file_count="multiple",
                    file_types=[".md"],
                )
                generate_button = gr.Button("🔮 Alchemize to PDF", variant="primary")
                log_output = gr.Textbox(label="Alchemist's Log", lines=8, interactive=False)

            with gr.Column(scale=2):
                gr.Markdown("### 📚 Generated PDF Library")
                # NOTE(review): the gallery is fed PDF file paths; confirm the
                # installed Gradio version can preview PDFs in a Gallery.
                pdf_gallery = gr.Gallery(
                    label="Generated PDFs",
                    show_label=False,
                    elem_id="gallery",
                    columns=3,
                    object_fit="contain",
                    height="auto",
                )
                # This button allows manual refreshing of the gallery
                refresh_button = gr.Button("🔄 Refresh Library")

        # Define the actions when buttons are clicked
        generate_button.click(
            fn=process_uploads,
            inputs=[upload_button],
            outputs=[log_output, pdf_gallery],
        )
        refresh_button.click(
            fn=refresh_gallery,
            inputs=None,
            outputs=[pdf_gallery],
        )

        # Load the gallery with existing PDFs when the app starts
        demo.load(refresh_gallery, None, pdf_gallery)

    # Launch the application
    demo.launch(debug=True)