File size: 11,959 Bytes
e13b6e9
 
 
 
 
 
96b5748
6d888b2
96b5748
 
6d888b2
e13b6e9
6d888b2
e13b6e9
96b5748
e13b6e9
 
 
6d888b2
e13b6e9
6d888b2
 
 
 
 
e13b6e9
6d888b2
 
 
e13b6e9
96b5748
6d888b2
 
1f6e4e7
6d888b2
 
 
 
96b5748
6d888b2
04a1711
688eb59
1f6e4e7
6d888b2
688eb59
6d888b2
 
 
 
 
 
029ee1c
6d888b2
 
 
 
688eb59
6d888b2
099c24d
6d888b2
 
099c24d
6d888b2
 
 
 
 
 
 
688eb59
 
6d888b2
 
 
 
688eb59
6d888b2
 
 
 
 
688eb59
6d888b2
 
 
688eb59
6d888b2
 
 
688eb59
6d888b2
 
 
 
 
 
688eb59
6d888b2
 
 
 
 
 
 
099c24d
6d888b2
688eb59
6d888b2
 
 
 
 
 
 
1f6e4e7
 
 
6d888b2
 
 
 
 
 
 
 
 
1f6e4e7
 
6d888b2
1f6e4e7
6d888b2
029ee1c
6d888b2
 
 
 
 
1f6e4e7
6d888b2
 
 
 
 
 
 
96b5748
6d888b2
 
 
 
 
 
 
 
 
 
96b5748
5fe90b3
 
6d888b2
 
 
 
 
 
 
 
 
 
 
 
e13b6e9
6d888b2
 
 
 
688eb59
6d888b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04a1711
6d888b2
 
 
e13b6e9
6d888b2
 
e09f4c0
6d888b2
 
 
 
 
 
 
 
 
 
 
 
e09f4c0
6d888b2
 
e09f4c0
6d888b2
e09f4c0
6d888b2
 
 
e09f4c0
6d888b2
e09f4c0
6d888b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import gradio as gr
from pathlib import Path
import datetime
import re
import os
import shutil
import fitz  # PyMuPDF
from PIL import Image
import io

from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# --- Configuration ---
# Every working directory is resolved against the current working directory
# at import time.
CWD = Path.cwd()
INPUT_DIR = CWD / "uploaded_files"
OUTPUT_DIR = CWD / "output_pdfs"
TEMP_DIR = CWD / "temp_emoji_images"
# Font files are looked up in the current working directory (which is the
# script's directory only when the app is launched from there).
FONT_DIR = CWD

# Create any working directory that is still missing
for directory in (INPUT_DIR, OUTPUT_DIR, TEMP_DIR):
    directory.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
# Path of the colour emoji font; filled in by setup_fonts()
EMOJI_FONT_PATH = None
# Rendered emoji PNG data, keyed by (emoji_char, size_pt)
EMOJI_IMAGE_CACHE = {}

def setup_fonts():
    """Locate the emoji font and register the body font with ReportLab.

    Stores the path of ``NotoColorEmoji-Regular.ttf`` (looked up in
    ``FONT_DIR``) in the module-level ``EMOJI_FONT_PATH``, then tries to
    register ``DejaVuSans.ttf`` under the font name ``DejaVuSans``. A failure
    to register DejaVuSans is reported as a warning only.

    Raises:
        FileNotFoundError: If ``NotoColorEmoji-Regular.ttf`` does not exist in
            ``FONT_DIR``.
    """
    global EMOJI_FONT_PATH
    print("--- Setting up fonts ---")

    # Locate the essential NotoColorEmoji font
    noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not noto_emoji_path.exists():
        print("CRITICAL ERROR: 'NotoColorEmoji-Regular.ttf' not found in the application directory.")
        print("This file is required to render emojis as images. Please add it to the directory.")
        # Raise an error to stop the app from running in a broken state
        raise FileNotFoundError("Could not find NotoColorEmoji-Regular.ttf. The application cannot proceed.")

    EMOJI_FONT_PATH = str(noto_emoji_path)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    # Register a basic font for ReportLab
    try:
        pdfmetrics.registerFont(TTFont('DejaVuSans', 'DejaVuSans.ttf'))
        # NOTE(review): only the regular face is registered above; the bold and
        # oblique faces named in this family mapping are not registered here.
        pdfmetrics.registerFontFamily('DejaVuSans', normal='DejaVuSans', bold='DejaVuSans-Bold', italic='DejaVuSans-Oblique', boldItalic='DejaVuSans-BoldOblique')
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate, and show the cause.
        print("Warning: DejaVuSans font not found. Falling back to Helvetica. Please add DejaVuSans.ttf for better Unicode support.")
        print(f"Reason: {exc}")

def render_emoji_as_image(emoji_char, size_pt):
    """Render one emoji character to a PNG held in an in-memory buffer.

    The character is drawn with the font at ``EMOJI_FONT_PATH`` onto a
    temporary PyMuPDF page of ``size_pt * 1.5`` points per side, and the page
    is rasterised at 300 dpi with an alpha channel.

    Args:
        emoji_char: The character to draw.
        size_pt: Font size in points used for drawing.

    Returns:
        A new ``io.BytesIO`` positioned at offset 0 that contains the PNG
        data, or ``None`` if the emoji font path is not set or rendering
        failed.
    """
    if not EMOJI_FONT_PATH:
        print("Cannot render emoji: Emoji font path not set.")
        return None

    # The cache stores immutable PNG bytes; every caller gets its own buffer
    # so that consumers never share a read position.
    cache_key = (emoji_char, size_pt)
    cached_png = EMOJI_IMAGE_CACHE.get(cache_key)
    if cached_png is not None:
        return io.BytesIO(cached_png)

    try:
        # Use PyMuPDF (fitz) to draw the emoji onto a temporary canvas
        rect = fitz.Rect(0, 0, size_pt * 1.5, size_pt * 1.5)
        doc = fitz.open()
        try:
            page = doc.new_page(width=rect.width, height=rect.height)

            # Load the color emoji font
            page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)

            # Insert the emoji character. The vertical alignment may need tweaking.
            page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)

            # Rasterise the page with an alpha channel and encode it as PNG
            pix = page.get_pixmap(alpha=True, dpi=300)
            png_bytes = pix.tobytes("png")
        finally:
            # Release the temporary document even when rendering fails
            doc.close()
    except Exception as e:
        print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
        return None

    EMOJI_IMAGE_CACHE[cache_key] = png_bytes
    return io.BytesIO(png_bytes)

# --- PDF Generation ---
def create_pdf_from_markdown(md_filepath: Path):
    """Convert a single Markdown file into a PDF written to ``OUTPUT_DIR``.

    The file is read as UTF-8 and processed line by line: lines starting with
    ``# `` and ``## `` become headings, other non-empty lines become body
    text, and empty lines become vertical spacers. Runs of emoji characters
    are replaced by images produced by ``render_emoji_as_image``.

    Args:
        md_filepath: Path of the Markdown file to convert.

    Returns:
        The ``Path`` of the generated PDF, or ``None`` if the file could not
        be read or the PDF could not be built.
    """
    print(f"--- Starting PDF conversion for: {md_filepath.name} ---")

    # Use a font that supports a wide range of characters, if available
    body_font = 'DejaVuSans' if 'DejaVuSans' in pdfmetrics.getRegisteredFontNames() else 'Helvetica'
    style_body = ParagraphStyle('Body', fontName=body_font, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('H1', fontName=body_font, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('H2', fontName=body_font, fontSize=18, leading=22, spaceAfter=10)

    # Matches runs of emoji. Ranges are inclusive, so the last code point of
    # each Unicode block (U+1F5FF, U+1F9FF, U+26FF) is covered as well.
    emoji_pattern = re.compile(
        "(["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F900-\U0001F9FF"
        "\u2600-\u26FF"
        "]+)"
    )

    def text_to_flowables(text, style):
        """Turn one line of text into a single-row Table of Paragraphs and Images.

        Returns ``None`` when the line yields no flowables at all.
        """
        flowables = []
        # Splitting on a capturing group keeps the emoji runs in the result
        for part in emoji_pattern.split(text):
            if not part:
                continue

            if emoji_pattern.match(part):
                # This part is an emoji or a sequence of them
                for emoji_char in part:
                    img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                    if img_buffer:
                        # Create an Image object, slightly larger than the text for better spacing
                        img = ReportLabImage(img_buffer, height=style.fontSize * 1.2, width=style.fontSize * 1.2)
                        flowables.append(img)
            else:
                # Plain text: escape the characters Paragraph treats as markup
                escaped = part.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
                flowables.append(Paragraph(escaped, style))

        # Use a Table to keep all parts on the same line
        if flowables:
            return Table([flowables], colWidths=[None] * len(flowables), style=[('VALIGN', (0,0), (-1,-1), 'MIDDLE')])
        return None

    # Read the markdown file
    try:
        with open(md_filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"ERROR: Could not read file {md_filepath.name}: {e}")
        return None

    # The "story" is ReportLab's list of things to draw in the PDF
    story = []
    for line in lines:
        stripped_line = line.strip()

        # Simple Markdown parsing; the slice drops the whole heading marker
        if stripped_line.startswith('# '):
            flowable = text_to_flowables(stripped_line[2:], style_h1)
        elif stripped_line.startswith('## '):
            flowable = text_to_flowables(stripped_line[3:], style_h2)
        elif stripped_line:
            flowable = text_to_flowables(stripped_line, style_body)
        else:
            flowable = Spacer(1, 0.2 * inch)

        if flowable is not None:
            story.append(flowable)

    # Output name is the source stem plus a minute-resolution timestamp
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
    output_filename = f"{md_filepath.stem}_{timestamp}.pdf"
    output_filepath = OUTPUT_DIR / output_filename

    # Build the PDF document
    doc = SimpleDocTemplate(str(output_filepath), pagesize=letter)
    try:
        doc.build(story)
    except Exception as e:
        print(f"ERROR: Failed to build PDF for {md_filepath.name}. Reason: {e}")
        return None

    print(f"SUCCESS: Successfully created PDF: {output_filename}")
    return output_filepath

# --- Gradio UI and Logic ---
def process_uploads(files):
    """Convert every uploaded Markdown file to a PDF.

    Args:
        files: The uploaded files. Each item may be an object exposing the
            file path as ``.name`` or a plain path string.

    Returns:
        A ``(log_text, pdf_paths)`` tuple: the newline-joined processing log
        and the list of all PDFs currently in ``OUTPUT_DIR`` as returned by
        ``refresh_gallery``.

    Raises:
        gr.Error: If no files were provided.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Clear caches and temp directories for a clean run
    EMOJI_IMAGE_CACHE.clear()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    TEMP_DIR.mkdir(exist_ok=True)

    log_messages = []

    for file_obj in files:
        # Accept both file-like upload objects (path in .name) and plain
        # path strings.
        input_path = Path(getattr(file_obj, "name", file_obj))
        log_messages.append(f"Processing '{input_path.name}'...")

        # Core PDF creation step
        output_path = create_pdf_from_markdown(input_path)

        # Status markers are written as escapes (U+2705 check mark, U+274C
        # cross mark) so they survive re-encoding of this source file.
        if output_path:
            log_messages.append(f"\u2705 Success! PDF saved to '{output_path.name}'.")
        else:
            log_messages.append(f"\u274c Failed to process '{input_path.name}'. Check console for errors.")

    # After processing, list every PDF in the output directory for the gallery
    return "\n".join(log_messages), refresh_gallery()

def refresh_gallery():
    """Return the paths of all ``*.pdf`` files in ``OUTPUT_DIR`` as strings.

    The list is sorted in descending string order.
    """
    pdf_paths = [str(pdf) for pdf in OUTPUT_DIR.glob("*.pdf")]
    pdf_paths.sort(reverse=True)
    return pdf_paths

# Main execution block
if __name__ == "__main__":
    # This must run once at startup to check for the required font
    try:
        setup_fonts()
    except FileNotFoundError as e:
        # If the font is missing, we stop the app from launching.
        print("\n" + "="*60)
        print(e)
        print("The application cannot start without this font file.")
        print("Please add 'NotoColorEmoji-Regular.ttf' and 'DejaVuSans.ttf' to your project directory.")
        print("="*60)
        # Exit with a non-zero status so callers can detect the failure;
        # SystemExit does not depend on the `site`-provided exit() helper.
        raise SystemExit(1)

    # Define the Gradio Interface.
    # Emoji in UI strings are written as escapes so they survive re-encoding
    # of this source file: U+1F4DC scroll, U+1F52E crystal ball,
    # U+1F4DA books, U+1F504 refresh arrows.
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Markdown-to-PDF Alchemist") as demo:
        gr.Markdown("# \U0001F4DC Markdown-to-PDF Alchemist")
        gr.Markdown("Upload one or more `.md` files. This tool will convert them into PDFs, preserving emojis by rendering them as high-quality images. All generated PDFs will appear in the library below.")

        with gr.Row():
            with gr.Column(scale=1):
                upload_button = gr.File(
                    label="Upload Markdown Files (.md)",
                    file_count="multiple",
                    file_types=[".md"],
                )
                generate_button = gr.Button("\U0001F52E Alchemize to PDF", variant="primary")
                log_output = gr.Textbox(label="Alchemist's Log", lines=8, interactive=False)

            with gr.Column(scale=2):
                gr.Markdown("### \U0001F4DA Generated PDF Library")
                # Gallery fed with the generated PDF paths.
                # NOTE(review): confirm gr.Gallery can preview PDF files.
                pdf_gallery = gr.Gallery(
                    label="Generated PDFs",
                    show_label=False,
                    elem_id="gallery",
                    columns=3,
                    object_fit="contain",
                    height="auto"
                )
                # This button allows manual refreshing of the gallery
                refresh_button = gr.Button("\U0001F504 Refresh Library")

        # Define the actions when buttons are clicked
        generate_button.click(
            fn=process_uploads,
            inputs=[upload_button],
            outputs=[log_output, pdf_gallery]
        )

        refresh_button.click(
            fn=refresh_gallery,
            inputs=None,
            outputs=[pdf_gallery]
        )

        # Load the gallery with existing PDFs when the app starts
        demo.load(refresh_gallery, None, pdf_gallery)

    # Launch the application
    demo.launch(debug=True)