# Spaces: Build error
import gradio as gr | |
from pathlib import Path | |
import datetime | |
import re | |
import os | |
import shutil | |
import fitz # PyMuPDF | |
from PIL import Image | |
import io | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter | |
from reportlab.lib.units import inch | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
# --- Configuration ---
# Every working directory is resolved relative to the process's current
# working directory at import time.
CWD = Path.cwd()
INPUT_DIR = CWD.joinpath("uploaded_files")
OUTPUT_DIR = CWD.joinpath("output_pdfs")
TEMP_DIR = CWD.joinpath("temp_emoji_images")
FONT_DIR = CWD  # Assumes fonts are in the same directory as the script

# Create the working directories up front so later code can rely on them.
for directory in (INPUT_DIR, OUTPUT_DIR, TEMP_DIR):
    directory.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
# Path to the colour emoji font; filled in by setup_fonts().
EMOJI_FONT_PATH = None
# Rendered emoji images keyed by (emoji_char, size_pt).
EMOJI_IMAGE_CACHE = {}
def setup_fonts():
    """Locate the emoji font and register a Unicode body font with ReportLab.

    Sets the module-level ``EMOJI_FONT_PATH`` to the path of
    ``NotoColorEmoji-Regular.ttf`` inside ``FONT_DIR``, then tries to register
    ``DejaVuSans.ttf`` under the name ``DejaVuSans``. A failure to register
    DejaVuSans is reported as a warning only; callers fall back to Helvetica.

    Raises:
        FileNotFoundError: if ``NotoColorEmoji-Regular.ttf`` is not present in
            ``FONT_DIR``.
    """
    global EMOJI_FONT_PATH
    print("--- Setting up fonts ---")

    # Locate the essential NotoColorEmoji font
    noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not noto_emoji_path.exists():
        print("CRITICAL ERROR: 'NotoColorEmoji-Regular.ttf' not found in the application directory.")
        print("This file is required to render emojis as images. Please add it to the directory.")
        # Raise an error to stop the app from running in a broken state
        raise FileNotFoundError("Could not find NotoColorEmoji-Regular.ttf. The application cannot proceed.")

    EMOJI_FONT_PATH = str(noto_emoji_path)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    # Register a basic font for ReportLab
    try:
        pdfmetrics.registerFont(TTFont('DejaVuSans', 'DejaVuSans.ttf'))
        # Only the regular face is registered above, so every family variant
        # is mapped to it; pointing bold/italic at faces that were never
        # registered would fail as soon as a paragraph requested them.
        pdfmetrics.registerFontFamily(
            'DejaVuSans',
            normal='DejaVuSans',
            bold='DejaVuSans',
            italic='DejaVuSans',
            boldItalic='DejaVuSans',
        )
    except Exception as e:
        # Best-effort: a missing or unreadable font must not stop the app, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print("Warning: DejaVuSans font not found. Falling back to Helvetica. Please add DejaVuSans.ttf for better Unicode support.")
        print(f"Reason: {e}")
def render_emoji_as_image(emoji_char, size_pt):
    """Render one emoji character to a PNG held in an in-memory buffer.

    The character is drawn with the font at ``EMOJI_FONT_PATH`` onto a
    temporary PyMuPDF page of ``size_pt * 1.5`` points square and rasterised
    at 300 dpi with an alpha channel. Results are cached per
    ``(emoji_char, size_pt)`` in ``EMOJI_IMAGE_CACHE``.

    Args:
        emoji_char: The character to draw.
        size_pt: Font size in points used for drawing.

    Returns:
        A fresh ``io.BytesIO`` positioned at offset 0 containing the PNG data,
        or ``None`` if the emoji font path is not set or rendering fails.
    """
    if not EMOJI_FONT_PATH:
        print("Cannot render emoji: Emoji font path not set.")
        return None

    # Use a cache to avoid re-rendering the same emoji multiple times.
    cache_key = (emoji_char, size_pt)
    cached = EMOJI_IMAGE_CACHE.get(cache_key)
    if cached is not None:
        # Hand out an independent buffer so one consumer's read position or
        # close() cannot affect another user of the same cached image.
        return io.BytesIO(cached.getvalue())

    try:
        # Use PyMuPDF (fitz) to draw the emoji onto a temporary canvas.
        rect = fitz.Rect(0, 0, size_pt * 1.5, size_pt * 1.5)
        doc = fitz.open()
        try:
            page = doc.new_page(width=rect.width, height=rect.height)
            # Load the emoji font
            page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
            # Insert the emoji character. The vertical alignment may need tweaking.
            page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
            # Get a high-resolution PNG of the emoji with an alpha channel
            pix = page.get_pixmap(alpha=True, dpi=300)
            png_bytes = pix.tobytes("png")
        finally:
            # Always release the temporary document, even when drawing fails.
            doc.close()
    except Exception as e:
        print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
        return None

    # Keep one master copy in the cache and return a separate buffer.
    EMOJI_IMAGE_CACHE[cache_key] = io.BytesIO(png_bytes)
    return io.BytesIO(png_bytes)
# --- PDF Generation --- | |
def create_pdf_from_markdown(md_filepath: Path):
    """Convert a single Markdown file into a PDF in ``OUTPUT_DIR``.

    The file is read as UTF-8 and processed line by line. Lines starting with
    ``# `` or ``## `` become headings, other non-empty lines become body text,
    and empty lines become vertical spacers. Emoji characters are replaced by
    images produced by ``render_emoji_as_image``.

    Args:
        md_filepath: Path of the Markdown file to convert.

    Returns:
        The ``Path`` of the generated PDF, or ``None`` if the file could not
        be read or the PDF could not be built.
    """
    print(f"--- Starting PDF conversion for: {md_filepath.name} ---")

    # Define styles for the PDF document
    styles = getSampleStyleSheet()
    # Use the registered Unicode font if available, otherwise Helvetica
    body_font = 'DejaVuSans' if 'DejaVuSans' in pdfmetrics.getRegisteredFontNames() else 'Helvetica'
    style_body = ParagraphStyle('Body', fontName=body_font, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('H1', fontName=body_font, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('H2', fontName=body_font, fontSize=18, leading=22, spaceAfter=10)

    # Runs of characters from the emoji-bearing Unicode blocks. The ranges are
    # inclusive, so the final code point of each block (e.g. U+1F5FF) matches.
    emoji_pattern = re.compile(
        "(["
        "\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F900-\U0001F9FF"
        "\u2600-\u26FF"
        "]+)"
    )

    def text_to_flowables(text, style):
        """Turn one line of text into a single-row Table of Paragraphs and Images.

        Returns ``None`` when the line yields no flowables at all.
        """
        flowables = []
        # split() with a capturing group keeps the emoji runs in the result.
        for part in emoji_pattern.split(text):
            if not part:
                continue
            if emoji_pattern.match(part):
                # This part is an emoji or a sequence of them
                for emoji_char in part:
                    img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                    if img_buffer:
                        # Slightly larger than the text for better spacing
                        img = ReportLabImage(img_buffer, height=style.fontSize * 1.2, width=style.fontSize * 1.2)
                        flowables.append(img)
            else:
                # Plain text: Paragraph parses XML-like markup, so the markup
                # characters must be escaped ('&' first, to avoid double-escaping).
                escaped = part.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
                flowables.append(Paragraph(escaped, style))
        # Use a one-row Table to keep all parts on the same line.
        if flowables:
            return Table([flowables], colWidths=[None] * len(flowables), style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')])
        return None

    # Read the markdown file
    try:
        with open(md_filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"ERROR: Could not read file {md_filepath.name}: {e}")
        return None

    # The "story" is ReportLab's list of things to draw in the PDF
    story = []
    for line in lines:
        stripped_line = line.strip()
        # Simple Markdown parsing
        if stripped_line.startswith('# '):
            flowable = text_to_flowables(stripped_line[2:], style_h1)
        elif stripped_line.startswith('## '):
            # The marker is three characters long ("## "), so skip all three.
            flowable = text_to_flowables(stripped_line[3:], style_h2)
        elif stripped_line:
            flowable = text_to_flowables(stripped_line, style_body)
        else:
            flowable = Spacer(1, 0.2 * inch)
        if flowable:
            story.append(flowable)

    # Timestamped output name (minute resolution): converting the same stem
    # twice within one minute writes to the same path.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
    output_filename = f"{md_filepath.stem}_{timestamp}.pdf"
    output_filepath = OUTPUT_DIR / output_filename

    # Build the PDF document
    doc = SimpleDocTemplate(str(output_filepath), pagesize=letter)
    try:
        doc.build(story)
        print(f"SUCCESS: Successfully created PDF: {output_filename}")
        return output_filepath
    except Exception as e:
        print(f"ERROR: Failed to build PDF for {md_filepath.name}. Reason: {e}")
        return None
# --- Gradio UI and Logic --- | |
def process_uploads(files):
    """Convert each uploaded file to a PDF and report the outcome.

    Args:
        files: The uploaded files. Each item may be an object exposing the
            file path as ``.name`` or a plain path string.

    Returns:
        A tuple ``(log_text, pdf_paths)`` where ``log_text`` is the
        newline-joined processing log and ``pdf_paths`` is the list returned
        by ``refresh_gallery()``.

    Raises:
        gr.Error: if ``files`` is empty or ``None``.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Clear caches and temp directories for a clean run
    EMOJI_IMAGE_CACHE.clear()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    TEMP_DIR.mkdir(exist_ok=True)

    log_messages = []
    for file_obj in files:
        # Accept both upload objects carrying the path in `.name` and plain
        # path strings, so the handler works with either shape.
        input_path = Path(getattr(file_obj, "name", file_obj))
        log_messages.append(f"Processing '{input_path.name}'...")

        # Core PDF creation step
        output_path = create_pdf_from_markdown(input_path)
        if output_path:
            log_messages.append(f"✅ Success! PDF saved to '{output_path.name}'.")
        else:
            log_messages.append(f"❌ Failed to process '{input_path.name}'. Check console for errors.")

    # After processing, list every PDF in the output directory for the gallery
    return "\n".join(log_messages), refresh_gallery()
def refresh_gallery():
    """List every ``*.pdf`` in the output directory as strings, in descending order."""
    pdf_paths = [str(pdf) for pdf in OUTPUT_DIR.glob("*.pdf")]
    pdf_paths.sort(reverse=True)
    return pdf_paths
# Main execution block | |
if __name__ == "__main__":
    # This must run once at startup to check for the required font
    try:
        setup_fonts()
    except FileNotFoundError as e:
        # If the font is missing, we stop the app from launching.
        print("\n" + "=" * 60)
        print(e)
        print("The application cannot start without this font file.")
        print("Please add 'NotoColorEmoji-Regular.ttf' and 'DejaVuSans.ttf' to your project directory.")
        print("=" * 60)
        # Exit with a non-zero status so the failure is visible to whatever
        # launched the script; the `exit()` helper comes from the `site`
        # module and would report success here.
        raise SystemExit(1)

    # Define the Gradio Interface
    # NOTE(review): the leading glyphs in several labels below look like
    # mis-encoded emoji; restore the intended characters from the original source.
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Markdown-to-PDF Alchemist") as demo:
        gr.Markdown("# ๐ Markdown-to-PDF Alchemist")
        gr.Markdown("Upload one or more `.md` files. This tool will convert them into PDFs, preserving emojis by rendering them as high-quality images. All generated PDFs will appear in the library below.")

        with gr.Row():
            with gr.Column(scale=1):
                upload_button = gr.File(
                    label="Upload Markdown Files (.md)",
                    file_count="multiple",
                    file_types=[".md"],
                )
                generate_button = gr.Button("๐ฎ Alchemize to PDF", variant="primary")
                log_output = gr.Textbox(label="Alchemist's Log", lines=8, interactive=False)

            with gr.Column(scale=2):
                gr.Markdown("### ๐ Generated PDF Library")
                # NOTE(review): the gallery is fed PDF file paths; confirm that
                # gr.Gallery can preview PDFs, otherwise render page images first.
                pdf_gallery = gr.Gallery(
                    label="Generated PDFs",
                    show_label=False,
                    elem_id="gallery",
                    columns=3,
                    object_fit="contain",
                    height="auto"
                )
                # This button allows manual refreshing of the gallery
                refresh_button = gr.Button("๐ Refresh Library")

        # Define the actions when buttons are clicked
        generate_button.click(
            fn=process_uploads,
            inputs=[upload_button],
            outputs=[log_output, pdf_gallery]
        )
        refresh_button.click(
            fn=refresh_gallery,
            inputs=None,
            outputs=[pdf_gallery]
        )

        # Load the gallery with existing PDFs when the app starts
        demo.load(refresh_gallery, None, pdf_gallery)

    # Launch the application
    demo.launch(debug=True)