Spaces:
Build error
Build error
File size: 11,959 Bytes
e13b6e9 96b5748 6d888b2 96b5748 6d888b2 e13b6e9 6d888b2 e13b6e9 96b5748 e13b6e9 6d888b2 e13b6e9 6d888b2 e13b6e9 6d888b2 e13b6e9 96b5748 6d888b2 1f6e4e7 6d888b2 96b5748 6d888b2 04a1711 688eb59 1f6e4e7 6d888b2 688eb59 6d888b2 029ee1c 6d888b2 688eb59 6d888b2 099c24d 6d888b2 099c24d 6d888b2 688eb59 6d888b2 688eb59 6d888b2 688eb59 6d888b2 688eb59 6d888b2 688eb59 6d888b2 688eb59 6d888b2 099c24d 6d888b2 688eb59 6d888b2 1f6e4e7 6d888b2 1f6e4e7 6d888b2 1f6e4e7 6d888b2 029ee1c 6d888b2 1f6e4e7 6d888b2 96b5748 6d888b2 96b5748 5fe90b3 6d888b2 e13b6e9 6d888b2 688eb59 6d888b2 04a1711 6d888b2 e13b6e9 6d888b2 e09f4c0 6d888b2 e09f4c0 6d888b2 e09f4c0 6d888b2 e09f4c0 6d888b2 e09f4c0 6d888b2 e09f4c0 6d888b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 |
import gradio as gr
from pathlib import Path
import datetime
import re
import os
import shutil
import fitz # PyMuPDF
from PIL import Image
import io
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, BaseDocTemplate, Frame, PageTemplate, Image as ReportLabImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# --- Configuration ---
# Every working directory is resolved against the current working directory.
CWD = Path.cwd()
INPUT_DIR = CWD / "uploaded_files"
OUTPUT_DIR = CWD / "output_pdfs"
TEMP_DIR = CWD / "temp_emoji_images"
FONT_DIR = CWD  # Assumes fonts are in the same directory as the script

# Create the working directories up front; existing ones are left untouched.
for directory in (INPUT_DIR, OUTPUT_DIR, TEMP_DIR):
    directory.mkdir(exist_ok=True)

# --- Font & Emoji Handling ---
EMOJI_FONT_PATH = None  # filled in by setup_fonts()
EMOJI_IMAGE_CACHE = {}  # keyed by (emoji_char, size_pt)
def setup_fonts():
    """Locate the colour-emoji font and register a Unicode body font.

    Looks for ``NotoColorEmoji-Regular.ttf`` in ``FONT_DIR`` and stores its
    path in the module-level ``EMOJI_FONT_PATH``. Then tries to register
    ``DejaVuSans.ttf`` with ReportLab under the name ``DejaVuSans``; if that
    fails a warning is printed and the function returns normally.

    Raises:
        FileNotFoundError: if ``NotoColorEmoji-Regular.ttf`` is not present
            in ``FONT_DIR``.
    """
    global EMOJI_FONT_PATH
    print("--- Setting up fonts ---")

    # Locate the essential NotoColorEmoji font
    noto_emoji_path = FONT_DIR / "NotoColorEmoji-Regular.ttf"
    if not noto_emoji_path.exists():
        print("CRITICAL ERROR: 'NotoColorEmoji-Regular.ttf' not found in the application directory.")
        print("This file is required to render emojis as images. Please add it to the directory.")
        # Raise an error to stop the app from running in a broken state
        raise FileNotFoundError("Could not find NotoColorEmoji-Regular.ttf. The application cannot proceed.")
    EMOJI_FONT_PATH = str(noto_emoji_path)
    print(f"Found emoji font: {EMOJI_FONT_PATH}")

    # Register a basic font for ReportLab
    try:
        pdfmetrics.registerFont(TTFont('DejaVuSans', 'DejaVuSans.ttf'))
        # Only the regular face is registered, so every family slot points at
        # it; naming unregistered '-Bold'/'-Oblique' faces would make any
        # bold/italic markup fail at render time.
        pdfmetrics.registerFontFamily(
            'DejaVuSans',
            normal='DejaVuSans',
            bold='DejaVuSans',
            italic='DejaVuSans',
            boldItalic='DejaVuSans',
        )
    except Exception as exc:
        # Best-effort: a missing/unreadable font must not stop the app, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print("Warning: DejaVuSans font not found. Falling back to Helvetica. Please add DejaVuSans.ttf for better Unicode support.")
        print(f"Reason: {exc}")
def render_emoji_as_image(emoji_char, size_pt):
    """Render one emoji character to a PNG held in an in-memory buffer.

    The character is drawn with the font at ``EMOJI_FONT_PATH`` onto a
    temporary PyMuPDF page and rasterised at 300 dpi with an alpha channel.
    The PNG bytes are cached in ``EMOJI_IMAGE_CACHE`` under
    ``(emoji_char, size_pt)``.

    Args:
        emoji_char: the character to draw.
        size_pt: font size in points; the canvas is 1.5x this size square.

    Returns:
        A new ``io.BytesIO`` positioned at the start of the PNG data, or
        ``None`` if the emoji font path is unset or rendering fails.
    """
    if not EMOJI_FONT_PATH:
        print("Cannot render emoji: Emoji font path not set.")
        return None

    # Use a cache to avoid re-rendering the same emoji multiple times.
    # The cache stores immutable bytes; each caller gets its own stream so
    # that one consumer's reads/seeks/close cannot affect another's.
    cache_key = (emoji_char, size_pt)
    cached_png = EMOJI_IMAGE_CACHE.get(cache_key)
    if cached_png is not None:
        return io.BytesIO(cached_png)

    try:
        canvas_side = size_pt * 1.5
        doc = fitz.open()
        try:
            page = doc.new_page(width=canvas_side, height=canvas_side)
            # Load the color emoji font
            page.insert_font(fontname="emoji", fontfile=EMOJI_FONT_PATH)
            # Insert the emoji character. The vertical alignment may need tweaking.
            page.insert_text(fitz.Point(0, size_pt * 1.1), emoji_char, fontname="emoji", fontsize=size_pt)
            # Get a high-resolution PNG of the emoji with a transparent background
            pix = page.get_pixmap(alpha=True, dpi=300)
            png_bytes = pix.tobytes("png")
        finally:
            # Release the temporary document even when rendering raises.
            doc.close()
    except Exception as e:
        print(f"ERROR: Could not render emoji '{emoji_char}': {e}")
        return None

    EMOJI_IMAGE_CACHE[cache_key] = png_bytes
    return io.BytesIO(png_bytes)
# --- PDF Generation ---
# Runs of characters that are rendered as images. Character-class ranges are
# inclusive, so the last code point of each Unicode block is covered.
_EMOJI_PATTERN = re.compile(
    "(["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F900-\U0001F9FF"
    "\u2600-\u26FF"
    "]+)"
)


def _text_to_flowables(text, style):
    """Convert one line of text into a single-row Table of Paragraphs and emoji Images.

    Returns ``None`` when the line yields no flowables at all.
    """
    cells = []
    # Splitting on a capturing group keeps the emoji runs in the result list.
    for part in _EMOJI_PATTERN.split(text):
        if not part:
            continue
        if _EMOJI_PATTERN.match(part):
            # This part is an emoji or a sequence of them
            for emoji_char in part:
                img_buffer = render_emoji_as_image(emoji_char, style.fontSize)
                if img_buffer:
                    # Slightly larger than the text for better spacing
                    cells.append(
                        ReportLabImage(
                            img_buffer,
                            height=style.fontSize * 1.2,
                            width=style.fontSize * 1.2,
                        )
                    )
        else:
            # Plain text: Paragraph parses XML-like markup, so the special
            # characters must be escaped ('&' first, or it would re-escape
            # the entities produced for '<' and '>').
            escaped = part.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            cells.append(Paragraph(escaped, style))

    if not cells:
        return None
    # Use a Table to keep all parts on the same line.
    return Table(
        [cells],
        colWidths=[None] * len(cells),
        style=[('VALIGN', (0, 0), (-1, -1), 'MIDDLE')],
    )


def create_pdf_from_markdown(md_filepath: Path):
    """Convert a single Markdown file into a PDF in ``OUTPUT_DIR``.

    Only a minimal Markdown subset is recognised: ``# `` and ``## `` headings,
    blank lines (rendered as vertical space) and plain text lines. Emoji
    characters are replaced with inline images.

    Args:
        md_filepath: path of the UTF-8 Markdown file to read.

    Returns:
        The ``Path`` of the generated PDF, or ``None`` if the file could not
        be read or the PDF could not be built (the reason is printed).
    """
    print(f"--- Starting PDF conversion for: {md_filepath.name} ---")

    # Use a font that supports a wide range of characters, if available
    body_font = 'DejaVuSans' if 'DejaVuSans' in pdfmetrics.getRegisteredFontNames() else 'Helvetica'
    style_body = ParagraphStyle('Body', fontName=body_font, fontSize=11, leading=14)
    style_h1 = ParagraphStyle('H1', fontName=body_font, fontSize=24, leading=28, spaceAfter=12, textColor=colors.darkblue)
    style_h2 = ParagraphStyle('H2', fontName=body_font, fontSize=18, leading=22, spaceAfter=10)

    # Read the markdown file
    try:
        with open(md_filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"ERROR: Could not read file {md_filepath.name}: {e}")
        return None

    # The "story" is ReportLab's list of things to draw in the PDF
    story = []
    for line in lines:
        stripped_line = line.strip()
        # Simple Markdown parsing; each slice drops exactly the marker prefix.
        if stripped_line.startswith('# '):
            flowable = _text_to_flowables(stripped_line[len('# '):], style_h1)
        elif stripped_line.startswith('## '):
            flowable = _text_to_flowables(stripped_line[len('## '):], style_h2)
        elif stripped_line:
            flowable = _text_to_flowables(stripped_line, style_body)
        else:
            flowable = Spacer(1, 0.2 * inch)
        if flowable:
            story.append(flowable)

    # Output name is "<input stem>_<timestamp to the minute>.pdf"
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
    output_filename = f"{md_filepath.stem}_{timestamp}.pdf"
    output_filepath = OUTPUT_DIR / output_filename

    # Build the PDF document
    doc = SimpleDocTemplate(str(output_filepath), pagesize=letter)
    try:
        doc.build(story)
    except Exception as e:
        print(f"ERROR: Failed to build PDF for {md_filepath.name}. Reason: {e}")
        return None
    print(f"SUCCESS: Successfully created PDF: {output_filename}")
    return output_filepath
# --- Gradio UI and Logic ---
def process_uploads(files):
    """Convert every uploaded Markdown file to a PDF.

    Args:
        files: the uploaded files; each item is either an object exposing the
            file path as ``.name`` or a plain path string.

    Returns:
        A ``(log_text, pdf_paths)`` tuple: the newline-joined per-file log and
        the list returned by ``refresh_gallery()``.

    Raises:
        gr.Error: if ``files`` is empty or ``None``.
    """
    if not files:
        raise gr.Error("Please upload at least one Markdown (.md) file.")

    # Clear caches and temp directories for a clean run
    EMOJI_IMAGE_CACHE.clear()
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    TEMP_DIR.mkdir(exist_ok=True)

    log_messages = []
    for file_obj in files:
        # Accept both file-like upload objects (path in .name) and bare paths.
        input_path = Path(getattr(file_obj, "name", file_obj))
        log_messages.append(f"Processing '{input_path.name}'...")

        # Core PDF creation step
        output_path = create_pdf_from_markdown(input_path)
        if output_path:
            log_messages.append(f"✅ Success! PDF saved to '{output_path.name}'.")
        else:
            log_messages.append(f"❌ Failed to process '{input_path.name}'. Check console for errors.")

    # After processing, list every PDF in the output directory for the gallery
    return "\n".join(log_messages), refresh_gallery()
def refresh_gallery():
    """Return the paths of all ``*.pdf`` files in ``OUTPUT_DIR`` as strings, in descending order."""
    pdf_paths = [str(pdf_file) for pdf_file in OUTPUT_DIR.glob("*.pdf")]
    pdf_paths.sort(reverse=True)
    return pdf_paths
# Main execution block
if __name__ == "__main__":
    # This must run once at startup to check for the required font
    try:
        setup_fonts()
    except FileNotFoundError as e:
        # If the font is missing, we stop the app from launching.
        print("\n" + "=" * 60)
        print(e)
        print("The application cannot start without this font file.")
        print("Please add 'NotoColorEmoji-Regular.ttf' and 'DejaVuSans.ttf' to your project directory.")
        print("=" * 60)
        # Exit with a non-zero status; the interactive exit() helper is not
        # guaranteed to exist and reported success.
        raise SystemExit(1)

    # Define the Gradio Interface
    # NOTE(review): the emoji in the UI labels below were reconstructed from
    # mis-decoded bytes in the source; confirm the intended glyphs.
    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Markdown-to-PDF Alchemist") as demo:
        gr.Markdown("# 📄 Markdown-to-PDF Alchemist")
        gr.Markdown("Upload one or more `.md` files. This tool will convert them into PDFs, preserving emojis by rendering them as high-quality images. All generated PDFs will appear in the library below.")
        with gr.Row():
            with gr.Column(scale=1):
                upload_button = gr.File(
                    label="Upload Markdown Files (.md)",
                    file_count="multiple",
                    file_types=[".md"],
                )
                generate_button = gr.Button("🔮 Alchemize to PDF", variant="primary")
                log_output = gr.Textbox(label="Alchemist's Log", lines=8, interactive=False)
            with gr.Column(scale=2):
                gr.Markdown("### 📚 Generated PDF Library")
                # NOTE(review): the gallery is fed PDF file paths; confirm
                # that gr.Gallery actually renders a preview for PDFs.
                pdf_gallery = gr.Gallery(
                    label="Generated PDFs",
                    show_label=False,
                    elem_id="gallery",
                    columns=3,
                    object_fit="contain",
                    height="auto",
                )
                # This button allows manual refreshing of the gallery
                refresh_button = gr.Button("🔄 Refresh Library")

        # Define the actions when buttons are clicked
        generate_button.click(
            fn=process_uploads,
            inputs=[upload_button],
            outputs=[log_output, pdf_gallery],
        )
        refresh_button.click(
            fn=refresh_gallery,
            inputs=None,
            outputs=[pdf_gallery],
        )
        # Load the gallery with existing PDFs when the app starts
        demo.load(refresh_gallery, None, pdf_gallery)

    # Launch the application
    demo.launch(debug=True)
|