# Spaces: Sleeping
import streamlit as st | |
from pathlib import Path | |
import base64 | |
import datetime | |
import re | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib.pagesizes import letter, A4, legal, landscape | |
from reportlab.lib.units import inch | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
from reportlab.lib import colors | |
# --- Configuration & Setup ---
# Selectable page layouts: display name -> ReportLab page size plus an icon string.
# Every paper size is listed twice, portrait first and landscape second.
LAYOUTS = {
    f"{paper} {orientation}": {"size": orient(page_size), "icon": "π"}
    for paper, page_size in (("A4", A4), ("Letter", letter), ("Legal", legal))
    for orientation, orient in (("Portrait", lambda size: size), ("Landscape", landscape))
}

# Generated PDFs are written here; the directory is created at import time.
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)

# TTF file that must be present for emojis to be rendered.
EMOJI_FONT_PATH = Path("NotoColorEmoji-Regular.ttf")
# Regex matching runs of emoji characters so they can be wrapped in a font tag.
# The BMP part is limited to explicit symbol code points and blocks: a single
# range from U+24C2 up to U+1F251 would also match CJK, Hangul and other
# ordinary text, which must stay in the regular body font.
EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U00002702-\U000027B0"  # Dingbats
    "\U000024C2"  # circled M
    "\U000025AA\U000025AB\U000025B6\U000025C0"  # small squares, play/reverse triangles
    "\U000025FB-\U000025FE"  # medium squares
    "\U00002600-\U000026FF"  # Miscellaneous Symbols
    "\U00002B05-\U00002B55"  # arrows, large squares, star, heavy circle
    "\U00003030\U0000303D\U00003297\U00003299"  # wavy dash, part alternation mark, circled ideographs
    "\U0000FE0F"  # variation selector-16 (emoji presentation)
    "\U0001F000-\U0001F251"  # mahjong/domino/cards, enclosed alphanumerics & ideographs, flags
    "]+",
    flags=re.UNICODE,
)
# --- Core PDF Generation Class --- | |
class PDFGenerator:
    """
    Generate PDF files from Markdown text with ReportLab.

    Handles emoji-font registration, a small line-based Markdown conversion,
    and writing the finished document into ``OUTPUT_DIR``.
    """

    def __init__(self, font_path: Path):
        """
        Create a generator and try to register the emoji font.

        Args:
            font_path: Location of the TTF file used to render emojis.
        """
        self.emoji_font_name = "NotoEmoji"
        self._register_emoji_font(font_path)

    def _register_emoji_font(self, font_path: Path):
        """
        Register the emoji TTF with ReportLab if the file exists.

        When the file is missing, an error is shown in the Streamlit app and
        ``emoji_font_name`` is switched to "Helvetica", which also turns
        emoji wrapping off in ``_wrap_emojis_for_reportlab``.
        """
        if font_path.exists():
            # Hand ReportLab a plain string path, as create_pdf does for its output file.
            pdfmetrics.registerFont(TTFont(self.emoji_font_name, str(font_path)))
        else:
            # Provide a helpful error in the web app if the font is missing
            st.error(f"Emoji font not found at '{font_path}'. Emojis will not be rendered. Please download it.")
            self.emoji_font_name = "Helvetica"  # Fallback to a standard font

    def _wrap_emojis_for_reportlab(self, text: str) -> str:
        """
        Wrap every run of emojis in a ReportLab ``<font>`` tag.

        Returns ``text`` unchanged when the emoji font was not registered.
        """
        # If the emoji font failed to register, don't try to use it.
        if self.emoji_font_name != "NotoEmoji":
            return text
        return EMOJI_PATTERN.sub(
            lambda m: f'<font name="{self.emoji_font_name}">{m.group(0)}</font>',
            text,
        )

    @staticmethod
    def _escape_markup(text: str) -> str:
        """Return ``text`` with ``&``, ``<`` and ``>`` replaced by XML entities."""
        from html import escape

        return escape(text, quote=False)

    @staticmethod
    def _format_inline(text: str) -> str:
        """
        Convert ``**bold**`` and ``_italic_`` spans to ``<b>`` / ``<i>`` tags.

        Underscores inside a word (snake_case identifiers) are left alone.
        """
        text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
        return re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'<i>\1</i>', text)

    def _markdown_to_story(self, markdown_text: str) -> list:
        """
        Convert a Markdown string into a list of ReportLab flowables.

        Supported syntax, decided line by line: ``` fenced code blocks,
        ``#``/``##``/``###`` headings, ``*``/``-`` bullets, ``1.`` numbered
        items, blank lines (rendered as a spacer) and plain paragraphs.
        Bold/italic markers are converted in every text line outside code
        blocks.

        Args:
            markdown_text: The Markdown source.

        Returns:
            The flowables ("story") to pass to ``SimpleDocTemplate.build``.
        """
        styles = getSampleStyleSheet()
        style_normal = styles['BodyText']
        style_h1 = styles['h1']
        style_h2 = styles['h2']
        style_h3 = styles['h3']
        # Use a monospaced font for code blocks
        style_code = ParagraphStyle('Code', parent=styles['Normal'], fontName='Courier', textColor=colors.darkred)

        story = []
        code_lines = []
        in_code_block = False

        def flush_code_block():
            story.append(Paragraph("".join(code_lines), style_code))
            code_lines.clear()

        for line in markdown_text.split('\n'):
            stripped = line.strip()

            # A ``` fence toggles code mode; the closing fence emits the block.
            if stripped.startswith("```"):
                if in_code_block:
                    flush_code_block()
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Code is literal text: escape markup characters so Paragraph
                # does not parse them as tags, and keep the line breaks.
                code_lines.append(self._escape_markup(line) + '<br/>')
                continue

            if stripped == "":
                story.append(Spacer(1, 0.2 * inch))
                continue

            if line.startswith("# "):
                text, style = line[2:], style_h1
            elif line.startswith("## "):
                text, style = line[3:], style_h2
            elif line.startswith("### "):
                text, style = line[4:], style_h3
            elif stripped.startswith(("* ", "- ")):
                text, style = "\u2022 " + stripped[2:], style_normal
            elif re.match(r'^\d+\.\s', stripped):
                text, style = stripped, style_normal
            else:
                text, style = line, style_normal

            # Wrap emojis exactly once, then translate bold/italic markers.
            markup = self._format_inline(self._wrap_emojis_for_reportlab(text))
            story.append(Paragraph(markup, style))

        # A fence that is never closed still has its content rendered.
        if in_code_block and code_lines:
            flush_code_block()

        return story

    def create_pdf(self, md_asset: Path, layout_name: str, layout_properties: dict):
        """
        Render one Markdown file to a PDF in ``OUTPUT_DIR``.

        The output name is ``<stem>_<layout-with-dashes>_<YYYY-MM-DD>.pdf``.
        Any exception is reported through ``st.error`` instead of being raised.

        Args:
            md_asset: Markdown file to read (UTF-8).
            layout_name: Layout label used in the output file name.
            layout_properties: Layout settings; ``"size"`` is the page size
                and defaults to A4 when absent.
        """
        try:
            md_content = md_asset.read_text(encoding="utf-8")
            date_str = datetime.datetime.now().strftime("%Y-%m-%d")
            output_filename = f"{md_asset.stem}_{layout_name.replace(' ', '-')}_{date_str}.pdf"
            output_path = OUTPUT_DIR / output_filename

            # The SimpleDocTemplate handles the page creation and content flow
            doc = SimpleDocTemplate(
                str(output_path),
                pagesize=layout_properties.get("size", A4),
                rightMargin=inch,
                leftMargin=inch,
                topMargin=inch,
                bottomMargin=inch,
            )
            story = self._markdown_to_story(md_content)
            # The .build() method takes the story and renders the PDF
            doc.build(story)
        except Exception as e:
            st.error(f"Failed to process {md_asset.name} with ReportLab: {e}")
# --- Streamlit UI and File Handling --- | |
def get_file_download_link(file_path: Path) -> str:
    """
    Build an HTML download anchor for a file.

    The file's bytes are base64-encoded into a ``data:`` URI, and the
    anchor's ``download`` attribute carries the file's name.

    Args:
        file_path: File whose contents should be offered for download.

    Returns:
        An ``<a>`` element as a string, with the link text "Download".
    """
    encoded = base64.b64encode(file_path.read_bytes()).decode()
    href = f"data:application/octet-stream;base64,{encoded}"
    return f'<a href="{href}" download="{file_path.name}">Download</a>'
def display_file_explorer():
    """
    Render a simple file explorer in the Streamlit app.

    Lists the ``.md`` files in the current directory, then the PDFs in
    ``OUTPUT_DIR`` ordered newest first by modification time. Each entry
    gets a download link; an info message is shown for an empty section.
    """

    def show_rows(entries, icon):
        # One row per file: name on the left (wide), download link on the right.
        for entry in entries:
            name_col, link_col = st.columns([0.8, 0.2])
            with name_col:
                st.write(f"{icon} `{entry.name}`")
            with link_col:
                st.markdown(get_file_download_link(entry), unsafe_allow_html=True)

    st.header("π File Explorer")

    st.subheader("Source Markdown Files (.md)")
    markdown_sources = list(Path(".").glob("*.md"))
    if markdown_sources:
        show_rows(markdown_sources, "π")
    else:
        st.info("No Markdown files found. A `sample.md` has been created for you.")

    st.subheader("Generated PDF Files")
    newest_first = sorted(
        OUTPUT_DIR.glob("*.pdf"),
        key=lambda pdf: pdf.stat().st_mtime,
        reverse=True,
    )
    if newest_first:
        show_rows(newest_first, "π")
    else:
        st.info("No PDFs generated yet. Click the button above to start.")
# --- Main App Execution --- | |
def main():
    """
    Run the Streamlit app.

    Sets up the page, creates ``sample.md`` when the directory has no
    Markdown files, and on button press renders every ``.md`` file in every
    entry of ``LAYOUTS`` while updating a progress bar. The file explorer is
    rendered at the end of every run.
    """
    st.set_page_config(layout="wide", page_title="PDF Generator")
    st.title("π Markdown to PDF Generator")
    st.markdown("This tool converts all `.md` files in this directory to PDF. It now supports emojis! π")

    # Create a sample markdown file if none exist, to help new users.
    if not any(Path(".").glob("*.md")):
        Path("sample.md").write_text(
            "# Sample Document π\n\nThis is a sample markdown file. **ReportLab** is creating the PDF. Emojis like π and π‘ should now appear correctly.\n\n### Features\n- Item 1\n- Item 2\n\n```\ndef hello_world():\n print(\"Hello, PDF! π\")\n```\n",
            encoding="utf-8",
        )
        st.rerun()

    # Instantiate our generator. It will handle font setup on its own.
    pdf_generator = PDFGenerator(EMOJI_FONT_PATH)

    if st.button("π Generate PDFs from all Markdown Files", type="primary"):
        markdown_files = list(Path(".").glob("*.md"))
        if not markdown_files:
            st.warning("No `.md` files found. Please add a markdown file to the directory.")
        else:
            total_pdfs = len(markdown_files) * len(LAYOUTS)
            progress_bar = st.progress(0, text="Starting PDF generation...")
            pdf_count = 0
            with st.spinner("Generating PDFs... Please wait."):
                for md_file in markdown_files:
                    st.info(f"Processing: **{md_file.name}**")
                    for name, properties in LAYOUTS.items():
                        # Use the instance method to create the PDF
                        pdf_generator.create_pdf(md_file, name, properties)
                        pdf_count += 1
                        progress_bar.progress(pdf_count / total_pdfs, f"Generated {pdf_count}/{total_pdfs} PDFs...")
            st.success("β PDF generation complete!")
            st.balloons()
            # No st.rerun() here: it would discard the success message and
            # balloons, and the explorer below already runs after generation
            # in this same script run, so it lists the new PDFs.

    display_file_explorer()


if __name__ == "__main__":
    main()