Spaces:
Sleeping
Sleeping
import base64
import datetime
import html
import re
from pathlib import Path

import streamlit as st
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import letter, A4, legal, landscape
from reportlab.lib.units import inch
from reportlab.lib import colors
# --- Configuration & Setup ---
# Page layouts offered for every document. Each entry's "size" is a ReportLab
# page size, i.e. a (width, height) tuple; "icon" is a display glyph.
# The glyph is written as a \N{...} escape so it survives encoding round-trips.
_LAYOUT_ICON = "\N{PAGE FACING UP}"

LAYOUTS = {
    "A4 Portrait": {"size": A4, "icon": _LAYOUT_ICON},
    "A4 Landscape": {"size": landscape(A4), "icon": _LAYOUT_ICON},
    "Letter Portrait": {"size": letter, "icon": _LAYOUT_ICON},
    "Letter Landscape": {"size": landscape(letter), "icon": _LAYOUT_ICON},
    "Legal Portrait": {"size": legal, "icon": _LAYOUT_ICON},
    "Legal Landscape": {"size": landscape(legal), "icon": _LAYOUT_ICON},
}

# Directory to save the generated PDFs; created at import time if missing.
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)
# --- ReportLab PDF Generation --- | |
_BULLET = "\N{BULLET}"
_BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
# Underscores must not touch a word character on the outside, so identifiers
# such as snake_case_name are left alone instead of being half-italicised.
_ITALIC_RE = re.compile(r'(?<!\w)_(.+?)_(?!\w)')
_NUMBERED_ITEM_RE = re.compile(r'^\d+\.\s')


def _inline_markup(text: str) -> str:
    """Escape *text* for the Paragraph markup parser, then apply bold/italic.

    Escaping happens first so that only the ``<b>``/``<i>`` tags generated
    here are interpreted as markup; literal ``<``, ``>`` and ``&`` from the
    Markdown source are rendered as text.
    """
    escaped = html.escape(text, quote=False)
    escaped = _BOLD_RE.sub(r'<b>\1</b>', escaped)
    return _ITALIC_RE.sub(r'<i>\1</i>', escaped)


def _code_paragraph(code_lines, style):
    """Build one Paragraph for a fenced code block, one ``<br/>`` per line."""
    # NOTE(review): Paragraph appears to collapse leading whitespace, so code
    # indentation is not preserved; platypus.Preformatted may be a better
    # fit -- confirm before switching.
    markup = "".join(
        f"{html.escape(code_line, quote=False)}<br/>" for code_line in code_lines
    )
    return Paragraph(markup, style)


def markdown_to_story(markdown_text: str) -> list:
    """Convert a Markdown string into a list of ReportLab flowables (a "story").

    Handles a line-oriented subset of Markdown:

    * ``#``, ``##``, ``###`` headings,
    * ``* `` / ``- `` bullet items and ``1. `` numbered items,
    * fenced code blocks delimited by lines starting with three backticks,
    * blank lines (rendered as vertical space),
    * ``**bold**`` and ``_italic_`` inline markup in any text line.

    Args:
        markdown_text: The Markdown source.

    Returns:
        The flowables, in document order, ready for ``doc.build``.
    """
    styles = getSampleStyleSheet()
    style_normal = styles['BodyText']
    style_code = styles['Code']
    heading_styles = (
        ("# ", styles['h1']),
        ("## ", styles['h2']),
        ("### ", styles['h3']),
    )

    story = []
    code_lines = []
    in_code_block = False

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # A fence line toggles code mode; a closing fence flushes the buffer.
        if stripped.startswith("```"):
            if in_code_block:
                story.append(_code_paragraph(code_lines, style_code))
                code_lines = []
            in_code_block = not in_code_block
            continue

        if in_code_block:
            code_lines.append(line)
            continue

        heading = next(
            (item for item in heading_styles if line.startswith(item[0])), None
        )
        if heading is not None:
            prefix, heading_style = heading
            story.append(Paragraph(_inline_markup(line[len(prefix):]), heading_style))
        elif stripped.startswith(("* ", "- ")):
            # The bullet glyph is part of the text; no separate bulletText is
            # passed, so the glyph is drawn exactly once.
            item_text = _inline_markup(stripped[2:])
            story.append(Paragraph(f"{_BULLET} {item_text}", style_normal))
        elif _NUMBERED_ITEM_RE.match(stripped):
            story.append(Paragraph(_inline_markup(stripped), style_normal))
        elif not stripped:
            story.append(Spacer(1, 0.2 * inch))
        else:
            story.append(Paragraph(_inline_markup(line), style_normal))

    # An unterminated fence would otherwise drop everything buffered so far.
    if in_code_block:
        story.append(_code_paragraph(code_lines, style_code))

    return story
def create_pdf_with_reportlab(md_path: Path, layout_name: str, layout_properties: dict):
    """Render one Markdown file to a PDF inside ``OUTPUT_DIR``.

    The output file is named ``<stem>_<layout-with-dashes>_<YYYY-MM-DD>.pdf``.
    The page size comes from ``layout_properties["size"]`` (A4 when the key is
    absent) and all four margins are one inch.

    Any exception raised while reading, converting or writing is reported
    through ``st.error`` rather than propagated; the function returns ``None``
    either way.
    """
    try:
        source_text = md_path.read_text(encoding="utf-8")

        today = datetime.datetime.now().strftime("%Y-%m-%d")
        layout_slug = layout_name.replace(' ', '-')
        target = OUTPUT_DIR / f"{md_path.stem}_{layout_slug}_{today}.pdf"

        # The same one-inch margin applies on every side.
        margins = dict.fromkeys(
            ("rightMargin", "leftMargin", "topMargin", "bottomMargin"), inch
        )
        document = SimpleDocTemplate(
            str(target),
            pagesize=layout_properties.get("size", A4),
            **margins,
        )
        document.build(markdown_to_story(source_text))
    except Exception as exc:
        st.error(f"Failed to process {md_path.name} with ReportLab: {exc}")
# --- Streamlit UI and File Handling ---
def get_file_download_link(file_path: Path) -> str:
    """Return an HTML ``<a>`` tag that downloads *file_path* via a data URI.

    The file's bytes are base64-encoded into the ``href``; the ``download``
    attribute carries the file name.

    Args:
        file_path: Path of an existing, readable file.

    Returns:
        The anchor element as an HTML string.

    Raises:
        OSError: If the file cannot be read.
    """
    encoded = base64.b64encode(file_path.read_bytes()).decode("ascii")
    # The name lands inside a double-quoted attribute, so quotes, ampersands
    # and angle brackets in a file name must not be emitted raw.
    safe_name = html.escape(file_path.name, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{safe_name}">Download</a>'
    )
def _render_file_rows(files, icon: str) -> None:
    """Render one row per file: icon and name on the left, download link on the right."""
    for path in files:
        name_col, link_col = st.columns([0.8, 0.2])
        with name_col:
            st.write(f"{icon} `{path.name}`")
        with link_col:
            st.markdown(get_file_download_link(path), unsafe_allow_html=True)


def display_file_explorer():
    """Render a simple file explorer in the Streamlit app.

    Two sections are shown: the ``*.md`` files in the current directory, and
    the ``*.pdf`` files in ``OUTPUT_DIR`` ordered newest first by modification
    time. Each file gets a download link; an info message is shown for an
    empty section.
    """
    # Glyphs are written as \N{...} escapes so they survive encoding round-trips.
    st.header("\N{OPEN FILE FOLDER} File Explorer")

    st.subheader("Source Markdown Files (.md)")
    # glob order depends on the filesystem; sort for a stable listing.
    md_files = sorted(Path(".").glob("*.md"))
    if md_files:
        _render_file_rows(md_files, "\N{PAGE FACING UP}")
    else:
        st.info("No Markdown files found. Create a `.md` file to begin.")

    st.subheader("Generated PDF Files")
    pdf_files = sorted(
        OUTPUT_DIR.glob("*.pdf"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    if pdf_files:
        _render_file_rows(pdf_files, "\N{BOOKMARK TABS}")
    else:
        st.info("No PDFs generated yet. Click the button above.")
# --- Main App ---
# Glyphs in user-facing strings are written as \N{...} escapes so they survive
# encoding round-trips.
st.set_page_config(layout="wide", page_title="PDF Generator")
st.title("\N{PAGE FACING UP} Markdown to PDF Generator (ReportLab Engine)")
st.markdown("This tool finds all `.md` files in this directory, converts them to PDF in various layouts, and provides download links. It uses the `ReportLab` library and requires no external dependencies.")

# Seed the directory with a sample document when it contains no Markdown
# files, then rerun so the rest of the script sees the new file.
if not list(Path(".").glob("*.md")):
    with open("sample.md", "w", encoding="utf-8") as f:
        f.write("# Sample Document\n\nThis is a sample markdown file. **ReportLab** is now creating the PDF.\n\n### Features\n- Item 1\n- Item 2\n\n1. Numbered item\n2. Another one\n\n```\ndef hello():\n print(\"Hello, PDF!\")\n```\n")
    st.rerun()

if st.button("\N{ROCKET} Generate PDFs from all Markdown Files", type="primary"):
    markdown_files = list(Path(".").glob("*.md"))
    if not markdown_files:
        st.warning("No `.md` files found. Please add a markdown file to the directory.")
    else:
        # One PDF per (file, layout) pair; used to scale the progress bar.
        total_pdfs = len(markdown_files) * len(LAYOUTS)
        progress_bar = st.progress(0)
        pdf_count = 0
        with st.spinner("Generating PDFs using ReportLab..."):
            for md_file in markdown_files:
                st.info(f"Processing: **{md_file.name}**")
                for name, properties in LAYOUTS.items():
                    st.write(f" - Generating `{name}` format...")
                    create_pdf_with_reportlab(md_file, name, properties)
                    pdf_count += 1
                    progress_bar.progress(pdf_count / total_pdfs)
        st.success("\N{WHITE HEAVY CHECK MARK} PDF generation complete!")
        # No st.rerun() here: a rerun would immediately discard the success
        # message and any st.error output from failed conversions, and the
        # explorer below already runs after generation in this same pass.

display_file_explorer()