awacke1 committed on
Commit
f258b94
·
verified ·
1 Parent(s): 552122a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -0
app.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pathlib import Path
3
+ import base64
4
+ import datetime
5
+ import re
6
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
7
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
8
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
9
+ from reportlab.lib.units import inch
10
+ from reportlab.pdfbase import pdfmetrics
11
+ from reportlab.pdfbase.ttfonts import TTFont
12
+ from reportlab.lib import colors
13
+
14
+ # --- Configuration & Setup ---
15
+
16
# Page layouts offered by the app, keyed by their display name.
# "size" is a ReportLab page size (portrait constants, or the same constant
# passed through landscape()); "icon" is an emoji associated with the layout.
LAYOUTS = {
    "A4 Portrait": {"size": A4, "icon": "📄"},
    "A4 Landscape": {"size": landscape(A4), "icon": "📄"},
    "Letter Portrait": {"size": letter, "icon": "📄"},
    "Letter Landscape": {"size": landscape(letter), "icon": "📄"},
    "Legal Portrait": {"size": legal, "icon": "📄"},
    "Legal Landscape": {"size": landscape(legal), "icon": "📄"},
}
25
+
26
# Directory to save the generated PDFs (created at import time if missing).
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Path for the required emoji font file
EMOJI_FONT_PATH = Path("NotoColorEmoji-Regular.ttf")

# Regex matching runs of emoji/pictograph characters so they can be wrapped in
# a dedicated font. Every range is listed explicitly: a single catch-all range
# from U+24C2 to U+1F251 would also swallow CJK, Hangul, kana and other
# ordinary text, which must stay in the body font.
EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U0001F000-\U0001F251"  # Mahjong/cards, enclosed alphanumerics & ideographs, flags
    "\U00002702-\U000027B0"  # Dingbats
    "\U00002600-\U000026FF"  # Miscellaneous Symbols
    "\U00002B00-\U00002BFF"  # Miscellaneous Symbols and Arrows
    "\U000024C2"  # circled latin capital letter M
    "\U0000FE0F"  # variation selector-16 (emoji presentation)
    "]+",
    flags=re.UNICODE,
)
50
+
51
+ # --- Core PDF Generation Class ---
52
+
53
class PDFGenerator:
    """
    Converts simple Markdown text into PDF files with ReportLab.

    Handles emoji-font registration, parsing of a small line-based Markdown
    subset (``#``/``##``/``###`` headings, ``*``/``-`` bullets, numbered items,
    fenced code blocks, ``**bold**`` and ``_italic_``) and PDF creation.
    """

    # Name under which the emoji font is registered with ReportLab.
    _EMOJI_FONT_NAME = "NotoEmoji"
    # Standard font used when the emoji font cannot be registered.
    _FALLBACK_FONT_NAME = "Helvetica"

    # (line prefix, block kind) pairs; the prefixes are mutually exclusive.
    _HEADING_PREFIXES = (("# ", "h1"), ("## ", "h2"), ("### ", "h3"))
    _BULLET_PREFIXES = ("* ", "- ")
    _NUMBERED_ITEM = re.compile(r"\d+\.\s")
    _BOLD = re.compile(r"\*\*(.+?)\*\*")
    # Underscores inside a word (snake_case) are not emphasis markers.
    _ITALIC = re.compile(r"(?<!\w)_(.+?)_(?!\w)")

    def __init__(self, font_path: Path):
        """
        Initialize the generator and register the emoji font.

        Args:
            font_path: Location of the TTF file to register as the emoji font.
        """
        self.emoji_font_name = self._EMOJI_FONT_NAME
        self._register_emoji_font(font_path)

    def _register_emoji_font(self, font_path: Path):
        """
        Register the TTF font with ReportLab, falling back to Helvetica.

        If the file is missing or cannot be loaded, an error is shown in the
        Streamlit app and ``self.emoji_font_name`` is set to the fallback
        font, which disables emoji wrapping.
        """
        if not font_path.exists():
            # Provide a helpful error in the web app if the font is missing
            st.error(f"Emoji font not found at '{font_path}'. Emojis will not be rendered. Please download it.")
            self.emoji_font_name = self._FALLBACK_FONT_NAME
            return

        # Local import: TTFError lives in the module the file already imports
        # TTFont from, so no change to the file-level imports is needed.
        from reportlab.pdfbase.ttfonts import TTFError

        try:
            pdfmetrics.registerFont(TTFont(self.emoji_font_name, str(font_path)))
        except (TTFError, OSError) as exc:
            # NOTE(review): ReportLab is reported to reject color-bitmap emoji
            # fonts such as NotoColorEmoji; confirm with the font in use.
            st.error(f"Emoji font at '{font_path}' could not be loaded: {exc}. Emojis will not be rendered.")
            self.emoji_font_name = self._FALLBACK_FONT_NAME

    def _wrap_emojis_for_reportlab(self, text: str) -> str:
        """
        Wrap every run of emoji characters in a ReportLab ``<font>`` tag.

        Returns ``text`` unchanged when the emoji font is not registered.
        Call this exactly once per piece of text; wrapping already-wrapped
        text produces nested ``<font>`` tags.
        """
        # If the emoji font failed to register, don't try to use it.
        if self.emoji_font_name != self._EMOJI_FONT_NAME:
            return text

        opening_tag = f'<font name="{self.emoji_font_name}">'
        return EMOJI_PATTERN.sub(lambda m: f"{opening_tag}{m.group(0)}</font>", text)

    @staticmethod
    def _escape_markup(text: str) -> str:
        """Escape the characters that are special in ReportLab paragraph markup."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    @classmethod
    def _format_inline(cls, text: str) -> str:
        """Escape ``text`` and turn ``**bold**`` / ``_italic_`` into <b>/<i> tags."""
        markup = cls._escape_markup(text)
        markup = cls._BOLD.sub(r'<b>\1</b>', markup)
        return cls._ITALIC.sub(r'<i>\1</i>', markup)

    @classmethod
    def _format_code_line(cls, line: str) -> str:
        """Escape one code line, keeping its indentation as non-breaking spaces."""
        expanded = line.expandtabs(4)
        body = expanded.lstrip(" ")
        indent_width = len(expanded) - len(body)
        # Ordinary leading spaces would be collapsed by the paragraph layout.
        return "\u00a0" * indent_width + cls._escape_markup(body)

    @classmethod
    def _classify_line(cls, line: str):
        """Return ``(kind, text)`` for one Markdown line outside a code block."""
        for prefix, kind in cls._HEADING_PREFIXES:
            if line.startswith(prefix):
                return kind, line[len(prefix):]

        stripped = line.strip()
        if stripped.startswith(cls._BULLET_PREFIXES):
            return "bullet", stripped[2:]
        if cls._NUMBERED_ITEM.match(stripped):
            # The number stays part of the text; it is rendered as written.
            return "numbered", stripped
        if not stripped:
            return "blank", ""
        return "text", line

    @classmethod
    def _iter_blocks(cls, markdown_text: str):
        """
        Split Markdown into ``(kind, text)`` blocks.

        ``kind`` is one of ``h1``, ``h2``, ``h3``, ``bullet``, ``numbered``,
        ``blank``, ``text`` or ``code``. A ``code`` block carries the raw
        lines between two fence lines joined with newlines; every other block
        is a single line with its Markdown prefix removed where applicable.
        """
        code_lines = []
        in_code_block = False

        for line in markdown_text.split('\n'):
            if line.strip().startswith("```"):
                if in_code_block:
                    yield "code", "\n".join(code_lines)
                    code_lines = []
                in_code_block = not in_code_block
                continue

            if in_code_block:
                code_lines.append(line)
            else:
                yield cls._classify_line(line)

        # A fence left open at end of input still carries content; emit it
        # instead of silently dropping it.
        if in_code_block and code_lines:
            yield "code", "\n".join(code_lines)

    def _markdown_to_story(self, markdown_text: str) -> list:
        """
        Convert a Markdown string into a list of ReportLab flowables.

        Text is XML-escaped before any markup is added, inline bold/italic is
        applied to headings, list items and body text alike, and emojis are
        wrapped exactly once per paragraph.
        """
        styles = getSampleStyleSheet()
        style_body = styles['BodyText']
        paragraph_styles = {
            "h1": styles['h1'],
            "h2": styles['h2'],
            "h3": styles['h3'],
            "bullet": style_body,
            "numbered": style_body,
            "text": style_body,
        }
        # Use a monospaced font for code blocks
        style_code = ParagraphStyle('Code', parent=styles['Normal'], fontName='Courier', textColor=colors.darkred)

        story = []
        for kind, text in self._iter_blocks(markdown_text):
            if kind == "blank":
                story.append(Spacer(1, 0.2 * inch))
            elif kind == "code":
                markup = "<br/>".join(self._format_code_line(code_line) for code_line in text.split("\n"))
                story.append(Paragraph(self._wrap_emojis_for_reportlab(markup), style_code))
            else:
                markup = self._wrap_emojis_for_reportlab(self._format_inline(text))
                if kind == "bullet":
                    markup = f"• {markup}"
                story.append(Paragraph(markup, paragraph_styles[kind]))

        return story

    def create_pdf(self, md_asset: Path, layout_name: str, layout_properties: dict):
        """
        Create a single PDF file from a Markdown file.

        The PDF is written to ``OUTPUT_DIR`` as
        ``<markdown stem>_<layout name with dashes>_<YYYY-MM-DD>.pdf``.

        Args:
            md_asset: Markdown file to convert (read as UTF-8).
            layout_name: Display name of the layout; used in the file name.
            layout_properties: Layout settings; ``"size"`` is the page size
                and defaults to A4 when absent.

        Returns:
            The path of the written PDF, or ``None`` if generation failed (the
            failure is reported in the Streamlit app instead of being raised).
        """
        try:
            md_content = md_asset.read_text(encoding="utf-8")

            date_str = datetime.datetime.now().strftime("%Y-%m-%d")
            output_filename = f"{md_asset.stem}_{layout_name.replace(' ', '-')}_{date_str}.pdf"
            # The directory may have been removed since import time.
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            output_path = OUTPUT_DIR / output_filename

            # The SimpleDocTemplate handles the page creation and content flow
            doc = SimpleDocTemplate(
                str(output_path),
                pagesize=layout_properties.get("size", A4),
                rightMargin=inch,
                leftMargin=inch,
                topMargin=inch,
                bottomMargin=inch,
            )

            # The .build() method takes the story and renders the PDF
            doc.build(self._markdown_to_story(md_content))
        except Exception as e:
            # UI boundary: one bad file must not abort the whole batch, so the
            # error is surfaced to the user rather than propagated.
            st.error(f"Failed to process {md_asset.name} with ReportLab: {e}")
            return None

        return output_path
180
+
181
+
182
+ # --- Streamlit UI and File Handling ---
183
+
184
def get_file_download_link(file_path: Path) -> str:
    """
    Build an HTML download link that embeds a file as a base64 data URI.

    Args:
        file_path: File whose bytes are embedded in the link.

    Returns:
        An ``<a>`` element whose ``href`` is an ``application/octet-stream``
        data URI and whose ``download`` attribute is the file's name. The
        name is HTML-escaped because callers render the link as raw HTML.

    Raises:
        OSError: If the file cannot be read.
    """
    import html  # local import keeps this block independent of the file's import list

    encoded = base64.b64encode(file_path.read_bytes()).decode("ascii")
    safe_name = html.escape(file_path.name, quote=True)
    return f'<a href="data:application/octet-stream;base64,{encoded}" download="{safe_name}">Download</a>'
192
+
193
def _render_file_rows(files, icon: str):
    """Render one row per file: icon and name on the left, download link on the right."""
    for file_path in files:
        name_col, link_col = st.columns([0.8, 0.2])
        with name_col:
            st.write(f"{icon} `{file_path.name}`")
        with link_col:
            st.markdown(get_file_download_link(file_path), unsafe_allow_html=True)


def display_file_explorer():
    """
    Render a simple file explorer in the Streamlit app.

    Lists the ``*.md`` files of the current directory (sorted by name) and
    the ``*.pdf`` files of ``OUTPUT_DIR`` (newest first), each with a
    download link, or an informational message when a list is empty.
    """
    st.header("📂 File Explorer")

    st.subheader("Source Markdown Files (.md)")
    # glob() order is arbitrary; sort so the listing is stable across reruns.
    md_files = sorted(Path(".").glob("*.md"))
    if md_files:
        # NOTE(review): the icon was garbled in the source text; the memo
        # emoji is the presumed original. Confirm against the real file.
        _render_file_rows(md_files, "📝")
    else:
        st.info("No Markdown files found. A `sample.md` has been created for you.")

    st.subheader("Generated PDF Files")
    # Sort PDFs by modification time to show the newest first
    pdf_files = sorted(OUTPUT_DIR.glob("*.pdf"), key=lambda p: p.stat().st_mtime, reverse=True)
    if pdf_files:
        _render_file_rows(pdf_files, "📄")
    else:
        st.info("No PDFs generated yet. Click the button above to start.")
224
+
225
+
226
+ # --- Main App Execution ---
227
+
228
def main():
    """
    Run the Streamlit app.

    Sets up the page, creates a ``sample.md`` when the current directory has
    no Markdown files, converts every ``*.md`` file to one PDF per entry in
    ``LAYOUTS`` when the button is pressed, and renders the file explorer.
    """
    st.set_page_config(layout="wide", page_title="PDF Generator")
    st.title("📄 Markdown to PDF Generator")
    # NOTE(review): the trailing emoji was garbled in the source text; the
    # thumbs-up is the presumed original (same for the sample document title).
    st.markdown("This tool converts all `.md` files in this directory to PDF. It now supports emojis! 👍")

    # Create a sample markdown file if none exist, to help new users.
    if not any(Path(".").glob("*.md")):
        sample_markdown = (
            "# Sample Document 👍\n\n"
            "This is a sample markdown file. **ReportLab** is creating the PDF. "
            "Emojis like 🚀 and 💡 should now appear correctly.\n\n"
            "### Features\n- Item 1\n- Item 2\n\n"
            "```\ndef hello_world():\n    print(\"Hello, PDF! 👋\")\n```\n"
        )
        Path("sample.md").write_text(sample_markdown, encoding="utf-8")
        st.rerun()

    # Instantiate our generator. It will handle font setup on its own.
    pdf_generator = PDFGenerator(EMOJI_FONT_PATH)

    if st.button("🚀 Generate PDFs from all Markdown Files", type="primary"):
        # Sorted so files are processed in a stable, predictable order.
        markdown_files = sorted(Path(".").glob("*.md"))

        if not markdown_files:
            st.warning("No `.md` files found. Please add a markdown file to the directory.")
        else:
            total_pdfs = len(markdown_files) * len(LAYOUTS)
            progress_bar = st.progress(0, text="Starting PDF generation...")
            pdf_count = 0

            with st.spinner("Generating PDFs... Please wait."):
                for md_file in markdown_files:
                    st.info(f"Processing: **{md_file.name}**")
                    for name, properties in LAYOUTS.items():
                        # Use the instance method to create the PDF
                        pdf_generator.create_pdf(md_file, name, properties)
                        pdf_count += 1
                        progress_bar.progress(pdf_count / total_pdfs, f"Generated {pdf_count}/{total_pdfs} PDFs...")

            st.success("✅ PDF generation complete!")
            st.balloons()
            # No st.rerun() here: the file explorer below is rendered later in
            # this same run and already lists the new PDFs, whereas a rerun
            # would immediately discard the success message and any errors
            # reported during generation.

    display_file_explorer()


if __name__ == "__main__":
    main()