awacke1 committed on
Commit
397c8b0
·
verified ·
1 Parent(s): e764298

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -685
app.py CHANGED
@@ -1,711 +1,184 @@
1
- import io
2
- import os
3
- import re
4
- import glob
5
- import textwrap
6
  import streamlit as st
7
- import pandas as pd
8
- import mistune
9
- import fitz
10
- import edge_tts
11
- import asyncio
12
- import base64
13
-
14
- from datetime import datetime
15
  from pathlib import Path
16
- from PIL import Image
17
- from reportlab.pdfgen import canvas
18
- from reportlab.lib.pagesizes import letter, A4, legal, A3, A5, LETTER, LEGAL
19
- from reportlab.lib.utils import ImageReader
20
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
21
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 
 
22
  from reportlab.lib import colors
23
- from reportlab.pdfbase import pdfmetrics
24
- from reportlab.pdfbase.ttfonts import TTFont
25
- from urllib.parse import quote
26
 
27
- # Page config
28
- st.set_page_config(page_title="PDF & Code Interpreter", layout="wide", page_icon="๐Ÿš€")
29
 
30
- def delete_asset(path):
31
- try:
32
- os.remove(path)
33
- except Exception as e:
34
- st.error(f"Error deleting file: {e}")
35
- st.rerun()
 
 
 
 
36
 
37
- async def generate_audio(text, voice, filename):
38
- communicate = edge_tts.Communicate(text, voice)
39
- await communicate.save(filename)
40
- return filename
41
 
42
- def clean_for_speech(text):
43
- text = text.replace("#", "")
44
- emoji_pattern = re.compile(
45
- r"[\U0001F300-\U0001F5FF"
46
- r"\U0001F600-\U0001F64F"
47
- r"\U0001F680-\U0001F6FF"
48
- r"\U0001F700-\U0001F77F"
49
- r"\U0001F780-\U0001F7FF"
50
- r"\U0001F800-\U0001F8FF"
51
- r"\U0001F900-\U0001F9FF"
52
- r"\U0001FA00-\U0001FA6F"
53
- r"\U0001FA70-\U0001FAFF"
54
- r"\u2600-\u26FF"
55
- r"\u2700-\u27BF]+", flags=re.UNICODE)
56
- return emoji_pattern.sub('', text)
57
 
58
- def detect_and_convert_links(text):
59
- md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
60
- text = md_link_pattern.sub(r'<a href="\2" color="blue">\1</a>', text)
61
- url_pattern = re.compile(r'(?<!href=")(https?://[^\s<>{}]+)', re.IGNORECASE)
62
- text = url_pattern.sub(r'<a href="\1" color="blue">\1</a>', text)
63
- return text
64
-
65
- def apply_emoji_font(text, emoji_font):
66
- tag_pattern = re.compile(r'(<[^>]+>)')
67
- segments = tag_pattern.split(text)
68
- result = []
69
- emoji_pattern = re.compile(
70
- r"([\U0001F300-\U0001F5FF"
71
- r"\U0001F600-\U0001F64F"
72
- r"\U0001F680-\U0001F6FF"
73
- r"\U0001F700-\U0001F77F"
74
- r"\U0001F780-\U0001F7FF"
75
- r"\U0001F800-\U0001F8FF"
76
- r"\U0001F900-\U0001F9FF"
77
- r"\U0001FAD0-\U0001FAD9"
78
- r"\U0001FA00-\U0001FA6F"
79
- r"\U0001FA70-\U0001FAFF"
80
- r"\u2600-\u26FF"
81
- r"\u2700-\u27BF]+)"
82
- )
83
- def replace_emoji(match):
84
- emoji = match.group(1)
85
- return f'<font face="{emoji_font}">{emoji}</font>'
86
- for segment in segments:
87
- if tag_pattern.match(segment):
88
- result.append(segment)
89
- else:
90
- parts = []
91
- last_pos = 0
92
- for match in emoji_pattern.finditer(segment):
93
- start, end = match.span()
94
- if last_pos < start:
95
- parts.append(f'<font face="DejaVuSans">{segment[last_pos:start]}</font>')
96
- parts.append(replace_emoji(match))
97
- last_pos = end
98
- if last_pos < len(segment):
99
- parts.append(f'<font face="DejaVuSans">{segment[last_pos:]}</font>')
100
- result.append(''.join(parts))
101
- return ''.join(result)
102
 
103
- def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts):
104
- lines = markdown_text.strip().split('\n')
105
- pdf_content = []
106
- number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
107
- heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
108
- first_numbered_seen = False
109
  for line in lines:
110
- line = line.strip()
111
- if not line:
 
 
 
 
 
112
  continue
113
- if headings_to_fonts and line.startswith('#'):
114
- heading_match = heading_pattern.match(line)
115
- if heading_match:
116
- level = len(heading_match.group(1))
117
- heading_text = heading_match.group(2).strip()
118
- formatted_heading = f"<h{level}>{heading_text}</h{level}>"
119
- pdf_content.append(formatted_heading)
120
- continue
121
- is_numbered_line = number_pattern.match(line) is not None
122
- if add_space_before_numbered and is_numbered_line:
123
- if first_numbered_seen and not line.startswith("1."):
124
- pdf_content.append("")
125
- if not first_numbered_seen:
126
- first_numbered_seen = True
127
- line = detect_and_convert_links(line)
128
- line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
129
- line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
130
- pdf_content.append(line)
131
- total_lines = len(pdf_content)
132
- return pdf_content, total_lines
133
 
134
- def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document", page_size=A4):
135
- if not markdown_texts and not image_files:
136
- return None
137
- buffer = io.BytesIO()
138
-
139
- # Use the selected page size
140
- if page_size == "A4":
141
- page_dimensions = A4
142
- elif page_size == "Letter":
143
- page_dimensions = letter
144
- elif page_size == "Legal":
145
- page_dimensions = legal
146
- elif page_size == "A3":
147
- page_dimensions = A3
148
- elif page_size == "A5":
149
- page_dimensions = A5
150
- else:
151
- page_dimensions = A4 # Default fallback
152
-
153
- page_width = page_dimensions[0] * 2
154
- page_height = page_dimensions[1]
155
-
156
- doc = SimpleDocTemplate(
157
- buffer,
158
- pagesize=(page_width, page_height),
159
- leftMargin=36,
160
- rightMargin=36,
161
- topMargin=36,
162
- bottomMargin=36,
163
- title=doc_title
164
- )
165
- styles = getSampleStyleSheet()
166
- spacer_height = 10
167
- try:
168
- pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
169
- pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", "NotoEmoji-Bold.ttf"))
170
- except Exception as e:
171
- st.error(f"Font registration error: {e}")
172
- return None
173
- story = []
174
- for markdown_text in markdown_texts:
175
- pdf_content, total_lines = markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts)
176
- total_chars = sum(len(line) for line in pdf_content)
177
- hierarchy_weight = sum(1.5 if line.startswith("<b>") else 1 for line in pdf_content)
178
- longest_line_words = max(len(line.split()) for line in pdf_content) if pdf_content else 0
179
- content_density = total_lines * hierarchy_weight + total_chars / 50
180
- usable_height = page_height - 72 - spacer_height
181
- usable_width = page_width - 72
182
- avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
183
- col_width = usable_width / num_columns
184
- min_font_size = 5
185
- max_font_size = 16
186
- lines_per_col = total_lines / num_columns if num_columns > 0 else total_lines
187
- target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
188
- estimated_font_size = int(target_height_per_line / 1.5)
189
- adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
190
- if avg_line_chars > col_width / adjusted_font_size * 10:
191
- adjusted_font_size = int(col_width / (avg_line_chars / 10))
192
- adjusted_font_size = max(min_font_size, adjusted_font_size)
193
- if longest_line_words > 17 or lines_per_col > 20:
194
- font_scale = min(17 / max(longest_line_words, 17), 60 / max(lines_per_col, 20))
195
- adjusted_font_size = max(min_font_size, int(base_font_size * font_scale))
196
- item_style = ParagraphStyle(
197
- 'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
198
- fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
199
- linkUnderline=True
200
- )
201
- numbered_bold_style = ParagraphStyle(
202
- 'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
203
- fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
204
- linkUnderline=True
205
- )
206
- section_style = ParagraphStyle(
207
- 'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
208
- textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
209
- linkUnderline=True
210
- )
211
- columns = [[] for _ in range(num_columns)]
212
- lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
213
- current_line_count = 0
214
- current_column = 0
215
- number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
216
- for item in pdf_content:
217
- if current_line_count >= lines_per_column and current_column < num_columns - 1:
218
- current_column += 1
219
- current_line_count = 0
220
- columns[current_column].append(item)
221
- current_line_count += 1
222
- column_cells = [[] for _ in range(num_columns)]
223
- for col_idx, column in enumerate(columns):
224
- for item in column:
225
- if isinstance(item, str):
226
- heading_match = re.match(r'<h(\d)>(.*?)</h\1>', item) if headings_to_fonts else None
227
- if heading_match:
228
- level = int(heading_match.group(1))
229
- heading_text = heading_match.group(2)
230
- heading_style = ParagraphStyle(
231
- f'Heading{level}Style',
232
- parent=styles['Heading1'],
233
- fontName="DejaVuSans",
234
- textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
235
- fontSize=adjusted_font_size * (1.6 - (level-1)*0.15),
236
- leading=adjusted_font_size * (1.8 - (level-1)*0.15),
237
- spaceAfter=4 - (level-1),
238
- spaceBefore=6 - (level-1),
239
- linkUnderline=True
240
- )
241
- column_cells[col_idx].append(Paragraph(apply_emoji_font(heading_text, "NotoEmoji-Bold"), heading_style))
242
- elif item.startswith("<b>") and item.endswith("</b>"):
243
- content = item[3:-4].strip()
244
- if number_pattern.match(content):
245
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), numbered_bold_style))
246
- else:
247
- column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), section_style))
248
- else:
249
- column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "NotoEmoji-Bold"), item_style))
250
- else:
251
- column_cells[col_idx].append(Paragraph(apply_emoji_font(str(item), "NotoEmoji-Bold"), item_style))
252
- max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
253
- for cells in column_cells:
254
- cells.extend([Paragraph("", item_style)] * (max_cells - len(cells)))
255
- table_data = list(zip(*column_cells)) if column_cells else [[]]
256
- table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
257
- table.setStyle(TableStyle([
258
- ('VALIGN', (0, 0), (-1, -1), 'TOP'),
259
- ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
260
- ('BACKGROUND', (0, 0), (-1, -1), colors.white),
261
- ('GRID', (0, 0), (-1, -1), 0, colors.white),
262
- ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
263
- ('LEFTPADDING', (0, 0), (-1, -1), 2),
264
- ('RIGHTPADDING', (0, 0), (-1, -1), 2),
265
- ('TOPPADDING', (0, 0), (-1, -1), 1),
266
- ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
267
- ]))
268
- story.append(Spacer(1, spacer_height))
269
- story.append(table)
270
- story.append(Spacer(1, spacer_height * 2))
271
- for img_path in image_files:
272
- try:
273
- img = Image.open(img_path)
274
- img_width, img_height = img.size
275
- page_width_img, page_height_img = page_dimensions
276
- scale = min((page_width_img - 40) / img_width, (page_height_img - 40) / img_height)
277
- new_width = img_width * scale
278
- new_height = img_height * scale
279
- story.append(ReportLabImage(img_path, width=new_width, height=new_height))
280
- story.append(Spacer(1, spacer_height))
281
- except Exception as e:
282
- st.warning(f"Could not process image {img_path}: {e}")
283
  continue
284
- doc.build(story)
285
- buffer.seek(0)
286
- return buffer.getvalue()
287
 
288
- def pdf_to_image(pdf_bytes):
289
- if pdf_bytes is None:
290
- return None
291
- try:
292
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
293
- images = []
294
- for page in doc:
295
- pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
296
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
297
- images.append(img)
298
- doc.close()
299
- return images
300
- except Exception as e:
301
- st.error(f"Failed to render PDF preview: {e}")
302
- return None
 
 
 
 
 
 
303
 
304
- def get_video_html(video_path, width="100%"):
 
305
  try:
306
- video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
307
- return f'''
308
- <video width="{width}" controls autoplay muted loop>
309
- <source src="{video_url}" type="video/mp4">
310
- Your browser does not support the video tag.
311
- </video>
312
- '''
313
- except Exception as e:
314
- st.warning(f"Could not load video {video_path}: {e}")
315
- return ""
316
-
317
- def display_glossary_entity(k):
318
- search_urls = {
319
- "๐Ÿš€๐ŸŒŒArXiv": lambda k: f"https://arxiv.org/search/?query={quote(k)}&searchtype=all",
320
- "๐Ÿ“–": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
321
- "๐Ÿ”": lambda k: f"https://www.google.com/search?q={quote(k)}",
322
- "๐ŸŽฅ": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
323
- }
324
- links_md = ' '.join([f"[{emoji}]({url(k)})" for emoji, url in search_urls.items()])
325
- st.markdown(f"**{k}** <small>{links_md}</small>", unsafe_allow_html=True)
326
-
327
- # Tabs setup
328
- tab1, tab2 = st.tabs(["๐Ÿ“„ PDF Composer", "๐Ÿงช Code Interpreter"])
329
-
330
- with tab1:
331
- st.header("๐Ÿ“„ PDF Composer & Voice Generator ๐Ÿš€")
332
-
333
- # Sidebar PDF settings
334
- with st.sidebar:
335
- st.subheader("๐Ÿ“„ PDF Settings")
336
- columns = st.slider("Text columns", 1, 3, 2)
337
- font_family = st.selectbox("Font", ["Helvetica", "Times-Roman", "Courier", "DejaVuSans"])
338
- font_size = st.slider("Font size", 6, 24, 14)
339
 
340
- # Page size selection
341
- page_size_options = {
342
- "A4 (210 ร— 297 mm)": "A4",
343
- "Letter (8.5 ร— 11 in)": "Letter",
344
- "Legal (8.5 ร— 14 in)": "Legal",
345
- "A3 (297 ร— 420 mm)": "A3",
346
- "A5 (148 ร— 210 mm)": "A5"
347
- }
348
- selected_page_size = st.selectbox(
349
- "๐Ÿ“ Page Size",
350
- options=list(page_size_options.keys()),
351
- index=0 # Default to A4
352
  )
353
- page_size = page_size_options[selected_page_size]
354
-
355
- # Multiple markdown file upload
356
- md_files = st.file_uploader("Upload Markdown Files (.md)", type=["md"], accept_multiple_files=True)
357
- markdown_texts = []
358
- combined_text = ""
359
-
360
- if md_files:
361
- st.subheader(f"๐Ÿ“‚ Uploaded Files ({len(md_files)})")
362
- for i, md_file in enumerate(md_files):
363
- md_text = md_file.getvalue().decode("utf-8")
364
- markdown_texts.append(md_text)
365
- combined_text += md_text + "\n\n"
366
-
367
- with st.expander(f"๐Ÿ“„ {md_file.name}"):
368
- st.markdown(md_text[:500] + "..." if len(md_text) > 500 else md_text)
369
 
370
- stem = f"combined_{len(md_files)}_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
371
- else:
372
- # Single text area for manual input
373
- manual_text = st.text_area("Or enter markdown text directly", height=200)
374
- if manual_text:
375
- markdown_texts = [manual_text]
376
- combined_text = manual_text
377
- stem = datetime.now().strftime('%Y%m%d_%H%M%S')
378
-
379
- # Convert Markdown to plain text for voice generation
380
- if combined_text:
381
- renderer = mistune.HTMLRenderer()
382
- markdown = mistune.create_markdown(renderer=renderer)
383
- html = markdown(combined_text)
384
- plain_text = re.sub(r'<[^>]+>', '', html)
385
 
386
- st.subheader("๐Ÿ“Š Content Summary")
387
- col1, col2, col3, col4 = st.columns(4)
388
- with col1:
389
- st.metric("Files", len(md_files) if md_files else 1)
390
- with col2:
391
- st.metric("Total Characters", len(combined_text))
392
- with col3:
393
- st.metric("Estimated Words", len(combined_text.split()))
394
- with col4:
395
- st.metric("Page Size", selected_page_size.split(" (")[0])
396
- else:
397
- plain_text = ""
398
-
399
- # Voice settings
400
- st.subheader("๐Ÿ”Š Text-to-Speech Settings")
401
- col1, col2 = st.columns(2)
402
- with col1:
403
- languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
404
- voice_choice = st.selectbox("Voice Language", list(languages.keys()))
405
- voice_lang = languages[voice_choice]
406
- slow = st.checkbox("Slow Speech")
407
-
408
- with col2:
409
- VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
410
- selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
411
-
412
- if st.button("๐Ÿ”Š Generate & Download Voice MP3 from Text"):
413
- if plain_text.strip():
414
- voice_file = f"{stem}_{selected_voice}.mp3"
415
- try:
416
- with st.spinner("Generating audio..."):
417
- cleaned_text = clean_for_speech(plain_text)
418
- audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, voice_file))
419
- st.success("Audio generated successfully!")
420
- st.audio(audio_file)
421
- with open(audio_file, 'rb') as mp3:
422
- st.download_button("๐Ÿ“ฅ Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
423
- except Exception as e:
424
- st.error(f"Error generating voice: {e}")
425
- else:
426
- st.warning("No text to generate voice from.")
427
-
428
- # Image uploads and ordering
429
- st.subheader("๐Ÿ–ผ๏ธ Image Management")
430
- imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
431
- ordered_images = []
432
- if imgs:
433
- st.write(f"๐Ÿ“Š Uploaded {len(imgs)} images")
434
- df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
435
- edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
436
- for _, row in edited.sort_values("order").iterrows():
437
- for f in imgs:
438
- if f.name == row['name']:
439
- ordered_images.append(f)
440
- break
441
-
442
- # PDF Generation
443
- st.subheader("๐Ÿ“„ PDF Generation")
444
-
445
- if st.button("๐Ÿ–‹๏ธ Generate PDF with Markdown & Images", type="primary"):
446
- if not markdown_texts and not ordered_images:
447
- st.warning("Please provide some markdown text or upload images to generate a PDF.")
448
- else:
449
- with st.spinner(f"Generating PDF with {page_size} page size..."):
450
- if markdown_texts and not ordered_images:
451
- # Use the enhanced create_pdf function
452
- pdf_bytes = create_pdf(
453
- markdown_texts=markdown_texts,
454
- image_files=[],
455
- base_font_size=font_size,
456
- num_columns=columns,
457
- add_space_before_numbered=True,
458
- headings_to_fonts=True,
459
- doc_title=f"Markdown_Document_{len(markdown_texts)}_files",
460
- page_size=page_size
461
- )
462
-
463
- if pdf_bytes:
464
- pdf_images = pdf_to_image(pdf_bytes)
465
- if pdf_images:
466
- st.subheader("Preview of Generated PDF")
467
- for i, img in enumerate(pdf_images):
468
- st.image(img, caption=f"Page {i+1}", use_container_width=True)
469
-
470
- pdf_name = f"{stem}.pdf"
471
- st.download_button("โฌ‡๏ธ Download PDF", data=pdf_bytes, file_name=pdf_name, mime="application/pdf")
472
- else:
473
- st.error("Failed to generate PDF from markdown.")
474
- else:
475
- # Fallback to original simple PDF generation for mixed content
476
- buf = io.BytesIO()
477
-
478
- # Get page dimensions for the selected page size
479
- if page_size == "A4":
480
- page_dimensions = A4
481
- elif page_size == "Letter":
482
- page_dimensions = letter
483
- elif page_size == "Legal":
484
- page_dimensions = legal
485
- elif page_size == "A3":
486
- page_dimensions = A3
487
- elif page_size == "A5":
488
- page_dimensions = A5
489
- else:
490
- page_dimensions = A4
491
-
492
- c = canvas.Canvas(buf, pagesize=page_dimensions)
493
-
494
- if plain_text.strip():
495
- page_w, page_h = page_dimensions
496
- margin = 40
497
- gutter = 20
498
- col_w = (page_w - 2*margin - (columns-1)*gutter) / columns
499
- c.setFont(font_family, font_size)
500
- line_height = font_size * 1.2
501
- col = 0
502
- x = margin
503
- y = page_h - margin
504
- avg_char_width = font_size * 0.6
505
- wrap_width = int(col_w / avg_char_width) if avg_char_width > 0 else 100
506
- for paragraph in plain_text.split("\n"):
507
- if not paragraph.strip():
508
- y -= line_height
509
- if y < margin:
510
- col += 1
511
- if col >= columns:
512
- c.showPage()
513
- c.setFont(font_family, font_size)
514
- col = 0
515
- x = margin + col*(col_w+gutter)
516
- y = page_h - margin
517
- continue
518
- for line in textwrap.wrap(paragraph, wrap_width):
519
- if y < margin:
520
- col += 1
521
- if col >= columns:
522
- c.showPage()
523
- c.setFont(font_family, font_size)
524
- col = 0
525
- x = margin + col*(col_w+gutter)
526
- y = page_h - margin
527
- c.drawString(x, y, line)
528
- y -= line_height
529
- y -= line_height
530
- for img_f in ordered_images:
531
- try:
532
- img = Image.open(img_f)
533
- w, h = img.size
534
- c.showPage()
535
- c.setPageSize((w, h))
536
- c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
537
- except Exception as e:
538
- st.warning(f"Could not process image {img_f.name}: {e}")
539
- continue
540
- c.save()
541
- buf.seek(0)
542
- pdf_name = f"{stem}.pdf"
543
- st.success(f"PDF generated successfully with {page_size} page size!")
544
- st.download_button("โฌ‡๏ธ Download PDF", data=buf, file_name=pdf_name, mime="application/pdf")
545
-
546
- st.markdown("---")
547
- st.subheader("๐Ÿ“‚ Available Assets")
548
- all_assets = glob.glob("*.*")
549
- excluded_extensions = ['.py', '.ttf', '.txt']
550
- excluded_files = ['README.md', 'index.html']
551
- assets = sorted([
552
- a for a in all_assets
553
- if not (a.lower().endswith(tuple(excluded_extensions)) or a in excluded_files)
554
- and a.lower().endswith(('.md', '.png', '.jpg', '.jpeg'))
555
- ])
556
- if 'selected_assets' not in st.session_state:
557
- st.session_state.selected_assets = []
558
- if not assets:
559
- st.info("No available assets found.")
560
  else:
561
- for a in assets:
562
- ext = a.split('.')[-1].lower()
563
- cols = st.columns([1, 3, 1, 1])
564
- with cols[0]:
565
- is_selected = st.checkbox("", key=f"select_{a}", value=a in st.session_state.selected_assets)
566
- if is_selected and a not in st.session_state.selected_assets:
567
- st.session_state.selected_assets.append(a)
568
- elif not is_selected and a in st.session_state.selected_assets:
569
- st.session_state.selected_assets.remove(a)
570
- cols[1].write(a)
571
- try:
572
- if ext == 'md':
573
- with open(a, 'r', encoding='utf-8') as f:
574
- cols[2].download_button("๐Ÿ“ฅ", data=f.read(), file_name=a, mime="text/markdown")
575
- elif ext in ['png', 'jpg', 'jpeg']:
576
- with open(a, 'rb') as img_file:
577
- cols[2].download_button("โฌ‡๏ธ", data=img_file, file_name=a, mime=f"image/{ext}")
578
- cols[3].button("๐Ÿ—‘๏ธ", key=f"del_{a}", on_click=delete_asset, args=(a,))
579
- except Exception as e:
580
- cols[3].error(f"Error handling file {a}: {e}")
581
- if st.button("๐Ÿ“‘ Generate PDF from Selected Assets"):
582
- if not st.session_state.selected_assets:
583
- st.warning("Please select at least one asset to generate a PDF.")
584
- else:
585
- selected_markdown_texts = []
586
- image_files = []
587
- for a in st.session_state.selected_assets:
588
- ext = a.split('.')[-1].lower()
589
- if ext == 'md':
590
- with open(a, 'r', encoding='utf-8') as f:
591
- selected_markdown_texts.append(f.read())
592
- elif ext in ['png', 'jpg', 'jpeg']:
593
- image_files.append(a)
594
- with st.spinner("Generating PDF from selected assets..."):
595
- pdf_bytes = create_pdf(
596
- markdown_texts=selected_markdown_texts,
597
- image_files=image_files,
598
- base_font_size=font_size,
599
- num_columns=columns,
600
- add_space_before_numbered=True,
601
- headings_to_fonts=True,
602
- doc_title="Combined_Selected_Assets",
603
- page_size=page_size
604
- )
605
- if pdf_bytes:
606
- pdf_images = pdf_to_image(pdf_bytes)
607
- if pdf_images:
608
- st.subheader("Preview of Generated PDF")
609
- for i, img in enumerate(pdf_images):
610
- st.image(img, caption=f"Page {i+1}", use_container_width=True)
611
- prefix = datetime.now().strftime("%Y%m%d_%H%M%S")
612
- st.download_button(
613
- label="๐Ÿ’พ Download Combined PDF",
614
- data=pdf_bytes,
615
- file_name=f"{prefix}_combined.pdf",
616
- mime="application/pdf"
617
- )
618
- else:
619
- st.error("Failed to generate PDF.")
620
-
621
- st.markdown("---")
622
- st.subheader("๐Ÿ–ผ Image Gallery")
623
- image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
624
- image_cols = st.slider("Gallery Columns ๐Ÿ–ผ", min_value=1, max_value=15, value=5, key="image_cols")
625
- if image_files:
626
- cols = st.columns(image_cols)
627
- for idx, image_file in enumerate(image_files):
628
- with cols[idx % image_cols]:
629
- try:
630
- img = Image.open(image_file)
631
- st.image(img, caption=image_file, use_container_width=True)
632
- display_glossary_entity(os.path.splitext(image_file)[0])
633
- except Exception as e:
634
- st.warning(f"Could not load image {image_file}: {e}")
635
  else:
636
- st.info("No images found in the current directory.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
 
638
- st.markdown("---")
639
- st.subheader("๐ŸŽฅ Video Gallery")
640
- video_files = glob.glob("*.mp4")
641
- video_cols = st.slider("Gallery Columns ๐ŸŽฌ", min_value=1, max_value=5, value=3, key="video_cols")
642
- if video_files:
643
- cols = st.columns(video_cols)
644
- for idx, video_file in enumerate(video_files):
645
- with cols[idx % video_cols]:
646
- st.markdown(get_video_html(video_file, width="100%"), unsafe_allow_html=True)
647
- display_glossary_entity(os.path.splitext(video_file)[0])
648
  else:
649
- st.info("No videos found in the current directory.")
 
 
650
 
651
- with tab2:
652
- st.header("๐Ÿงช Python Code Executor & Demo")
653
- import io, sys
654
- from contextlib import redirect_stdout
655
- DEFAULT_CODE = '''import streamlit as st
656
- import random
657
- st.title("๐Ÿ“Š Demo App")
658
- st.markdown("Random number and color demo")
659
- col1, col2 = st.columns(2)
660
- with col1:
661
- num = st.number_input("Number:", 1, 100, 10)
662
- mul = st.slider("Multiplier:", 1, 10, 2)
663
- if st.button("Calc"):
664
- st.write(num * mul)
665
- with col2:
666
- color = st.color_picker("Pick color","#ff0000")
667
- st.markdown(f'<div style="background:{color};padding:10px;">Color</div>', unsafe_allow_html=True)
668
- '''
669
- def extract_python_code(md: str) -> list:
670
- return re.findall(r"```python\s*(.*?)```", md, re.DOTALL)
671
- def execute_code(code: str) -> tuple:
672
- buf = io.StringIO(); local_vars = {}
673
- try:
674
- with redirect_stdout(buf):
675
- exec(code, {}, local_vars)
676
- return buf.getvalue(), None
677
- except Exception as e:
678
- return None, str(e)
679
- up = st.file_uploader("Upload .py or .md", type=['py', 'md'])
680
- if 'code' not in st.session_state:
681
- st.session_state.code = DEFAULT_CODE
682
- if up:
683
- text = up.getvalue().decode()
684
- if up.type == 'text/markdown':
685
- codes = extract_python_code(text)
686
- if codes:
687
- st.session_state.code = codes[0].strip()
688
- else:
689
- st.warning("No Python code block found in the markdown file.")
690
- st.session_state.code = ''
691
- else:
692
- st.session_state.code = text.strip()
693
- st.code(st.session_state.code, language='python')
694
- else:
695
- st.session_state.code = st.text_area("๐Ÿ’ป Code Editor", value=st.session_state.code, height=400)
696
- c1, c2 = st.columns([1, 1])
697
- if c1.button("โ–ถ๏ธ Run Code"):
698
- if st.session_state.code.strip():
699
- out, err = execute_code(st.session_state.code)
700
- if err:
701
- st.error(f"Execution Error:\n{err}")
702
- elif out:
703
- st.subheader("Output:")
704
- st.code(out)
705
- else:
706
- st.success("Executed with no standard output.")
707
- else:
708
- st.warning("No code to run.")
709
- if c2.button("๐Ÿ—‘๏ธ Clear Code"):
710
- st.session_state.code = ''
711
- st.rerun()
 
 
 
 
 
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
2
  from pathlib import Path
3
+ import base64
4
+ import datetime
5
+ import re
6
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 
7
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
8
+ from reportlab.lib.pagesizes import letter, A4, legal, landscape
9
+ from reportlab.lib.units import inch
10
  from reportlab.lib import colors
 
 
 
11
 
12
# --- Configuration & Setup ---

# Page layouts keyed by display name.
#   "size": ReportLab page size, a (width, height) tuple.
#   "icon": emoji associated with the layout (U+1F4C4 PAGE FACING UP).
# The icon is written as an escape sequence so it cannot be corrupted by a
# wrong source-file encoding.
_PAGE_ICON = "\U0001F4C4"

LAYOUTS = {
    "A4 Portrait": {"size": A4, "icon": _PAGE_ICON},
    "A4 Landscape": {"size": landscape(A4), "icon": _PAGE_ICON},
    "Letter Portrait": {"size": letter, "icon": _PAGE_ICON},
    "Letter Landscape": {"size": landscape(letter), "icon": _PAGE_ICON},
    "Legal Portrait": {"size": legal, "icon": _PAGE_ICON},
    "Legal Landscape": {"size": landscape(legal), "icon": _PAGE_ICON},
}

# Directory where generated PDFs are written; created on import if missing.
OUTPUT_DIR = Path("generated_pdfs")
OUTPUT_DIR.mkdir(exist_ok=True)
 
28
 
29
+ # --- ReportLab PDF Generation ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
def markdown_to_story(markdown_text: str):
    """Convert a Markdown string into a list of ReportLab flowables (a "story").

    The input is handled line by line. Recognised constructs:

    * fenced code blocks delimited by lines starting with three backticks,
    * ``# ``, ``## `` and ``### `` headings,
    * ``* `` / ``- `` bullet items and ``N. `` numbered items,
    * blank lines (rendered as vertical space),
    * everything else as a body paragraph.

    ``**bold**`` and ``_italic_`` markers are converted in headings, list
    items and paragraphs. All source text is XML-escaped before it reaches
    ``Paragraph``, so literal ``<``, ``>`` and ``&`` in the Markdown are
    rendered as text instead of being parsed as ReportLab markup.

    Args:
        markdown_text: Raw Markdown source.

    Returns:
        A list of ``Paragraph`` and ``Spacer`` flowables in document order.
    """
    styles = getSampleStyleSheet()
    style_normal = styles['BodyText']
    style_h1 = styles['h1']
    style_h2 = styles['h2']
    style_h3 = styles['h3']
    style_code = styles['Code']

    numbered_item = re.compile(r'^\d+\.\s')

    def escape(text):
        """Escape the characters that are special to Paragraph's markup parser."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def inline(text):
        """Escape *text*, then turn **bold** / _italic_ markers into tags."""
        text = escape(text)
        text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
        # Underscores inside words (snake_case, URLs) are not emphasis markers.
        return re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'<i>\1</i>', text)

    story = []
    code_lines = []
    in_code_block = False

    def flush_code_block():
        """Emit the collected code lines as one paragraph and reset the buffer."""
        story.append(Paragraph('<br/>'.join(code_lines), style_code))
        code_lines.clear()

    for line in markdown_text.split('\n'):
        stripped = line.strip()

        # A fence line toggles code mode; a closing fence emits the block.
        if stripped.startswith("```"):
            if in_code_block:
                flush_code_block()
            in_code_block = not in_code_block
            continue

        if in_code_block:
            code_lines.append(escape(line))
            continue

        if line.startswith("# "):
            story.append(Paragraph(inline(line[2:]), style_h1))
        elif line.startswith("## "):
            story.append(Paragraph(inline(line[3:]), style_h2))
        elif line.startswith("### "):
            story.append(Paragraph(inline(line[4:]), style_h3))
        elif stripped.startswith(("* ", "- ")):
            # bulletText already draws the marker; repeating it inside the
            # paragraph text would render the bullet twice.
            story.append(
                Paragraph(inline(stripped[2:]), style_normal, bulletText='\u2022')
            )
        elif numbered_item.match(stripped):
            story.append(Paragraph(inline(stripped), style_normal))
        elif stripped == "":
            story.append(Spacer(1, 0.2 * inch))
        else:
            story.append(Paragraph(inline(line), style_normal))

    # An unterminated fence must not silently discard the collected code.
    if in_code_block:
        flush_code_block()

    return story
86
 
87
def create_pdf_with_reportlab(md_path: Path, layout_name: str, layout_properties: dict):
    """Render one Markdown file to a PDF inside ``OUTPUT_DIR``.

    The output file is named ``<stem>_<layout-name-with-dashes>_<YYYY-MM-DD>.pdf``,
    so regenerating the same source and layout on the same day overwrites the
    previous file.

    Args:
        md_path: Markdown source file; read as UTF-8.
        layout_name: Layout display name; used only to build the file name.
        layout_properties: Layout settings. ``"size"`` is the page size handed
            to ReportLab and defaults to A4 when missing.

    Returns:
        The path of the written PDF, or ``None`` when generation failed. On
        failure the error is reported to the user through ``st.error``
        instead of being raised.
    """
    try:
        md_content = md_path.read_text(encoding="utf-8")

        date_str = datetime.datetime.now().strftime("%Y-%m-%d")
        output_filename = f"{md_path.stem}_{layout_name.replace(' ', '-')}_{date_str}.pdf"
        output_path = OUTPUT_DIR / output_filename

        # The directory is created at import time but may have been removed
        # while the app was running.
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

        doc = SimpleDocTemplate(
            str(output_path),
            pagesize=layout_properties.get("size", A4),
            rightMargin=inch,
            leftMargin=inch,
            topMargin=inch,
            bottomMargin=inch,
        )
        doc.build(markdown_to_story(md_content))
    except Exception as e:
        # UI boundary: show the problem and keep processing other files.
        st.error(f"Failed to process {md_path.name} with ReportLab: {e}")
        return None

    return output_path
111
+
112
+
113
+ # --- Streamlit UI and File Handling (Mostly Unchanged) ---
114
+
115
def get_file_download_link(file_path: Path) -> str:
    """Build an HTML anchor that downloads a file through a base64 data URI.

    The whole file is read into memory and embedded in the ``href``, so the
    returned string grows with the file size.

    Args:
        file_path: Path of the file to offer for download.

    Returns:
        An ``<a>`` element whose ``download`` attribute is the file's name and
        whose ``href`` is an ``application/octet-stream`` data URI.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html  # local import so the block does not rely on a file-level one

    encoded = base64.b64encode(file_path.read_bytes()).decode("ascii")
    # The result is rendered as raw HTML by the caller, so a file name
    # containing quotes or angle brackets must not be able to break out of
    # the attribute value.
    safe_name = html.escape(file_path.name, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{safe_name}">Download</a>'
    )
120
+
121
def _render_file_rows(files, icon: str) -> None:
    """Draw one row per file: icon and name on the left, download link on the right."""
    for file_path in files:
        name_col, link_col = st.columns([0.8, 0.2])
        with name_col:
            st.write(f"{icon} `{file_path.name}`")
        with link_col:
            # The link is an HTML anchor, hence unsafe_allow_html.
            st.markdown(get_file_download_link(file_path), unsafe_allow_html=True)


def display_file_explorer():
    """Render a simple file explorer in the Streamlit app.

    Two sections are shown: the ``*.md`` files found in the current working
    directory (sorted by name so the order is stable between runs), and the
    ``*.pdf`` files found in ``OUTPUT_DIR`` (newest first, by modification
    time). Every listed file gets a download link; an informational message
    is shown for a section that has no files.
    """
    st.header("📂 File Explorer")

    st.subheader("Source Markdown Files (.md)")
    md_files = sorted(Path(".").glob("*.md"))
    if not md_files:
        st.info("No Markdown files found. Create a `.md` file to begin.")
    else:
        _render_file_rows(md_files, "📝")

    st.subheader("Generated PDF Files")
    pdf_files = sorted(
        OUTPUT_DIR.glob("*.pdf"),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    if not pdf_files:
        st.info("No PDFs generated yet. Click the button above.")
    else:
        _render_file_rows(pdf_files, "📄")
148
+
149
+
150
+ # --- Main App ---
151
+
152
st.set_page_config(layout="wide", page_title="PDF Generator")

st.title("📄 Markdown to PDF Generator (ReportLab Engine)")
st.markdown("This tool finds all `.md` files in this directory, converts them to PDF in various layouts, and provides download links. It uses the `ReportLab` library and requires no external dependencies.")

# Seed the directory with a sample document so a first run has something to
# convert, then rerun so the rest of the page picks the new file up.
if not any(Path(".").glob("*.md")):
    Path("sample.md").write_text(
        "# Sample Document\n\nThis is a sample markdown file. **ReportLab** is now creating the PDF.\n\n### Features\n- Item 1\n- Item 2\n\n1. Numbered item\n2. Another one\n\n```\ndef hello():\n print(\"Hello, PDF!\")\n```\n",
        encoding="utf-8",
    )
    st.rerun()

if st.button("🚀 Generate PDFs from all Markdown Files", type="primary"):
    markdown_files = list(Path(".").glob("*.md"))

    if not markdown_files:
        st.warning("No `.md` files found. Please add a markdown file to the directory.")
    else:
        # One PDF is produced per (markdown file, layout) pair.
        total_pdfs = len(markdown_files) * len(LAYOUTS)
        progress_bar = st.progress(0)
        pdf_count = 0

        with st.spinner("Generating PDFs using ReportLab..."):
            for md_file in markdown_files:
                st.info(f"Processing: **{md_file.name}**")
                for name, properties in LAYOUTS.items():
                    st.write(f" - Generating `{name}` format...")
                    create_pdf_with_reportlab(md_file, name, properties)
                    pdf_count += 1
                    progress_bar.progress(pdf_count / total_pdfs)

        # No st.rerun() here: it would discard this message and the progress
        # output immediately. The file explorer below runs in this same script
        # run, so the freshly written PDFs are listed without a rerun.
        st.success("✅ PDF generation complete!")

display_file_explorer()