"""Streamlit app: compose PDFs from Markdown and images, synthesize speech, run Python snippets."""

import asyncio
import base64
import glob
import html
import io
import os
import re
import sys
import textwrap
from contextlib import redirect_stdout
from datetime import datetime
from pathlib import Path
from urllib.parse import quote

import edge_tts
import fitz
import mistune
import pandas as pd
import streamlit as st
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.utils import ImageReader
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage

# Page config (must be the first Streamlit command in the script).
st.set_page_config(page_title="PDF & Code Interpreter", layout="wide", page_icon="🚀")

# Emoji / pictograph code-point ranges, shared by speech cleaning and PDF font switching.
_EMOJI_RE = re.compile(
    r"[\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF]+"
)
_TAG_RE = re.compile(r'(<[^>]+>)')
_MD_LINK_RE = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
# The lookbehind skips URLs that already sit inside an href="..." attribute.
_BARE_URL_RE = re.compile(r'(?<!href=")(https?://[^\s<>{}]+)', re.IGNORECASE)
_NUMBERED_RE = re.compile(r'^\d+(\.\d+)*\.\s')
_MD_HEADING_RE = re.compile(r'^(#{1,4})\s+(.+)$')
_HEADING_TAG_RE = re.compile(r'<h(\d)>(.*?)</h\1>')

_SEARCH_URLS = {
    "🚀🌌ArXiv": "https://arxiv.org/search/?query={q}&searchtype=all",
    "📖": "https://en.wikipedia.org/wiki/{q}",
    "🔍": "https://www.google.com/search?q={q}",
    "🎥": "https://www.youtube.com/results?search_query={q}",
}

DEFAULT_CODE = '''import streamlit as st
import random

st.title("📊 Demo App")
st.markdown("Random number and color demo")
col1, col2 = st.columns(2)
with col1:
    num = st.number_input("Number:", 1, 100, 10)
    mul = st.slider("Multiplier:", 1, 10, 2)
    if st.button("Calc"):
        st.write(num * mul)
with col2:
    color = st.color_picker("Pick color","#ff0000")
    st.markdown(f'<div style="background-color:{color}; padding:10px;">Color</div>', unsafe_allow_html=True)
'''


def delete_asset(path):
    """Delete the file at *path*; used as a Streamlit ``on_click`` callback.

    A failure is reported with ``st.error`` instead of being raised.
    """
    try:
        os.remove(path)
    except OSError as e:
        st.error(f"Error deleting file: {e}")
    # No st.rerun() here: Streamlit reruns the script after a callback anyway,
    # and st.rerun() called from inside a callback is a no-op.


async def generate_audio(text, voice, filename, rate="+0%"):
    """Synthesize *text* with edge-tts voice *voice* into *filename*.

    Args:
        text: Text to speak.
        voice: edge-tts voice name, e.g. ``"en-US-AriaNeural"``.
        filename: Path of the MP3 file to write.
        rate: Speaking-rate offset passed to edge-tts, e.g. ``"-25%"``.

    Returns:
        *filename*, for convenience.
    """
    communicate = edge_tts.Communicate(text, voice, rate=rate)
    await communicate.save(filename)
    return filename


def clean_for_speech(text):
    """Return *text* with ``#`` characters and emoji removed."""
    return _EMOJI_RE.sub('', text.replace("#", ""))


def detect_and_convert_links(text):
    """Turn Markdown links and bare http(s) URLs in *text* into ``<a href>`` markup."""
    text = _MD_LINK_RE.sub(r'<a href="\2">\1</a>', text)
    return _BARE_URL_RE.sub(r'<a href="\1">\1</a>', text)


def apply_emoji_font(text, emoji_font):
    """Wrap runs of *text* in ``<font>`` tags so emoji use *emoji_font*.

    Existing markup tags are passed through untouched. Between tags, emoji
    runs are wrapped in ``<font face="{emoji_font}">`` and all other text in
    ``<font face="DejaVuSans">``.
    """
    pieces = []
    for segment in _TAG_RE.split(text):
        if _TAG_RE.match(segment):
            pieces.append(segment)
            continue
        cursor = 0
        for match in _EMOJI_RE.finditer(segment):
            start, end = match.span()
            if cursor < start:
                pieces.append(f'<font face="DejaVuSans">{segment[cursor:start]}</font>')
            pieces.append(f'<font face="{emoji_font}">{match.group(0)}</font>')
            cursor = end
        if cursor < len(segment):
            pieces.append(f'<font face="DejaVuSans">{segment[cursor:]}</font>')
    return ''.join(pieces)


def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts):
    """Convert Markdown text into a flat list of paragraph-markup lines.

    Args:
        markdown_text: Raw Markdown.
        add_space_before_numbered: Insert an empty entry before each numbered
            line after the first, unless the line starts with ``"1."``.
        headings_to_fonts: Emit ``#``..``####`` headings as ``<hN>text</hN>``.

    Returns:
        ``(pdf_content, total_lines)`` where ``total_lines == len(pdf_content)``.
    """
    pdf_content = []
    first_numbered_seen = False

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if headings_to_fonts and line.startswith('#'):
            heading_match = _MD_HEADING_RE.match(line)
            if heading_match:
                level = len(heading_match.group(1))
                heading_text = heading_match.group(2).strip()
                pdf_content.append(f"<h{level}>{heading_text}</h{level}>")
                continue

        if add_space_before_numbered and _NUMBERED_RE.match(line):
            if first_numbered_seen and not line.startswith("1."):
                pdf_content.append("")
            first_numbered_seen = True

        line = detect_and_convert_links(line)
        line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
        # Only one text face is registered, so single-asterisk emphasis is
        # rendered as bold as well.
        line = re.sub(r'\*([^*]+?)\*', r'<b>\1</b>', line)
        pdf_content.append(line)

    return pdf_content, len(pdf_content)


def _fit_font_size(pdf_content, num_columns, base_font_size, col_width, usable_height):
    """Pick a font size so that *pdf_content* roughly fills the columns of one page."""
    min_font_size, max_font_size = 5, 16
    total_lines = len(pdf_content)
    avg_line_chars = sum(len(line) for line in pdf_content) / total_lines
    longest_line_words = max(len(line.split()) for line in pdf_content)
    lines_per_col = total_lines / num_columns

    size = int(usable_height / lines_per_col / 1.5)
    size = max(min_font_size, min(max_font_size, size))
    if avg_line_chars > col_width / size * 10:
        size = max(min_font_size, int(col_width / (avg_line_chars / 10)))
    if longest_line_words > 17 or lines_per_col > 20:
        font_scale = min(17 / max(longest_line_words, 17), 60 / max(lines_per_col, 20))
        size = max(min_font_size, int(base_font_size * font_scale))
    return size


def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2,
               add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document"):
    """Build a double-A4-wide PDF from Markdown documents followed by images.

    Each Markdown document is laid out as a table with *num_columns* columns;
    each image is scaled to fit and appended afterwards.

    Args:
        markdown_texts: Markdown source strings.
        image_files: Paths of images to append.
        base_font_size: Font size that is scaled down for long documents.
        num_columns: Number of text columns (values below 1 are treated as 1).
        add_space_before_numbered: See ``markdown_to_pdf_content``.
        headings_to_fonts: Render Markdown headings with heading styles.
        doc_title: PDF title metadata.

    Returns:
        The PDF as ``bytes``, or ``None`` when there is no input, font
        registration fails, or nothing could be laid out.
    """
    if not markdown_texts and not image_files:
        return None

    num_columns = max(1, int(num_columns))
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(page_width, page_height),
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
        title=doc_title
    )
    styles = getSampleStyleSheet()
    spacer_height = 10

    try:
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
        pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", "NotoEmoji-Bold.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    usable_height = page_height - 72 - spacer_height
    col_width = (page_width - 72) / num_columns
    emoji_font = "NotoEmoji-Bold"
    story = []

    for markdown_text in markdown_texts:
        pdf_content, _ = markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts)
        if not pdf_content:
            # ReportLab refuses to build a Table without rows.
            continue

        font_size = _fit_font_size(pdf_content, num_columns, base_font_size, col_width, usable_height)

        item_style = ParagraphStyle(
            'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
            fontSize=font_size, leading=font_size * 1.15, spaceAfter=1,
            linkUnderline=True
        )
        numbered_bold_style = ParagraphStyle(
            'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
            fontSize=font_size, leading=font_size * 1.15, spaceAfter=1,
            linkUnderline=True
        )
        section_style = ParagraphStyle(
            'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
            textColor=colors.darkblue, fontSize=font_size * 1.1,
            leading=font_size * 1.32, spaceAfter=2,
            linkUnderline=True
        )
        heading_styles = {}

        def heading_style_for(level):
            # One style object per heading level is enough for a document.
            if level not in heading_styles:
                heading_styles[level] = ParagraphStyle(
                    f'Heading{level}Style',
                    parent=styles['Heading1'],
                    fontName="DejaVuSans",
                    textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
                    fontSize=font_size * (1.6 - (level - 1) * 0.15),
                    leading=font_size * (1.8 - (level - 1) * 0.15),
                    spaceAfter=4 - (level - 1),
                    spaceBefore=6 - (level - 1),
                    linkUnderline=True
                )
            return heading_styles[level]

        # Fill the columns left to right with an equal share of entries each.
        lines_per_column = len(pdf_content) / num_columns
        columns = [[] for _ in range(num_columns)]
        current_column = 0
        current_line_count = 0
        for item in pdf_content:
            if current_line_count >= lines_per_column and current_column < num_columns - 1:
                current_column += 1
                current_line_count = 0
            columns[current_column].append(item)
            current_line_count += 1

        column_cells = []
        for column in columns:
            cells = []
            for item in column:
                heading_match = _HEADING_TAG_RE.match(item) if headings_to_fonts else None
                if heading_match:
                    markup = heading_match.group(2)
                    style = heading_style_for(int(heading_match.group(1)))
                elif item.startswith("<b>") and item.endswith("</b>") and item.count("<b>") == 1:
                    # A wholly bold line is a numbered entry or a section title.
                    # Lines with several bold spans keep their inline tags;
                    # slicing them here would leave unbalanced markup.
                    markup = item[3:-4].strip()
                    style = numbered_bold_style if _NUMBERED_RE.match(markup) else section_style
                else:
                    markup = item
                    style = item_style
                cells.append(Paragraph(apply_emoji_font(markup, emoji_font), style))
            column_cells.append(cells)

        # Pad the shorter columns; every cell gets its own flowable instance.
        max_cells = max(len(cells) for cells in column_cells)
        for cells in column_cells:
            cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

        table = Table(list(zip(*column_cells)), colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns - 1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))

        story.append(Spacer(1, spacer_height))
        story.append(table)
        story.append(Spacer(1, spacer_height * 2))

    # Images are sized for an A4 page but must also fit the document frame,
    # which by default loses 6pt of padding on each side.
    frame_padding = 12
    max_img_width = min(A4[0] - 40, doc.width - frame_padding)
    max_img_height = min(A4[1] - 40, doc.height - frame_padding)
    for img_path in image_files:
        try:
            with Image.open(img_path) as img:
                img_width, img_height = img.size
            scale = min(max_img_width / img_width, max_img_height / img_height)
            story.append(ReportLabImage(img_path, width=img_width * scale, height=img_height * scale))
            story.append(Spacer(1, spacer_height))
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole PDF.
            st.warning(f"Could not process image {img_path}: {e}")
            continue

    if not story:
        return None

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()


def pdf_to_image(pdf_bytes):
    """Render every page of a PDF to a PIL image at 2x zoom.

    Returns:
        A list of ``PIL.Image`` objects, or ``None`` when *pdf_bytes* is
        ``None`` or rendering fails (the failure is shown with ``st.error``).
    """
    if pdf_bytes is None:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None


def get_video_html(video_path, width="100%"):
    """Return a ``<video>`` element embedding *video_path* as a base64 data URL.

    Returns an empty string (after an ``st.warning``) when the file cannot be read.
    """
    try:
        encoded = base64.b64encode(Path(video_path).read_bytes()).decode()
    except Exception as e:
        st.warning(f"Could not load video {video_path}: {e}")
        return ""
    video_url = f"data:video/mp4;base64,{encoded}"
    # Kept on one line without indentation: st.markdown would render an
    # indented HTML block as a code block.
    return f'<video width="{width}" controls><source src="{video_url}" type="video/mp4"></video>'


def display_glossary_entity(k):
    """Show *k* in bold followed by search links for ArXiv, Wikipedia, Google and YouTube."""
    q = quote(k)
    links_md = ' '.join(f"[{label}]({template.format(q=q)})" for label, template in _SEARCH_URLS.items())
    st.markdown(f"**{k}** {links_md}", unsafe_allow_html=True)


def resolve_canvas_font(font_family):
    """Return a font name that ``canvas.setFont`` accepts.

    Built-in and already registered fonts are returned as they are. Any other
    name is registered from ``<name>.ttf``; if that fails a warning is shown
    and ``"Helvetica"`` is returned.
    """
    try:
        pdfmetrics.getFont(font_family)
    except Exception:
        # Not a known font yet: try a TrueType file named after it.
        try:
            pdfmetrics.registerFont(TTFont(font_family, f"{font_family}.ttf"))
        except Exception as e:
            st.warning(f"Font '{font_family}' is unavailable ({e}); using Helvetica instead.")
            return "Helvetica"
    return font_family


def draw_text_columns(c, text, num_columns, font_family, font_size,
                      page_size=letter, margin=40, gutter=20):
    """Draw *text* on canvas *c* as wrapped lines flowing through *num_columns* columns.

    When a column is full the next one is used; after the last column a new
    page is started. Empty input lines advance the cursor by one line.
    """
    page_w, page_h = page_size
    col_w = (page_w - 2 * margin - (num_columns - 1) * gutter) / num_columns
    line_height = font_size * 1.2
    avg_char_width = font_size * 0.6
    # textwrap needs a positive width.
    wrap_width = max(1, int(col_w / avg_char_width)) if avg_char_width > 0 else 100

    c.setFont(font_family, font_size)
    col = 0
    x = margin
    y = page_h - margin

    def next_column():
        nonlocal col, x, y
        col += 1
        if col >= num_columns:
            c.showPage()
            # showPage resets the graphics state, including the font.
            c.setFont(font_family, font_size)
            col = 0
        x = margin + col * (col_w + gutter)
        y = page_h - margin

    for paragraph in text.split("\n"):
        if not paragraph.strip():
            y -= line_height
            if y < margin:
                next_column()
            continue
        for line in textwrap.wrap(paragraph, wrap_width):
            if y < margin:
                next_column()
            c.drawString(x, y, line)
            y -= line_height
        y -= line_height


def extract_python_code(md: str) -> list:
    """Return the bodies of all ```` ```python ```` fenced blocks in *md*."""
    return re.findall(r"```python\s*(.*?)```", md, re.DOTALL)


def execute_code(code: str) -> tuple:
    """Execute *code* and capture what it prints.

    Returns:
        ``(stdout_text, None)`` on success or ``(None, error_message)`` when
        the code raises.
    """
    # SECURITY: exec() runs arbitrary code with the privileges of this app.
    # Only expose this tab to trusted users.
    buf = io.StringIO()
    # One dict serves as globals and locals, so functions defined by the
    # snippet can see its own imports and other top-level names.
    namespace = {"__name__": "__main__"}
    try:
        with redirect_stdout(buf):
            exec(code, namespace)
        return buf.getvalue(), None
    except Exception as e:
        return None, str(e)


# Tabs setup
tab1, tab2 = st.tabs(["📄 PDF Composer", "🧪 Code Interpreter"])

with tab1:
    st.header("📄 PDF Composer & Voice Generator 🚀")

    # Sidebar PDF text settings
    columns = st.sidebar.slider("Text columns", 1, 3, 2)
    font_family = st.sidebar.selectbox("Font", ["Helvetica", "Times-Roman", "Courier", "DejaVuSans"])
    font_size = st.sidebar.slider("Font size", 6, 24, 14)

    # Markdown input
    md_file = st.file_uploader("Upload Markdown (.md)", type=["md"])
    if md_file:
        md_text = md_file.getvalue().decode("utf-8")
        stem = Path(md_file.name).stem
    else:
        md_text = st.text_area("Or enter markdown text directly", height=200)
        stem = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Convert Markdown to plain text: render to HTML, drop the tags, then
    # decode entities so "&amp;" and friends do not leak into speech or PDFs.
    renderer = mistune.HTMLRenderer()
    markdown = mistune.create_markdown(renderer=renderer)
    rendered_html = markdown(md_text or "")
    plain_text = html.unescape(re.sub(r'<[^>]+>', '', rendered_html))

    # Voice settings
    languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
    # NOTE(review): this language choice is not used anywhere; the voice
    # selected below determines the spoken language.
    voice_choice = st.selectbox("Voice Language", list(languages.keys()))
    slow = st.checkbox("Slow Speech")

    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)

    if st.button("🔊 Generate & Download Voice MP3 from Text"):
        if plain_text.strip():
            voice_file = f"{stem}_{selected_voice}.mp3"
            try:
                cleaned_text = clean_for_speech(plain_text)
                speech_rate = "-25%" if slow else "+0%"
                audio_file = asyncio.run(
                    generate_audio(cleaned_text, selected_voice, voice_file, rate=speech_rate)
                )
                st.audio(audio_file)
                st.download_button("📥 Download MP3", data=Path(audio_file).read_bytes(),
                                   file_name=voice_file, mime="audio/mpeg")
            except Exception as e:
                st.error(f"Error generating voice: {e}")
        else:
            st.warning("No text to generate voice from.")

    # Image uploads and ordering
    imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
    ordered_images = []
    if imgs:
        df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
        edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
        uploads_by_name = {}
        for f in imgs:
            # The first upload wins when two files share a name.
            uploads_by_name.setdefault(f.name, f)
        for name in edited.sort_values("order")["name"]:
            if name in uploads_by_name:
                ordered_images.append(uploads_by_name[name])

    if st.button("🖋️ Generate PDF with Markdown & Images"):
        if not plain_text.strip() and not ordered_images:
            st.warning("Please provide some text or upload images to generate a PDF.")
        else:
            buf = io.BytesIO()
            # The text layout is computed for letter paper, so the canvas must use it too.
            c = canvas.Canvas(buf, pagesize=letter)
            page_has_content = False

            if plain_text.strip():
                draw_text_columns(c, plain_text, columns, resolve_canvas_font(font_family), font_size)
                page_has_content = True

            for img_f in ordered_images:
                try:
                    img = Image.open(img_f)
                    w, h = img.size
                    # Start a new page only when the current one is in use, so
                    # an image-only PDF does not begin with a blank page.
                    if page_has_content:
                        c.showPage()
                        page_has_content = False
                    c.setPageSize((w, h))
                    c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
                    page_has_content = True
                except Exception as e:
                    st.warning(f"Could not process image {img_f.name}: {e}")
                    continue

            c.save()
            pdf_name = f"{stem}.pdf"
            st.download_button("⬇️ Download PDF", data=buf.getvalue(), file_name=pdf_name, mime="application/pdf")

    st.markdown("---")
    st.subheader("📂 Available Assets")
    excluded_files = {'README.md', 'index.html'}
    asset_suffixes = ('.md', '.png', '.jpg', '.jpeg')
    assets = sorted(
        a for a in glob.glob("*.*")
        if a.lower().endswith(asset_suffixes) and a not in excluded_files
    )

    if 'selected_assets' not in st.session_state:
        st.session_state.selected_assets = []
    # Forget selections whose files no longer exist (for example after a delete).
    st.session_state.selected_assets = [a for a in st.session_state.selected_assets if a in assets]

    image_mime_types = {'png': 'image/png', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg'}

    if not assets:
        st.info("No available assets found.")
    else:
        for a in assets:
            ext = a.split('.')[-1].lower()
            cols = st.columns([1, 3, 1, 1])
            with cols[0]:
                is_selected = st.checkbox(
                    f"Select {a}", key=f"select_{a}",
                    value=a in st.session_state.selected_assets,
                    label_visibility="collapsed",
                )
                if is_selected and a not in st.session_state.selected_assets:
                    st.session_state.selected_assets.append(a)
                elif not is_selected and a in st.session_state.selected_assets:
                    st.session_state.selected_assets.remove(a)
            cols[1].write(a)
            try:
                if ext == 'md':
                    with open(a, 'r', encoding='utf-8') as f:
                        cols[2].download_button("📥", data=f.read(), file_name=a, mime="text/markdown")
                elif ext in image_mime_types:
                    with open(a, 'rb') as img_file:
                        cols[2].download_button("⬇️", data=img_file.read(), file_name=a,
                                                mime=image_mime_types[ext])
                cols[3].button("🗑️", key=f"del_{a}", on_click=delete_asset, args=(a,))
            except Exception as e:
                cols[3].error(f"Error handling file {a}: {e}")

    if st.button("📑 Generate PDF from Selected Assets"):
        if not st.session_state.selected_assets:
            st.warning("Please select at least one asset to generate a PDF.")
        else:
            markdown_texts = []
            image_files = []
            for a in st.session_state.selected_assets:
                ext = a.split('.')[-1].lower()
                if ext == 'md':
                    with open(a, 'r', encoding='utf-8') as f:
                        markdown_texts.append(f.read())
                elif ext in ['png', 'jpg', 'jpeg']:
                    image_files.append(a)

            with st.spinner("Generating PDF from selected assets..."):
                pdf_bytes = create_pdf(
                    markdown_texts=markdown_texts,
                    image_files=image_files,
                    base_font_size=14,
                    num_columns=2,
                    add_space_before_numbered=True,
                    headings_to_fonts=True,
                    doc_title="Combined_Selected_Assets"
                )

            if pdf_bytes:
                pdf_images = pdf_to_image(pdf_bytes)
                if pdf_images:
                    st.subheader("Preview of Generated PDF")
                    for i, img in enumerate(pdf_images):
                        st.image(img, caption=f"Page {i+1}", use_container_width=True)
                prefix = datetime.now().strftime("%Y%m%d_%H%M%S")
                st.download_button(
                    label="💾 Download Combined PDF",
                    data=pdf_bytes,
                    file_name=f"{prefix}_combined.pdf",
                    mime="application/pdf"
                )
            else:
                st.error("Failed to generate PDF.")

    st.markdown("---")
    st.subheader("🖼 Image Gallery")
    image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
    image_cols = st.slider("Gallery Columns 🖼", min_value=1, max_value=15, value=5, key="image_cols")
    if image_files:
        cols = st.columns(image_cols)
        for idx, image_file in enumerate(image_files):
            with cols[idx % image_cols]:
                try:
                    with Image.open(image_file) as img:
                        st.image(img, caption=image_file, use_container_width=True)
                    display_glossary_entity(os.path.splitext(image_file)[0])
                except Exception as e:
                    st.warning(f"Could not load image {image_file}: {e}")
    else:
        st.info("No images found in the current directory.")

    st.markdown("---")
    st.subheader("🎥 Video Gallery")
    video_files = glob.glob("*.mp4")
    video_cols = st.slider("Gallery Columns 🎬", min_value=1, max_value=5, value=3, key="video_cols")
    if video_files:
        cols = st.columns(video_cols)
        for idx, video_file in enumerate(video_files):
            with cols[idx % video_cols]:
                st.markdown(get_video_html(video_file, width="100%"), unsafe_allow_html=True)
                display_glossary_entity(os.path.splitext(video_file)[0])
    else:
        st.info("No videos found in the current directory.")

with tab2:
    st.header("🧪 Python Code Executor & Demo")

    up = st.file_uploader("Upload .py or .md", type=['py', 'md'])
    if 'code' not in st.session_state:
        st.session_state.code = DEFAULT_CODE

    if up:
        text = up.getvalue().decode()
        # Browsers do not report a consistent MIME type for Markdown files,
        # so the file extension is checked as well.
        if up.type == 'text/markdown' or up.name.lower().endswith('.md'):
            codes = extract_python_code(text)
            if codes:
                st.session_state.code = codes[0].strip()
            else:
                st.warning("No Python code block found in the markdown file.")
                st.session_state.code = ''
        else:
            st.session_state.code = text.strip()
        st.code(st.session_state.code, language='python')
    else:
        st.session_state.code = st.text_area("💻 Code Editor", value=st.session_state.code, height=400)

    c1, c2 = st.columns([1, 1])
    if c1.button("▶️ Run Code"):
        if st.session_state.code.strip():
            out, err = execute_code(st.session_state.code)
            if err:
                st.error(f"Execution Error:\n{err}")
            elif out:
                st.subheader("Output:")
                st.code(out)
            else:
                st.success("Executed with no standard output.")
        else:
            st.warning("No code to run.")
    if c2.button("🗑️ Clear Code"):
        st.session_state.code = ''
        st.rerun()