import io
import os
import re
import glob
import textwrap
from datetime import datetime
from pathlib import Path
import streamlit as st
import pandas as pd
from PIL import Image
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.utils import ImageReader
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import mistune
import fitz
import edge_tts
import asyncio
import base64
from urllib.parse import quote
# Page config: browser-tab title, wide layout and page icon for the whole app.
st.set_page_config(page_title="PDF & Code Interpreter", layout="wide", page_icon="π")
def delete_asset(path):
    """Remove the file at ``path`` and request a Streamlit rerun.

    A failed removal is reported in the UI through ``st.error`` instead of
    being raised; the rerun is requested whether or not the removal worked.
    """
    try:
        os.remove(path)
    except Exception as err:
        st.error(f"Error deleting file: {err}")
    st.rerun()
async def generate_audio(text, voice, filename):
    """Synthesize ``text`` with the edge-tts ``voice`` and save it to ``filename``.

    Returns ``filename`` once the save coroutine has completed.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
def clean_for_speech(text):
    """Return ``text`` prepared for text-to-speech.

    Every ``#`` character is dropped first, then every run of characters in
    the emoji / pictograph / dingbat ranges listed below is removed.
    """
    without_hashes = text.replace("#", "")
    pictograph_runs = re.compile(
        r"[\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+",
        flags=re.UNICODE,
    )
    return pictograph_runs.sub('', without_hashes)
# Matches either a markdown link ``[label](http...)`` or a bare http(s) URL.
# The negative lookbehind skips URLs that already sit inside an ``href="``
# attribute. Both forms are handled in ONE substitution pass so that anchors
# produced for markdown links are never scanned again as bare URLs.
_LINK_PATTERN = re.compile(
    r'\[(?P<label>.*?)\]\((?P<target>https?://[^\s\[\]()<>{}]+)\)'
    r'|(?<!href=")(?P<bare>https?://[^\s<>{}]+)',
    re.IGNORECASE,
)

# Sentence punctuation that may trail a bare URL but is not part of it.
_URL_TRAILING_PUNCTUATION = '.,;:!?\'"'


def detect_and_convert_links(text):
    """Turn markdown links and bare URLs in ``text`` into ``<a>`` markup.

    The result uses the ``<a href="..." color="blue">label</a>`` inline
    markup understood by ReportLab paragraphs.

    * ``[label](https://host/path)`` becomes an anchor showing ``label``.
    * A bare ``http://`` / ``https://`` URL becomes an anchor showing the URL;
      trailing sentence punctuation is kept outside the anchor.
    * URLs already inside an ``href="..."`` attribute are left untouched.

    Args:
        text: One line of markdown-ish text.

    Returns:
        The text with links replaced by anchor markup.
    """

    def _to_anchor(match):
        bare = match.group('bare')
        if bare is None:
            url, label, trailing = match.group('target'), match.group('label'), ''
        else:
            url = bare.rstrip(_URL_TRAILING_PUNCTUATION)
            label, trailing = url, bare[len(url):]
        return f'<a href="{url}" color="blue">{label}</a>{trailing}'

    return _LINK_PATTERN.sub(_to_anchor, text)
# Splits markup into alternating text / tag pieces; because of the capture
# group, ``split`` returns the tags at the odd indices.
_INLINE_TAG_PATTERN = re.compile(r'(<[^>]+>)')

# Runs of emoji / pictograph / dingbat characters.
_EMOJI_RUN_PATTERN = re.compile(
    r"[\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF]+"
)


def apply_emoji_font(text, emoji_font, text_font="DejaVuSans"):
    """Wrap emoji runs (and optionally plain text) in ``<font face=...>`` tags.

    Existing inline tags in ``text`` (anything matching ``<...>``) are passed
    through unchanged; only the text between tags is inspected.

    Args:
        text: Paragraph markup, possibly containing inline tags.
        emoji_font: Font name used for every run of emoji characters.
        text_font: Font name used for the non-emoji text between tags.
            Defaults to "DejaVuSans", the text face this file registers in
            ``create_pdf``. Pass ``None`` to leave plain text unwrapped so it
            inherits the paragraph style's font.

    Returns:
        The markup with ``<font face="...">`` wrappers inserted.
    """

    def _wrap_text(chunk):
        if text_font is None:
            return chunk
        return f'<font face="{text_font}">{chunk}</font>'

    pieces = []
    for index, segment in enumerate(_INLINE_TAG_PATTERN.split(text)):
        if index % 2:
            # Odd indices are the captured tags themselves: keep verbatim.
            pieces.append(segment)
            continue
        cursor = 0
        for match in _EMOJI_RUN_PATTERN.finditer(segment):
            start, end = match.span()
            if cursor < start:
                pieces.append(_wrap_text(segment[cursor:start]))
            pieces.append(f'<font face="{emoji_font}">{match.group(0)}</font>')
            cursor = end
        if cursor < len(segment):
            pieces.append(_wrap_text(segment[cursor:]))
    return ''.join(pieces)
def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts):
    """Convert markdown text into a flat list of paragraph-markup lines.

    Blank lines are dropped. Each remaining line becomes one entry:

    * With ``headings_to_fonts`` set, ``#`` .. ``####`` headings become
      ``<hN>text</hN>`` (N = heading level) so the caller can pick a style
      per level.
    * Otherwise links are converted by ``detect_and_convert_links``,
      ``**bold**`` becomes ``<b>bold</b>`` and ``*emphasis*`` becomes
      ``<i>emphasis</i>``.
    * With ``add_space_before_numbered`` set, an empty entry is inserted
      before every numbered line (``1.``, ``2.1.`` ...) except the first
      numbered line seen and lines starting with ``1.``.

    Args:
        markdown_text: Raw markdown source.
        add_space_before_numbered: Insert empty spacer entries as described.
        headings_to_fonts: Tag headings with ``<hN>`` instead of leaving the
            ``#`` prefix in the text.

    Returns:
        ``(pdf_content, total_lines)`` where ``total_lines`` is
        ``len(pdf_content)`` (spacer entries included).
    """
    number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
    heading_pattern = re.compile(r'^(#{1,4})\s+(.+)$')
    pdf_content = []
    first_numbered_seen = False

    for raw_line in (markdown_text or "").strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if headings_to_fonts and line.startswith('#'):
            heading_match = heading_pattern.match(line)
            if heading_match:
                level = len(heading_match.group(1))
                heading_text = heading_match.group(2).strip()
                pdf_content.append(f"<h{level}>{heading_text}</h{level}>")
                continue

        if add_space_before_numbered and number_pattern.match(line):
            if first_numbered_seen and not line.startswith("1."):
                pdf_content.append("")
            first_numbered_seen = True

        line = detect_and_convert_links(line)
        # Bold must be handled before emphasis so "**x**" is not half-eaten
        # by the single-asterisk pattern.
        line = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', line)
        line = re.sub(r'\*([^*]+?)\*', r'<i>\1</i>', line)
        pdf_content.append(line)

    return pdf_content, len(pdf_content)
def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document"):
    """Build a PDF from markdown documents and images and return its bytes.

    Each markdown text is converted with ``markdown_to_pdf_content``, split
    evenly (by line count) across ``num_columns`` columns and emitted as one
    table on a double-width A4 page. The font size is estimated from the
    number and length of the lines. Images follow, each scaled to fit.

    Args:
        markdown_texts: Iterable of markdown strings (may be empty/None).
        image_files: Iterable of image paths (may be empty/None).
        base_font_size: Base size used when long lines or many lines force a
            down-scaled font.
        num_columns: Number of text columns; values below 1 are treated as 1.
        add_space_before_numbered: Forwarded to ``markdown_to_pdf_content``.
        headings_to_fonts: Render ``<hN>`` entries with heading styles.
        doc_title: PDF document title.

    Returns:
        The PDF as ``bytes``, or ``None`` when there is no input or the fonts
        cannot be registered (the latter is reported through ``st.error``).
    """
    if not markdown_texts and not image_files:
        return None
    markdown_texts = markdown_texts or []
    image_files = image_files or []
    # Every per-column computation below divides by the column count.
    num_columns = max(1, int(num_columns))

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(page_width, page_height),
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
        title=doc_title
    )
    styles = getSampleStyleSheet()
    spacer_height = 10
    try:
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
        pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", "NotoEmoji-Bold.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    # Layout values that do not depend on the individual document.
    usable_height = page_height - 72 - spacer_height
    usable_width = page_width - 72
    col_width = usable_width / num_columns
    min_font_size = 5
    max_font_size = 16
    number_pattern = re.compile(r'^\d+(\.\d+)*\.\s')
    # Matches the "<hN>text</hN>" entries emitted for headings.
    heading_item_pattern = re.compile(r'<h(\d)>(.*?)</h\1>')

    story = []
    for markdown_text in markdown_texts:
        pdf_content, total_lines = markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_to_fonts)
        if not pdf_content:
            # Nothing to lay out; an empty Table would be rejected by ReportLab.
            continue

        # --- font size estimation -------------------------------------
        total_chars = sum(len(line) for line in pdf_content)
        longest_line_words = max(len(line.split()) for line in pdf_content)
        avg_line_chars = total_chars / total_lines if total_lines > 0 else 50
        lines_per_col = total_lines / num_columns
        target_height_per_line = usable_height / lines_per_col if lines_per_col > 0 else usable_height
        estimated_font_size = int(target_height_per_line / 1.5)
        adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
        if avg_line_chars > col_width / adjusted_font_size * 10:
            adjusted_font_size = max(min_font_size, int(col_width / (avg_line_chars / 10)))
        if longest_line_words > 17 or lines_per_col > 20:
            font_scale = min(17 / max(longest_line_words, 17), 60 / max(lines_per_col, 20))
            adjusted_font_size = max(min_font_size, int(base_font_size * font_scale))

        # --- paragraph styles -----------------------------------------
        item_style = ParagraphStyle(
            'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
            fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
            linkUnderline=True
        )
        numbered_bold_style = ParagraphStyle(
            'NumberedBoldStyle', parent=styles['Normal'], fontName="NotoEmoji-Bold",
            fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
            linkUnderline=True
        )
        section_style = ParagraphStyle(
            'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
            textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
            linkUnderline=True
        )

        # --- distribute entries over the columns ----------------------
        columns = [[] for _ in range(num_columns)]
        current_line_count = 0
        current_column = 0
        for item in pdf_content:
            if current_line_count >= lines_per_col and current_column < num_columns - 1:
                current_column += 1
                current_line_count = 0
            columns[current_column].append(item)
            current_line_count += 1

        # --- turn entries into Paragraph flowables --------------------
        column_cells = [[] for _ in range(num_columns)]
        for col_idx, column in enumerate(columns):
            for item in column:
                item = str(item)
                heading_match = heading_item_pattern.match(item) if headings_to_fonts else None
                if heading_match:
                    level = int(heading_match.group(1))
                    heading_text = heading_match.group(2)
                    heading_style = ParagraphStyle(
                        f'Heading{level}Style',
                        parent=styles['Heading1'],
                        fontName="DejaVuSans",
                        textColor=colors.darkblue if level == 1 else (colors.black if level > 2 else colors.blue),
                        fontSize=adjusted_font_size * (1.6 - (level-1)*0.15),
                        leading=adjusted_font_size * (1.8 - (level-1)*0.15),
                        spaceAfter=4 - (level-1),
                        spaceBefore=6 - (level-1),
                        linkUnderline=True
                    )
                    column_cells[col_idx].append(Paragraph(apply_emoji_font(heading_text, "NotoEmoji-Bold"), heading_style))
                elif item.startswith("<b>") and item.endswith("</b>") and item.count("<b>") == 1:
                    # A line that is bold as a whole is a section title. The
                    # single-<b> check keeps "<b>a</b> x <b>b</b>" from being
                    # sliced into unbalanced markup.
                    content = item[3:-4].strip()
                    bold_style = numbered_bold_style if number_pattern.match(content) else section_style
                    column_cells[col_idx].append(Paragraph(apply_emoji_font(content, "NotoEmoji-Bold"), bold_style))
                else:
                    column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "NotoEmoji-Bold"), item_style))

        # Pad shorter columns so the rows line up; each cell gets its own
        # flowable rather than one shared Paragraph instance.
        max_cells = max(len(cells) for cells in column_cells)
        for cells in column_cells:
            cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

        table_data = list(zip(*column_cells))
        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(Spacer(1, spacer_height))
        story.append(table)
        story.append(Spacer(1, spacer_height * 2))

    # Images are fitted into an A4-sized box (minus 40pt), but never taller
    # than the page frame.
    # NOTE(review): the 12pt allowance assumes ReportLab's default 6pt frame
    # padding at top and bottom -- confirm against the installed version.
    max_img_width = A4[0] - 40
    max_img_height = min(A4[1] - 40, doc.height - 12)
    for img_path in image_files:
        try:
            with Image.open(img_path) as img:
                img_width, img_height = img.size
            scale = min(max_img_width / img_width, max_img_height / img_height)
            story.append(ReportLabImage(img_path, width=img_width * scale, height=img_height * scale))
            story.append(Spacer(1, spacer_height))
        except Exception as e:
            st.warning(f"Could not process image {img_path}: {e}")
            continue

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
def pdf_to_image(pdf_bytes):
    """Render every page of a PDF to a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes, or ``None``.

    Returns:
        A list with one RGB ``PIL.Image`` per page, or ``None`` when
        ``pdf_bytes`` is ``None`` or rendering fails (the failure is reported
        through ``st.error``).
    """
    if pdf_bytes is None:
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                # alpha=False keeps the samples 3 bytes per pixel, which is
                # what the "RGB" mode passed to frombytes expects.
                pix = page.get_pixmap(matrix=zoom, alpha=False)
                images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
            return images
        finally:
            # Release the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
def get_video_html(video_path, width="100%"):
    """Return an HTML ``<video>`` element with the file embedded as a data URL.

    The whole file is read and base64-encoded into the ``src`` attribute.

    Args:
        video_path: Path of the MP4 file to embed.
        width: Value for the element's ``width`` attribute.

    Returns:
        The HTML snippet, or ``""`` when the file cannot be read (reported
        through ``st.warning``).
    """
    try:
        # Context manager so the handle is closed even if reading fails.
        with open(video_path, 'rb') as video_file:
            encoded = base64.b64encode(video_file.read()).decode()
    except Exception as e:
        st.warning(f"Could not load video {video_path}: {e}")
        return ""
    video_url = f"data:video/mp4;base64,{encoded}"
    return f'''
<video width="{width}" controls autoplay muted loop>
<source src="{video_url}" type="video/mp4">
Your browser does not support the video tag.
</video>
'''
def display_glossary_entity(k):
    """Render ``k`` in bold followed by search links for it.

    Links point at arXiv search, Wikipedia, Google search and YouTube search,
    with ``k`` URL-quoted into each address.

    Args:
        k: The term to display and search for.
    """
    quoted = quote(k)
    # An ordered sequence of pairs rather than a dict: two entries share the
    # same label, and as dict keys the later one would silently replace the
    # earlier one.
    search_links = (
        ("ππArXiv", f"https://arxiv.org/search/?query={quoted}&searchtype=all"),
        ("π", f"https://en.wikipedia.org/wiki/{quoted}"),
        ("π", f"https://www.google.com/search?q={quoted}"),
        ("π₯", f"https://www.youtube.com/results?search_query={quoted}"),
    )
    links_md = ' '.join(f"[{label}]({url})" for label, url in search_links)
    st.markdown(f"**{k}** {links_md}", unsafe_allow_html=True)
# Tabs setup
tab1, tab2 = st.tabs(["π PDF Composer", "π§ͺ Code Interpreter"])
with tab1:
    st.header("π PDF Composer & Voice Generator π")

    # Sidebar PDF text settings
    columns = st.sidebar.slider("Text columns", 1, 3, 2)
    font_family = st.sidebar.selectbox("Font", ["Helvetica", "Times-Roman", "Courier", "DejaVuSans"])
    font_size = st.sidebar.slider("Font size", 6, 24, 14)

    # Markdown input: an uploaded file wins over the text area.
    md_file = st.file_uploader("Upload Markdown (.md)", type=["md"])
    if md_file:
        md_text = md_file.getvalue().decode("utf-8")
        stem = Path(md_file.name).stem
    else:
        md_text = st.text_area("Or enter markdown text directly", height=200)
        stem = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Convert Markdown to plain text (render to HTML, then drop the tags).
    renderer = mistune.HTMLRenderer()
    markdown = mistune.create_markdown(renderer=renderer)
    html = markdown(md_text or "")
    plain_text = re.sub(r'<[^>]+>', '', html)

    # Voice settings
    languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
    voice_choice = st.selectbox("Voice Language", list(languages.keys()))
    voice_lang = languages[voice_choice]
    slow = st.checkbox("Slow Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("π Generate & Download Voice MP3 from Text"):
        if plain_text.strip():
            voice_file = f"{stem}_{selected_voice}.mp3"
            try:
                cleaned_text = clean_for_speech(plain_text)
                audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, voice_file))
                st.audio(audio_file)
                with open(audio_file, 'rb') as mp3:
                    st.download_button("π₯ Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
            except Exception as e:
                st.error(f"Error generating voice: {e}")
        else:
            st.warning("No text to generate voice from.")

    # Image uploads and ordering (the "order" column is user-editable).
    imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
    ordered_images = []
    if imgs:
        df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
        edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
        for _, row in edited.sort_values("order").iterrows():
            for f in imgs:
                if f.name == row['name']:
                    ordered_images.append(f)
                    break

    if st.button("ποΈ Generate PDF with Markdown & Images"):
        if not plain_text.strip() and not ordered_images:
            st.warning("Please provide some text or upload images to generate a PDF.")
        else:
            buf = io.BytesIO()
            # The text layout below is computed for letter-sized pages, so the
            # canvas must use that size too instead of the library default.
            c = canvas.Canvas(buf, pagesize=letter)
            has_text = bool(plain_text.strip())
            if has_text:
                # "DejaVuSans" is not a built-in PDF font; register it before
                # use and fall back to Helvetica when the TTF is unavailable.
                if font_family == "DejaVuSans" and "DejaVuSans" not in pdfmetrics.getRegisteredFontNames():
                    try:
                        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
                    except Exception as e:
                        st.warning(f"Could not load DejaVuSans ({e}); using Helvetica instead.")
                        font_family = "Helvetica"
                page_w, page_h = letter
                margin = 40
                gutter = 20
                col_w = (page_w - 2*margin - (columns-1)*gutter) / columns
                c.setFont(font_family, font_size)
                line_height = font_size * 1.2
                col = 0
                x = margin
                y = page_h - margin
                # Rough character budget per line: 0.6 * font size per glyph.
                avg_char_width = font_size * 0.6
                wrap_width = int(col_w / avg_char_width) if avg_char_width > 0 else 100
                for paragraph in plain_text.split("\n"):
                    if not paragraph.strip():
                        # Blank line: just advance, moving to the next
                        # column/page when the bottom margin is passed.
                        y -= line_height
                        if y < margin:
                            col += 1
                            if col >= columns:
                                c.showPage()
                                c.setFont(font_family, font_size)
                                col = 0
                            x = margin + col*(col_w+gutter)
                            y = page_h - margin
                        continue
                    for line in textwrap.wrap(paragraph, wrap_width):
                        if y < margin:
                            col += 1
                            if col >= columns:
                                c.showPage()
                                c.setFont(font_family, font_size)
                                col = 0
                            x = margin + col*(col_w+gutter)
                            y = page_h - margin
                        c.drawString(x, y, line)
                        y -= line_height
                    y -= line_height

            # One page per image, sized to the image. A page break is only
            # emitted when the current page already has content, so an
            # image-only PDF does not start with a blank page.
            page_has_content = has_text
            for img_f in ordered_images:
                try:
                    img = Image.open(img_f)
                    w, h = img.size
                    if page_has_content:
                        c.showPage()
                    c.setPageSize((w, h))
                    c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
                    page_has_content = True
                except Exception as e:
                    st.warning(f"Could not process image {img_f.name}: {e}")
                    continue
            c.save()
            buf.seek(0)
            pdf_name = f"{stem}.pdf"
            st.download_button("β¬οΈ Download PDF", data=buf, file_name=pdf_name, mime="application/pdf")

    st.markdown("---")
    st.subheader("π Available Assets")
    all_assets = glob.glob("*.*")
    excluded_extensions = ['.py', '.ttf', '.txt']
    excluded_files = ['README.md', 'index.html']
    assets = sorted([
        a for a in all_assets
        if not (a.lower().endswith(tuple(excluded_extensions)) or a in excluded_files)
        and a.lower().endswith(('.md', '.png', '.jpg', '.jpeg'))
    ])
    if 'selected_assets' not in st.session_state:
        st.session_state.selected_assets = []
    # Forget selections whose file no longer exists (e.g. after a delete), so
    # PDF generation below never tries to open a missing file.
    st.session_state.selected_assets = [a for a in st.session_state.selected_assets if a in assets]

    if not assets:
        st.info("No available assets found.")
    else:
        for a in assets:
            ext = a.split('.')[-1].lower()
            cols = st.columns([1, 3, 1, 1])
            with cols[0]:
                # Non-empty label (hidden) so Streamlit does not flag the widget.
                is_selected = st.checkbox(
                    f"Select {a}", key=f"select_{a}",
                    value=a in st.session_state.selected_assets,
                    label_visibility="collapsed",
                )
                if is_selected and a not in st.session_state.selected_assets:
                    st.session_state.selected_assets.append(a)
                elif not is_selected and a in st.session_state.selected_assets:
                    st.session_state.selected_assets.remove(a)
            cols[1].write(a)
            try:
                if ext == 'md':
                    with open(a, 'r', encoding='utf-8') as f:
                        cols[2].download_button("π₯", data=f.read(), file_name=a, mime="text/markdown")
                elif ext in ['png', 'jpg', 'jpeg']:
                    # "image/jpg" is not a registered MIME type; JPEG files
                    # are "image/jpeg" regardless of the extension spelling.
                    image_mime = "image/jpeg" if ext in ('jpg', 'jpeg') else f"image/{ext}"
                    with open(a, 'rb') as img_file:
                        cols[2].download_button("β¬οΈ", data=img_file, file_name=a, mime=image_mime)
                cols[3].button("ποΈ", key=f"del_{a}", on_click=delete_asset, args=(a,))
            except Exception as e:
                cols[3].error(f"Error handling file {a}: {e}")

    if st.button("π Generate PDF from Selected Assets"):
        if not st.session_state.selected_assets:
            st.warning("Please select at least one asset to generate a PDF.")
        else:
            markdown_texts = []
            image_files = []
            for a in st.session_state.selected_assets:
                ext = a.split('.')[-1].lower()
                if ext == 'md':
                    with open(a, 'r', encoding='utf-8') as f:
                        markdown_texts.append(f.read())
                elif ext in ['png', 'jpg', 'jpeg']:
                    image_files.append(a)
            with st.spinner("Generating PDF from selected assets..."):
                pdf_bytes = create_pdf(
                    markdown_texts=markdown_texts,
                    image_files=image_files,
                    base_font_size=14,
                    num_columns=2,
                    add_space_before_numbered=True,
                    headings_to_fonts=True,
                    doc_title="Combined_Selected_Assets"
                )
            if pdf_bytes:
                pdf_images = pdf_to_image(pdf_bytes)
                if pdf_images:
                    st.subheader("Preview of Generated PDF")
                    for i, img in enumerate(pdf_images):
                        st.image(img, caption=f"Page {i+1}", use_container_width=True)
                prefix = datetime.now().strftime("%Y%m%d_%H%M%S")
                st.download_button(
                    label="πΎ Download Combined PDF",
                    data=pdf_bytes,
                    file_name=f"{prefix}_combined.pdf",
                    mime="application/pdf"
                )
            else:
                st.error("Failed to generate PDF.")

    st.markdown("---")
    st.subheader("πΌ Image Gallery")
    image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
    image_cols = st.slider("Gallery Columns πΌ", min_value=1, max_value=15, value=5, key="image_cols")
    if image_files:
        cols = st.columns(image_cols)
        for idx, image_file in enumerate(image_files):
            with cols[idx % image_cols]:
                try:
                    img = Image.open(image_file)
                    st.image(img, caption=image_file, use_container_width=True)
                    display_glossary_entity(os.path.splitext(image_file)[0])
                except Exception as e:
                    st.warning(f"Could not load image {image_file}: {e}")
    else:
        st.info("No images found in the current directory.")

    st.markdown("---")
    st.subheader("π₯ Video Gallery")
    video_files = glob.glob("*.mp4")
    video_cols = st.slider("Gallery Columns π¬", min_value=1, max_value=5, value=3, key="video_cols")
    if video_files:
        cols = st.columns(video_cols)
        for idx, video_file in enumerate(video_files):
            with cols[idx % video_cols]:
                st.markdown(get_video_html(video_file, width="100%"), unsafe_allow_html=True)
                display_glossary_entity(os.path.splitext(video_file)[0])
    else:
        st.info("No videos found in the current directory.")
with tab2:
    st.header("π§ͺ Python Code Executor & Demo")
    import io
    import sys
    from contextlib import redirect_stdout

    # Demo program shown in the editor until the user supplies other code.
    DEFAULT_CODE = '''import streamlit as st
import random
st.title("π Demo App")
st.markdown("Random number and color demo")
col1, col2 = st.columns(2)
with col1:
    num = st.number_input("Number:", 1, 100, 10)
    mul = st.slider("Multiplier:", 1, 10, 2)
    if st.button("Calc"):
        st.write(num * mul)
with col2:
    color = st.color_picker("Pick color","#ff0000")
    st.markdown(f'<span style="color:{color}">Color</span>', unsafe_allow_html=True)
'''

    def extract_python_code(md: str) -> list:
        """Return the bodies of all ```python fenced blocks found in ``md``."""
        return re.findall(r"```python\s*(.*?)```", md, re.DOTALL)

    def execute_code(code: str) -> tuple:
        """Run ``code`` and capture what it prints.

        Returns ``(stdout_text, None)`` on success and
        ``(None, "ExceptionType: message")`` when the code raises.

        SECURITY: this executes arbitrary user-supplied code inside the app
        process with ``exec``; there is no sandbox. Only expose this tab to
        trusted users.
        """
        buf = io.StringIO()
        # A single namespace serves as both globals and locals. With separate
        # dicts, functions defined by the code cannot see its other top-level
        # names (imports, helpers, themselves for recursion).
        namespace = {"__name__": "__main__"}
        try:
            with redirect_stdout(buf):
                exec(code, namespace)
            return buf.getvalue(), None
        except Exception as e:
            return None, f"{type(e).__name__}: {e}"

    up = st.file_uploader("Upload .py or .md", type=['py', 'md'])
    if 'code' not in st.session_state:
        st.session_state.code = DEFAULT_CODE
    if up:
        text = up.getvalue().decode()
        # The browser-reported MIME type for .md files is not reliable, so the
        # file extension is checked as well.
        is_markdown = up.name.lower().endswith('.md') or up.type == 'text/markdown'
        if is_markdown:
            codes = extract_python_code(text)
            if codes:
                st.session_state.code = codes[0].strip()
            else:
                st.warning("No Python code block found in the markdown file.")
                st.session_state.code = ''
        else:
            st.session_state.code = text.strip()
        st.code(st.session_state.code, language='python')
    else:
        st.session_state.code = st.text_area("π» Code Editor", value=st.session_state.code, height=400)

    c1, c2 = st.columns([1, 1])
    if c1.button("βΆοΈ Run Code"):
        if st.session_state.code.strip():
            out, err = execute_code(st.session_state.code)
            if err:
                st.error(f"Execution Error:\n{err}")
            elif out:
                st.subheader("Output:")
                st.code(out)
            else:
                st.success("Executed with no standard output.")
        else:
            st.warning("No code to run.")
    if c2.button("ποΈ Clear Code"):
        st.session_state.code = ''
        st.rerun()