# Spaces: Sleeping (hosting-page status text captured with the file; not program code)
import io | |
import os | |
import re | |
import glob | |
import textwrap | |
from datetime import datetime | |
from pathlib import Path | |
import streamlit as st | |
import pandas as pd | |
from PIL import Image | |
from reportlab.pdfgen import canvas | |
from reportlab.lib.pagesizes import letter # Using letter size for consistency | |
from reportlab.lib.utils import ImageReader | |
import mistune | |
from gtts import gTTS | |
# Page config
# NOTE(review): the page_icon literal looks mis-encoded (probably an emoji
# before the file was extracted) — confirm the intended icon.
st.set_page_config(
    page_title="PDF & Code Interpreter",
    page_icon="๐",
    layout="wide",
)
def delete_asset(path):
    """Delete an asset file and drop it from the saved selection.

    Intended to be used as a button ``on_click`` callback. Streamlit reruns
    the script on its own once a callback returns, so no explicit
    ``st.rerun()`` is issued here (inside a callback that call is a no-op
    that only produces a warning).

    Args:
        path: Path of the file to delete; also the key under which the file
            is tracked in ``st.session_state.selected_assets``.
    """
    try:
        os.remove(path)
    except OSError as e:
        st.error(f"Error deleting file: {e}")
        return
    # Forget the checkbox state of a file that no longer exists.
    if 'selected_assets' in st.session_state:
        st.session_state.selected_assets.pop(path, None)
# --- Function to Generate Combined PDF ---
def _markdown_file_to_text(path):
    """Return the content of the markdown file at *path* with markup removed."""
    from html import unescape  # local import keeps this block self-contained

    with open(path, 'r', encoding='utf-8') as f:
        md_text = f.read()
    to_html = mistune.create_markdown(renderer=mistune.HTMLRenderer())
    rendered = to_html(md_text or "")
    # Strip the tags first, then decode entities (&amp;, &lt;, ...) so that
    # literal "&" and "<" characters from the markdown come out as text.
    return unescape(re.sub(r'<[^>]+>', '', rendered))


def _flow_text_in_columns(c, text, page_size, num_columns=2,
                          font_family="Helvetica", font_size=14,
                          margin=40, gutter=15):
    """Draw *text* on canvas *c* in equal-width columns, adding pages as needed.

    Each "\\n"-separated paragraph is wrapped to the column width; blank
    paragraphs only add half a line of vertical space. The last page is left
    open (no trailing ``showPage``).
    """
    page_w, page_h = page_size
    col_w = (page_w - 2 * margin - (num_columns - 1) * gutter) / num_columns
    # Rough wrap estimate: an average glyph is taken to be 0.6 * font size wide.
    wrap_width = max(1, int(col_w / (font_size * 0.6)))
    line_height = font_size * 1.3
    top_y = page_h - margin

    c.setFont(font_family, font_size)
    col = 0
    y = top_y

    def next_column():
        """Move to the top of the next column, or of a new page after the last one."""
        nonlocal col, y
        col += 1
        if col >= num_columns:
            c.showPage()
            c.setFont(font_family, font_size)  # re-set font after new page
            col = 0
        y = top_y

    for paragraph in text.split("\n"):
        if not paragraph.strip():
            # Blank line: add a little vertical space, nothing to draw.
            y -= line_height / 2
            if y < margin:
                next_column()
            continue
        for line in textwrap.wrap(paragraph, wrap_width):
            if y < margin:
                next_column()
            c.drawString(margin + col * (col_w + gutter), y, line)
            y -= line_height
        y -= line_height / 2  # extra space after each paragraph


def _draw_centered_image(c, path, page_size, margin=40):
    """Draw the image file at *path* on the current page, scaled to fit and centered."""
    page_w, page_h = page_size
    # Only the dimensions are needed here; release the file handle right away.
    with Image.open(path) as img:
        img_w, img_h = img.size
    available_w = page_w - 2 * margin
    available_h = page_h - 2 * margin
    # One scale factor for both axes preserves the aspect ratio.
    scale = min(available_w / img_w, available_h / img_h)
    draw_w = img_w * scale
    draw_h = img_h * scale
    pos_x = margin + (available_w - draw_w) / 2
    pos_y = margin + (available_h - draw_h) / 2
    c.drawImage(path, pos_x, pos_y, width=draw_w, height=draw_h, preserveAspectRatio=True)


def generate_combined_pdf(selected_asset_paths):
    """Generate a single PDF from selected markdown and image file paths.

    Markdown files (``.md``) are converted to plain text, joined with a
    ``---`` separator and laid out first in two columns. Image files
    (``.png``, ``.jpg``, ``.jpeg``, ``.gif``) follow, one per page. Files
    that cannot be processed are reported with ``st.warning`` and skipped.

    Args:
        selected_asset_paths: Iterable of file path strings.

    Returns:
        The PDF document as ``bytes``. If nothing could be rendered the PDF
        holds a single page with an explanatory message.
    """
    buf = io.BytesIO()
    # Pass the page size explicitly: all layout below uses letter coordinates,
    # whereas ReportLab's default page size is A4.
    c = canvas.Canvas(buf, pagesize=letter)

    # --- Process Markdown Files ---
    text_parts = []
    for path in selected_asset_paths:
        if not path.lower().endswith('.md'):
            continue
        try:
            text = _markdown_file_to_text(path)
        except Exception as e:
            st.warning(f"Could not read or process markdown file {path}: {e}")
            continue
        if text:
            text_parts.append(text)
    all_plain_text = "\n\n---\n\n".join(text_parts)

    has_text = bool(all_plain_text.strip())
    if has_text:
        _flow_text_in_columns(c, all_plain_text, letter)
        c.showPage()  # close the last text page so images start on a fresh one

    # --- Process Image Files ---
    images_drawn = 0
    for path in selected_asset_paths:
        if not path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue
        try:
            _draw_centered_image(c, path, letter)
        except Exception as e:
            st.warning(f"Could not process image file {path}: {e}")
            continue
        # Finish the page only after a successful draw, so neither a failed
        # image nor the text/image boundary leaves a blank page behind.
        c.showPage()
        images_drawn += 1

    # If no markdown or images were rendered, say so on the page.
    if not has_text and images_drawn == 0:
        _, page_h = letter
        c.drawString(40, page_h - 40, "No selected markdown or image files to generate PDF.")

    c.save()
    return buf.getvalue()
# --- End of Combined PDF Function ---
# Tabs setup
# Stdlib helpers used only by this section.
import mimetypes
from html import unescape


def _build_original_pdf(plain_text, images, columns, font_family, font_size):
    """Render text in columns followed by one image per page.

    Args:
        plain_text: Text to lay out; may be blank.
        images: Uploaded image file objects, in output order.
        columns: Number of text columns per page.
        font_family: ReportLab font name used for the text.
        font_size: Font size in points.

    Returns:
        An ``io.BytesIO`` rewound to the start of the finished PDF.
    """
    buf = io.BytesIO()
    page_w, page_h = letter
    # Pass the page size explicitly: the layout below uses letter
    # coordinates, whereas ReportLab's default page size is A4.
    c = canvas.Canvas(buf, pagesize=letter)
    margin = 40
    has_text = bool(plain_text.strip())

    if has_text:
        gutter = 20
        col_w = (page_w - 2 * margin - (columns - 1) * gutter) / columns
        line_height = font_size * 1.2
        # Rough wrap estimate: an average glyph is taken as 0.6 * font size wide.
        wrap_width = max(1, int(col_w / (font_size * 0.6)))
        top_y = page_h - margin
        col = 0
        y = top_y
        c.setFont(font_family, font_size)

        def next_column():
            """Move to the top of the next column, or of a new page after the last one."""
            nonlocal col, y
            col += 1
            if col >= columns:
                c.showPage()
                c.setFont(font_family, font_size)
                col = 0
            y = top_y

        for paragraph in plain_text.split("\n"):
            if not paragraph.strip():  # blank line: vertical space only
                y -= line_height / 2
                if y < margin:
                    next_column()
                continue
            for line in textwrap.wrap(paragraph, wrap_width):
                if y < margin:
                    next_column()
                c.drawString(margin + col * (col_w + gutter), y, line)
                y -= line_height
            y -= line_height / 2  # space after paragraph
        c.showPage()  # close the last text page so images start on a fresh one

    images_drawn = 0
    for img_f in images:
        try:
            img = Image.open(img_f)  # img_f is a file-like object from st.file_uploader
            w, h = img.size
            # Scale to fit a letter page within margins, centered.
            available_w = page_w - 2 * margin
            available_h = page_h - 2 * margin
            scale = min(available_w / w, available_h / h)
            draw_w = w * scale
            draw_h = h * scale
            pos_x = margin + (available_w - draw_w) / 2
            pos_y = margin + (available_h - draw_h) / 2
            # Use ImageReader for file-like objects
            c.drawImage(ImageReader(img_f), pos_x, pos_y, width=draw_w, height=draw_h, preserveAspectRatio=True)
        except Exception as e:
            st.warning(f"Could not process uploaded image {img_f.name}: {e}")
            continue
        # Finish the page only after a successful draw so that no blank page
        # is left between the text and the first image or after a failure.
        c.showPage()
        images_drawn += 1

    # Nothing could be rendered (e.g. every image failed): say so on the page.
    if not has_text and images_drawn == 0:
        c.drawString(40, page_h - 40, "No content to generate Original PDF.")

    c.save()
    buf.seek(0)
    return buf


tab1, tab2 = st.tabs(["๐ PDF Composer", "๐งช Code Interpreter"])

with tab1:
    st.header("๐ PDF Composer & Voice Generator ๐")

    # Sidebar settings for the original PDF composer. These are separate from
    # the fixed settings used for the combined PDF generation further down.
    st.sidebar.markdown("### Original PDF Composer Settings")
    columns = st.sidebar.slider("Text columns (Original PDF)", 1, 3, 1)
    font_family = st.sidebar.selectbox("Font (Original PDF)", ["Helvetica", "Times-Roman", "Courier"])
    font_size = st.sidebar.slider("Font size (Original PDF)", 6, 24, 12)

    # Markdown input for the original PDF composer
    st.markdown("#### Original PDF Composer Input")
    md_file = st.file_uploader("Upload Markdown (.md) for Original PDF", type=["md"])
    if md_file:
        md_text = md_file.getvalue().decode("utf-8")
        # Output files are named after the uploaded file ...
        original_pdf_stem = Path(md_file.name).stem
    else:
        md_text = st.text_area("Or enter markdown text directly for Original PDF", height=200)
        # ... or after the current time when the text area is used.
        original_pdf_stem = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Convert Markdown to plain text: render to HTML, strip the tags, then
    # decode entities so "&" / "<" from the input are not left as "&amp;" / "&lt;".
    to_html = mistune.create_markdown(renderer=mistune.HTMLRenderer())
    rendered_html = to_html(md_text or "")
    original_pdf_plain_text = unescape(re.sub(r'<[^>]+>', '', rendered_html))

    # Voice settings (applies to the text entered above)
    st.markdown("#### Voice Generation from Text Input")
    languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
    voice_choice = st.selectbox("Voice Language", list(languages.keys()))
    voice_lang = languages[voice_choice]
    slow = st.checkbox("Slow Speech")
    if st.button("๐ Generate & Download Voice MP3 from Text"):
        if original_pdf_plain_text.strip():
            voice_file = f"{original_pdf_stem}.mp3"
            try:
                tts = gTTS(text=original_pdf_plain_text, lang=voice_lang, slow=slow)
                tts.save(voice_file)
                st.audio(voice_file)
                with open(voice_file, 'rb') as mp3:
                    st.download_button("๐ฅ Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
            except Exception as e:
                st.error(f"Error generating voice: {e}")
        else:
            st.warning("No text to generate voice from.")

    # Image uploads and ordering for the original PDF composer
    st.markdown("#### Images for Original PDF")
    imgs = st.file_uploader("Upload Images for Original PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
    ordered_images_original_pdf = []
    if imgs:
        # Editable table: the user changes the "order" column to reorder images.
        df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
        edited = st.data_editor(df_imgs, use_container_width=True)
        # Row index i corresponds to imgs[i]; going through the index keeps
        # files with identical names distinct.
        ordered_images_original_pdf = [imgs[i] for i in edited.sort_values("order").index]

    # --- Original PDF Generation Button ---
    if st.button("๐๏ธ Generate Original PDF with Markdown & Images"):
        if not original_pdf_plain_text.strip() and not ordered_images_original_pdf:
            st.warning("Please provide some text or upload images to generate the Original PDF.")
        else:
            pdf_buf = _build_original_pdf(
                original_pdf_plain_text,
                ordered_images_original_pdf,
                columns,
                font_family,
                font_size,
            )
            pdf_name = f"{original_pdf_stem}.pdf"
            st.download_button("โฌ๏ธ Download Original PDF", data=pdf_buf, file_name=pdf_name, mime="application/pdf")

    st.markdown("---")
    st.subheader("๐ Available Assets")
    st.markdown("Select assets below to include in a combined PDF.")

    # Every file with an extension in the working directory, minus source
    # files, fonts and a couple of fixed names.
    excluded_extensions = ('.py', '.ttf')
    excluded_files = {'README.md', 'index.html'}
    assets = sorted(
        a for a in glob.glob("*.*")
        if not a.lower().endswith(excluded_extensions)
        and os.path.basename(a) not in excluded_files
    )

    # Selection state: drop entries for files that no longer exist and add
    # new files as unselected.
    if 'selected_assets' not in st.session_state:
        st.session_state.selected_assets = {}
    current_asset_paths = {os.path.abspath(a) for a in assets}
    st.session_state.selected_assets = {
        k: v for k, v in st.session_state.selected_assets.items()
        if os.path.abspath(k) in current_asset_paths
    }
    for asset_path in assets:
        st.session_state.selected_assets.setdefault(asset_path, False)

    # --- Display Assets with Checkboxes ---
    if not assets:
        st.info("No available assets found.")
    else:
        header_cols = st.columns([0.5, 3, 1, 1])
        header_cols[1].write("**File**")

        text_exts = ('md', 'txt', 'csv', 'json', 'xml', 'log')
        for a in assets:
            ext = a.split('.')[-1].lower()
            cols = st.columns([0.5, 3, 1, 1])

            # Checkbox: a real (hidden) label instead of an empty string.
            asset_key = os.path.abspath(a)
            st.session_state.selected_assets[a] = cols[0].checkbox(
                f"Select {a}",
                value=st.session_state.selected_assets.get(a, False),
                key=f"select_asset_{asset_key}",
                label_visibility="collapsed",
            )
            cols[1].write(a)

            # Download button; files are read as bytes so that text files in
            # any encoding can still be downloaded.
            try:
                with open(a, 'rb') as fh:
                    payload = fh.read()
            except OSError as e:
                cols[2].error(f"Error: {e}")
            else:
                mime = mimetypes.guess_type(a)[0]
                if mime is None and ext in text_exts:
                    mime = "text/plain"
                label = "๐ฅ" if ext in ('pdf', 'mp3') else "โฌ๏ธ"
                cols[2].download_button(label, data=payload, file_name=a, mime=mime, key=f"download_{a}")

            # The delete button stays available even if the file could not be read.
            cols[3].button("๐๏ธ", key=f"del_{a}", on_click=delete_asset, args=(a,))

        # --- Combined PDF Generation Button ---
        if st.button("Generate Combined PDF from Selected Assets"):
            selected_asset_paths = [path for path, selected in st.session_state.selected_assets.items() if selected]
            if not selected_asset_paths:
                st.warning("Please select at least one asset.")
            else:
                with st.spinner("Generating combined PDF..."):
                    try:
                        combined_pdf_bytes = generate_combined_pdf(selected_asset_paths)
                        if combined_pdf_bytes:
                            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                            pdf_name = f"Combined_Assets_{timestamp}.pdf"
                            st.download_button(
                                "โฌ๏ธ Download Combined PDF",
                                data=combined_pdf_bytes,
                                file_name=pdf_name,
                                mime="application/pdf",
                            )
                            st.success("Combined PDF generated!")
                        else:
                            st.warning("Generated PDF is empty. Check selected files or console for errors.")
                    except Exception as e:
                        st.error(f"An unexpected error occurred during PDF generation: {e}")

    # --- Image Gallery ---
    st.markdown("---")
    st.subheader("๐ผ๏ธ Image Gallery")
    image_files = sorted(
        path
        for pattern in ("*.png", "*.jpg", "*.jpeg", "*.gif", "*.bmp", "*.tiff")
        for path in glob.glob(pattern)
    )
    if not image_files:
        st.info("No image files found in the directory.")
    else:
        image_cols = st.slider("Image Gallery Columns", min_value=1, max_value=10, value=5)
        cols = st.columns(image_cols)
        for idx, image_file in enumerate(image_files):
            with cols[idx % image_cols]:  # cycle through columns
                try:
                    with Image.open(image_file) as img:
                        st.image(img, caption=os.path.basename(image_file), use_container_width=True)
                except Exception as e:
                    st.warning(f"Could not display image {image_file}: {e}")

    # --- Video Gallery ---
    st.markdown("---")
    st.subheader("๐ฅ Video Gallery")
    video_files = sorted(
        path
        for pattern in ("*.mp4", "*.webm", "*.ogg", "*.avi", "*.mov")
        for path in glob.glob(pattern)
    )
    if not video_files:
        st.info("No video files found in the directory.")
    else:
        video_cols = st.slider("Video Gallery Columns", min_value=1, max_value=5, value=3)
        cols = st.columns(video_cols)
        for idx, video_file in enumerate(video_files):
            with cols[idx % video_cols]:  # cycle through columns
                try:
                    # st.video takes no caption argument; show the file name
                    # with a separate caption element instead.
                    st.video(video_file)
                    st.caption(os.path.basename(video_file))
                except Exception as e:
                    st.warning(f"Could not display video {video_file}: {e}")
# Stdlib helpers used by the code-runner functions of the second tab.
import io
import sys
from contextlib import redirect_stdout

# Starter program placed in the code editor when the session has no code yet.
# Block bodies are indented so the demo is valid Python when it is run.
DEFAULT_CODE = '''import streamlit as st
import random
st.title("๐ Demo App")
st.markdown("Random number and color demo")
col1, col2 = st.columns(2)
with col1:
    num = st.number_input("Number:", 1, 100, 10)
    mul = st.slider("Multiplier:", 1, 10, 2)
    if st.button("Calc"):
        st.write(num * mul)
with col2:
    color = st.color_picker("Pick color","#ff0000")
    st.markdown(f'<div style="background:{color};padding:10px;">Color</div>', unsafe_allow_html=True)
'''  # noqa

with tab2:
    st.header("๐งช Python Code Executor & Demo")
def extract_python_code(md: str) -> list:
    """Collect the contents of every ```python fenced block in *md*.

    Whitespace directly after the opening fence is skipped; everything up to
    the next closing fence is returned as-is. The result is a list of
    strings in document order, empty when there is no such block.
    """
    fence = re.compile(r"```python\s*(.*?)```", re.DOTALL)
    return [found.group(1) for found in fence.finditer(md)]
def execute_code(code: str) -> tuple:
    """Run *code* with ``exec`` and capture what it prints.

    Args:
        code: Python source text to execute.

    Returns:
        ``(output, None)`` on success, where ``output`` is everything the
        code wrote to standard output, or ``(None, message)`` when the code
        raised an ``Exception``. ``message`` always starts with the
        exception type, so it is never empty.
    """
    # SECURITY NOTE(review): exec runs arbitrary code with the privileges of
    # this process — confirm this is only exposed to trusted users.
    buf = io.StringIO()
    # A single dict serves as both globals and locals, like a module
    # namespace. With separate dicts, functions defined by the code cannot
    # see its own imports or other top-level names.
    namespace = {}
    try:
        with redirect_stdout(buf):
            exec(code, namespace)
    except Exception as e:
        return None, f"{type(e).__name__}: {e}"
    return buf.getvalue(), None
with tab2:
    up = st.file_uploader("Upload .py or .md", type=['py', 'md'])

    # Initialize session state for code if it doesn't exist
    if 'code' not in st.session_state:
        st.session_state.code = DEFAULT_CODE

    if up:
        # Undecodable bytes are replaced rather than aborting the script run.
        text = up.getvalue().decode("utf-8", errors="replace")
        # The reported MIME type of a .md upload is not always
        # 'text/markdown', so the file extension is checked as well.
        is_markdown = up.type == 'text/markdown' or up.name.lower().endswith('.md')
        if is_markdown:
            codes = extract_python_code(text)
            if codes:
                # Take the first python code block found
                st.session_state.code = codes[0].strip()
            else:
                st.warning("No Python code block found in the markdown file.")
                st.session_state.code = ''  # Clear code if no block found
        else:  # .py file
            st.session_state.code = text.strip()
        # Uploaded code is shown read-only
        st.code(st.session_state.code, language='python')
    else:
        # Without an upload the code is edited in a text area
        st.session_state.code = st.text_area("๐ป Code Editor", value=st.session_state.code, height=400)

    c1, c2 = st.columns([1, 1])
    if c1.button("โถ๏ธ Run Code"):
        if st.session_state.code.strip():
            out, err = execute_code(st.session_state.code)
            if err:
                st.error(f"Execution Error:\n{err}")
            elif out:
                st.subheader("Output:")
                st.code(out)
            else:
                st.success("Executed with no standard output.")
        else:
            st.warning("No code to run.")
    if c2.button("๐๏ธ Clear Code"):
        st.session_state.code = ''
        st.rerun()