# Source: Book-Maker-CVLM-AI-UI-UX / backup.05122025-2.app.py
# Author: awacke1
# Commit message: Rename app.py to backup.05122025-2.app.py
# Commit: 3a2870f (verified)
import io
import os
import re
import glob
import textwrap
from datetime import datetime
from pathlib import Path
import streamlit as st
import pandas as pd
from PIL import Image
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter # Using letter size for consistency
from reportlab.lib.utils import ImageReader
import mistune
from gtts import gTTS
# Page config: browser-tab title, wide layout and a rocket icon.
# The icon literal was mojibake (UTF-8 bytes decoded with the wrong code page);
# it is restored to the intended emoji.
st.set_page_config(page_title="PDF & Code Interpreter", layout="wide", page_icon="🚀")
def delete_asset(path):
    """Delete an asset file from disk and forget its checkbox selection.

    Used as a widget ``on_click`` callback. Streamlit reruns the script by
    itself once a callback returns, so no explicit ``st.rerun()`` is issued
    here: inside a callback that call is a no-op that only produces a warning.

    Args:
        path: Path of the file to remove. The same value is the key used in
            ``st.session_state.selected_assets``.
    """
    try:
        os.remove(path)
    except OSError as e:
        # Report the failure and keep the selection entry, since the file
        # was not removed.
        st.error(f"Error deleting file: {e}")
        return
    # Also remove from session state selection if it exists
    if 'selected_assets' in st.session_state:
        st.session_state.selected_assets.pop(path, None)
# --- Function to Generate Combined PDF ---
def _draw_text_in_columns(c, text, page_w, page_h):
    """Flow plain text onto the canvas in two 14pt Helvetica columns.

    Args:
        c: ReportLab canvas to draw on.
        text: Plain text; every newline-separated piece is wrapped on its own
            and blank pieces add half a line of vertical space.
        page_w: Page width in points.
        page_h: Page height in points.

    Returns:
        True if the page that is current when the function returns has text
        drawn on it, False if that page is still blank.
    """
    margin = 40        # Margin around the content area (points)
    gutter = 15        # Space between columns (points)
    num_columns = 2
    font_family = "Helvetica"
    font_size = 14

    col_w = (page_w - 2 * margin - (num_columns - 1) * gutter) / num_columns
    # Characters per line are estimated from an average glyph width of
    # 0.6 * font size; this is an approximation, not a measured width.
    wrap_width = int(col_w / (font_size * 0.6))
    line_height = font_size * 1.3
    top_y = page_h - margin

    c.setFont(font_family, font_size)
    col = 0
    y = top_y
    page_has_content = False
    for paragraph in text.split("\n"):
        is_blank = not paragraph.strip()
        if is_blank:
            # Blank lines only add vertical space. The single None entry makes
            # the loop below run its overflow check once without drawing.
            y -= line_height / 2
            lines = [None]
        else:
            lines = textwrap.wrap(paragraph, wrap_width)
        for line in lines:
            if y < margin:
                # Column is full: move right, or start a new page after the
                # last column.
                col += 1
                if col >= num_columns:
                    c.showPage()
                    c.setFont(font_family, font_size)  # font resets on a new page
                    col = 0
                    page_has_content = False
                y = top_y
            if line is not None:
                c.drawString(margin + col * (col_w + gutter), y, line)
                page_has_content = True
                y -= line_height
        if not is_blank:
            # Extra space after each paragraph
            y -= line_height / 2
    return page_has_content


def generate_combined_pdf(selected_asset_paths):
    """Generate a single PDF from selected markdown and image file paths.

    All ``.md`` files are converted to plain text, joined with a ``---``
    separator and rendered first in two columns. Each ``.png``/``.jpg``/
    ``.jpeg``/``.gif`` file then gets its own page, scaled to fit inside the
    margins and centred. Files that cannot be processed are reported with
    ``st.warning`` and skipped. If nothing could be rendered, the PDF contains
    a single explanatory line.

    Args:
        selected_asset_paths: Iterable of file path strings.

    Returns:
        The PDF document as ``bytes``.
    """
    # Imported locally because ``html`` is also used as a variable name in
    # this file.
    from html import unescape

    page_w, page_h = letter
    buf = io.BytesIO()
    # The layout below is computed for letter-size pages, so the canvas must
    # be created with that size instead of ReportLab's default page size.
    c = canvas.Canvas(buf, pagesize=letter)

    # --- Process Markdown Files ---
    to_html = mistune.create_markdown(renderer=mistune.HTMLRenderer())
    text_parts = []
    for path in selected_asset_paths:
        if not path.lower().endswith('.md'):
            continue
        try:
            with open(path, 'r', encoding='utf-8') as f:
                md_text = f.read()
            # Render to HTML, strip the tags, then decode entities such as
            # &amp; so the PDF shows the characters the author typed.
            plain_text = unescape(re.sub(r'<[^>]+>', '', to_html(md_text or "")))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole PDF.
            st.warning(f"Could not read or process markdown file {path}: {e}")
            continue
        if plain_text:
            text_parts.append(plain_text)
    all_plain_text = "\n\n---\n\n".join(text_parts)

    # Tracks whether the current page already carries content, so that a page
    # break is emitted only when needed (no blank pages between sections).
    page_has_content = False
    if all_plain_text.strip():
        page_has_content = _draw_text_in_columns(c, all_plain_text, page_w, page_h)

    # --- Process Image Files ---
    margin_img = 40  # Margin around the image on the page
    available_w = page_w - 2 * margin_img
    available_h = page_h - 2 * margin_img
    images_drawn = 0
    for path in selected_asset_paths:
        if not path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue
        try:
            # Only the dimensions are needed here; close the handle right away.
            with Image.open(path) as img:
                img_w, img_h = img.size
            # Scale to fit the available area while preserving aspect ratio
            scale = min(available_w / img_w, available_h / img_h)
            draw_w = img_w * scale
            draw_h = img_h * scale
            # Centre the scaled image (origin is the bottom-left corner)
            pos_x = margin_img + (available_w - draw_w) / 2
            pos_y = margin_img + (available_h - draw_h) / 2
            if page_has_content:
                c.showPage()  # Start a new page for this image
                page_has_content = False
            c.drawImage(path, pos_x, pos_y, width=draw_w, height=draw_h, preserveAspectRatio=True)
            page_has_content = True
            images_drawn += 1
        except Exception as e:
            st.warning(f"Could not process image file {path}: {e}")
            continue  # Continue with other selected assets

    # If no markdown or images were selected/processed
    if not all_plain_text.strip() and images_drawn == 0:
        c.drawString(40, page_h - 40, "No selected markdown or image files to generate PDF.")

    c.save()  # Finalize the PDF (also emits the last pending page)
    return buf.getvalue()
# --- End of Combined PDF Function ---
# Tabs setup
tab1, tab2 = st.tabs(["📄 PDF Composer", "🧪 Code Interpreter"])
with tab1:
    st.header("📄 PDF Composer & Voice Generator 🚀")

    # Sidebar settings for the original PDF composer.
    # These settings (columns, font, font size) apply only to the
    # "Original PDF" button, not to the combined PDF generation further down.
    st.sidebar.markdown("### Original PDF Composer Settings")
    columns = st.sidebar.slider("Text columns (Original PDF)", 1, 3, 1)
    font_family = st.sidebar.selectbox("Font (Original PDF)", ["Helvetica","Times-Roman","Courier"])
    font_size = st.sidebar.slider("Font size (Original PDF)", 6, 24, 12)

    # Markdown input for the original PDF composer
    st.markdown("#### Original PDF Composer Input")
    md_file = st.file_uploader("Upload Markdown (.md) for Original PDF", type=["md"])
    if md_file:
        # Undecodable bytes are replaced instead of raising, so a file that is
        # not valid UTF-8 does not crash the whole script.
        md_text = md_file.getvalue().decode("utf-8", errors="replace")
        # Output files are named after the uploaded file...
        original_pdf_stem = Path(md_file.name).stem
    else:
        md_text = st.text_area("Or enter markdown text directly for Original PDF", height=200)
        # ...or after the current timestamp when the text area is used.
        original_pdf_stem = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Convert Markdown to plain text for original PDF: render to HTML, strip
    # the tags, then decode entities (&amp;, &lt;, ...) left by the renderer.
    from html import unescape as _unescape_html
    renderer = mistune.HTMLRenderer()
    markdown = mistune.create_markdown(renderer=renderer)
    html = markdown(md_text or "")
    original_pdf_plain_text = _unescape_html(re.sub(r'<[^>]+>', '', html))
# Voice settings (applies to the text entered above)
st.markdown("#### Voice Generation from Text Input")
# Display name -> gTTS language code. Regional English accents are not
# language codes in gTTS ("en-uk" is a deprecated alias of plain "en"); the
# accent is chosen through the Google Translate host passed as ``tld``.
languages = {"English (US)": "en", "English (UK)": "en", "Spanish": "es"}
accent_tlds = {"English (UK)": "co.uk"}
voice_choice = st.selectbox("Voice Language", list(languages.keys()))
voice_lang = languages[voice_choice]
voice_tld = accent_tlds.get(voice_choice, "com")
slow = st.checkbox("Slow Speech")
if st.button("🔊 Generate & Download Voice MP3 from Text"):
    if original_pdf_plain_text.strip():
        # The MP3 is written to the working directory under the same stem as
        # the PDF.
        voice_file = f"{original_pdf_stem}.mp3"
        try:
            # Using the plain text from the text area/uploaded MD for voice
            tts = gTTS(text=original_pdf_plain_text, lang=voice_lang, tld=voice_tld, slow=slow)
            tts.save(voice_file)
            st.audio(voice_file)
            with open(voice_file, 'rb') as mp3:
                st.download_button("📥 Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
        except Exception as e:
            # gTTS needs network access; surface any failure in the UI.
            st.error(f"Error generating voice: {e}")
    else:
        st.warning("No text to generate voice from.")
# Image uploads and ordering for the original PDF composer
st.markdown("#### Images for Original PDF")
imgs = st.file_uploader("Upload Images for Original PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
ordered_images_original_pdf = []
if imgs:
    # One table row per upload; the user edits the "order" column to reorder.
    df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
    edited = st.data_editor(df_imgs, use_container_width=True)
    # The table's row labels are the upload positions (0..n-1), so each row is
    # mapped back to its file by label rather than by name. This keeps
    # duplicate file names apart and is unaffected by edits to the name cell.
    # A stable sort keeps upload order for rows sharing the same "order".
    ordered_images_original_pdf = [
        imgs[row_label]
        for row_label in edited.sort_values("order", kind="stable").index
    ]
# --- Original PDF Generation Button ---
# Builds a PDF from the text input (laid out with the sidebar settings)
# followed by the uploaded images in their edited order, one image per page.
if st.button("🖋️ Generate Original PDF with Markdown & Images"):
    if not original_pdf_plain_text.strip() and not ordered_images_original_pdf:
        st.warning("Please provide some text or upload images to generate the Original PDF.")
    else:
        page_w, page_h = letter
        buf = io.BytesIO()
        # The layout is computed for letter-size pages, so the canvas must be
        # created with that size instead of ReportLab's default page size.
        c = canvas.Canvas(buf, pagesize=letter)
        # True while the current page carries content; used to emit a page
        # break only when needed, so no blank page ends up between sections.
        page_has_content = False

        # Render text using the sidebar settings if text is provided
        if original_pdf_plain_text.strip():
            margin = 40
            gutter = 20
            col_w = (page_w - 2 * margin - (columns - 1) * gutter) / columns
            line_height = font_size * 1.2
            # Characters per line, estimated from an average glyph width of
            # 0.6 * font size (approximation, not a measured width).
            wrap_width = max(1, int(col_w / (font_size * 0.6)))
            top_y = page_h - margin
            c.setFont(font_family, font_size)
            col = 0
            y = top_y
            for paragraph in original_pdf_plain_text.split("\n"):
                is_blank = not paragraph.strip()
                if is_blank:
                    # Blank lines only add vertical space. The single None
                    # entry runs the overflow check below without drawing.
                    y -= line_height / 2
                    lines = [None]
                else:
                    lines = textwrap.wrap(paragraph, wrap_width)
                for line in lines:
                    if y < margin:  # Column full: next column or next page
                        col += 1
                        if col >= columns:
                            c.showPage()
                            c.setFont(font_family, font_size)  # font resets per page
                            col = 0
                            page_has_content = False
                        y = top_y
                    if line is not None:
                        c.drawString(margin + col * (col_w + gutter), y, line)
                        page_has_content = True
                        y -= line_height
                if not is_blank:
                    y -= line_height / 2  # Space after paragraph

        # Each uploaded image is scaled to fit a letter page within margins
        # and centred on a page of its own.
        margin_img = 40
        available_w = page_w - 2 * margin_img
        available_h = page_h - 2 * margin_img
        for img_f in ordered_images_original_pdf:
            try:
                img = Image.open(img_f)  # img_f is a file-like object from st.file_uploader
                w, h = img.size
                scale = min(available_w / w, available_h / h)
                draw_w = w * scale
                draw_h = h * scale
                pos_x = margin_img + (available_w - draw_w) / 2
                pos_y = margin_img + (available_h - draw_h) / 2
                if page_has_content:
                    c.showPage()
                    page_has_content = False
                # Rewind before handing the upload to ReportLab, since PIL has
                # already read from it. ImageReader wraps file-like objects.
                img_f.seek(0)
                c.drawImage(ImageReader(img_f), pos_x, pos_y, width=draw_w, height=draw_h, preserveAspectRatio=True)
                page_has_content = True
            except Exception as e:
                st.warning(f"Could not process uploaded image {img_f.name}: {e}")
                continue

        c.save()  # Finalizes the document, including the last pending page
        buf.seek(0)
        pdf_name = f"{original_pdf_stem}.pdf"
        st.download_button("⬇️ Download Original PDF", data=buf, file_name=pdf_name, mime="application/pdf")
st.markdown("---")
st.subheader("📂 Available Assets")
st.markdown("Select assets below to include in a combined PDF.")

# Collect files in the working directory whose name contains a dot, then drop
# the ones that should not be offered as assets.
all_assets = glob.glob("*.*")
excluded_extensions = ['.py', '.ttf']
excluded_files = ['README.md', 'index.html']
assets = sorted(
    a for a in all_assets
    # "*.*" also matches directories with a dot in their name; those cannot be
    # opened for download, so only regular files are kept.
    if os.path.isfile(a)
    and not a.lower().endswith(tuple(excluded_extensions))
    and os.path.basename(a) not in excluded_files
)

# Initialize session state for selected assets if not already done
if 'selected_assets' not in st.session_state:
    st.session_state.selected_assets = {}

# Drop selections for assets that no longer exist...
current_asset_paths = {os.path.abspath(a) for a in assets}
st.session_state.selected_assets = {
    k: v for k, v in st.session_state.selected_assets.items()
    if os.path.abspath(k) in current_asset_paths
}
# ...and give every newly discovered asset an unselected entry.
for asset_path in assets:
    st.session_state.selected_assets.setdefault(asset_path, False)
# --- Display Assets with Checkboxes ---
# One row per asset: selection checkbox, file name, download button, delete button.
import mimetypes  # stdlib; used to pick a MIME type for each download

if not assets:
    st.info("No available assets found.")
else:
    # Header row for clarity
    header_cols = st.columns([0.5, 3, 1, 1])
    header_cols[1].write("**File**")
    for a in assets:
        ext = a.split('.')[-1].lower()
        cols = st.columns([0.5, 3, 1, 1])

        # Checkbox in the first column, mirrored into session state. The
        # widget key uses the absolute path; the selection dict is keyed by
        # the relative path. The label is hidden but non-empty, because
        # Streamlit warns about empty widget labels.
        asset_key = os.path.abspath(a)
        st.session_state.selected_assets[a] = cols[0].checkbox(
            f"Select {a}",
            value=st.session_state.selected_assets.get(a, False),
            key=f"select_asset_{asset_key}",
            label_visibility="collapsed",
        )

        # File name in the second column
        cols[1].write(a)

        # Download button in the third column. Every file is read as bytes so
        # that text files in any encoding can still be downloaded, and the
        # MIME type comes from the standard table (e.g. .jpg -> image/jpeg)
        # instead of being built from the extension.
        try:
            with open(a, 'rb') as asset_file:
                payload = asset_file.read()
        except OSError as e:
            cols[2].error(f"Error: {e}")
        else:
            mime = mimetypes.guess_type(a)[0] or "application/octet-stream"
            label = "📥" if ext in ('pdf', 'mp3') else "⬇️"
            cols[2].download_button(label, data=payload, file_name=a, mime=mime, key=f"download_{a}")

        # Delete button in the fourth column; rendered even when the file
        # could not be read, so a broken asset can still be removed.
        cols[3].button("🗑️", key=f"del_{a}", on_click=delete_asset, args=(a,))
# --- Combined PDF Generation Button ---
# The button is only rendered when at least one asset is listed.
if assets and st.button("Generate Combined PDF from Selected Assets"):
    # Paths whose checkbox is currently ticked
    selected_asset_paths = [path for path, selected in st.session_state.selected_assets.items() if selected]
    if not selected_asset_paths:
        st.warning("Please select at least one asset.")
    else:
        with st.spinner("Generating combined PDF..."):
            try:
                combined_pdf_bytes = generate_combined_pdf(selected_asset_paths)
                if combined_pdf_bytes:
                    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                    pdf_name = f"Combined_Assets_{timestamp}.pdf"
                    # Provide the generated PDF for download
                    st.download_button(
                        "⬇️ Download Combined PDF",
                        data=combined_pdf_bytes,
                        file_name=pdf_name,
                        mime="application/pdf"
                    )
                    st.success("Combined PDF generated!")
                else:
                    # Defensive: shown only if the generator returns no bytes.
                    st.warning("Generated PDF is empty. Check selected files or console for errors.")
            except Exception as e:
                st.error(f"An unexpected error occurred during PDF generation: {e}")
# --- Image Gallery ---
st.markdown("---")
st.subheader("🖼️ Image Gallery")
# Find common image file types in the working directory
image_patterns = ("*.png", "*.jpg", "*.jpeg", "*.gif", "*.bmp", "*.tiff")
image_files = sorted(path for pattern in image_patterns for path in glob.glob(pattern))
if not image_files:
    st.info("No image files found in the directory.")
else:
    # Slider to control the number of columns in the gallery (always >= 1)
    image_cols = st.slider("Image Gallery Columns", min_value=1, max_value=10, value=5)
    cols = st.columns(image_cols)
    for idx, image_file in enumerate(image_files):
        with cols[idx % image_cols]:  # Cycle through columns
            try:
                # The context manager closes the file handle after the image
                # has been handed to Streamlit, so handles do not pile up
                # across reruns.
                with Image.open(image_file) as img:
                    st.image(img, caption=os.path.basename(image_file), use_container_width=True)
            except Exception as e:
                st.warning(f"Could not display image {image_file}: {e}")
# --- Video Gallery ---
st.markdown("---")
st.subheader("🎥 Video Gallery")
# Find common video file types in the working directory
video_patterns = ("*.mp4", "*.webm", "*.ogg", "*.avi", "*.mov")
video_files = sorted(path for pattern in video_patterns for path in glob.glob(pattern))
if not video_files:
    st.info("No video files found in the directory.")
else:
    # Slider to control the number of columns in the gallery (always >= 1)
    video_cols = st.slider("Video Gallery Columns", min_value=1, max_value=5, value=3)
    cols = st.columns(video_cols)
    for idx, video_file in enumerate(video_files):
        with cols[idx % video_cols]:  # Cycle through columns
            try:
                # st.video takes no ``caption`` argument, so the file name is
                # rendered as a separate caption element under the player.
                st.video(video_file)
                st.caption(os.path.basename(video_file))
            except Exception as e:
                st.warning(f"Could not display video {video_file}: {e}")
with tab2:
    st.header("🧪 Python Code Executor & Demo")
    import io
    import sys
    from contextlib import redirect_stdout

    # Program preloaded into the code editor. The nested lines must stay
    # indented inside the string, otherwise the demo fails with an
    # IndentationError when it is run.
    DEFAULT_CODE = '''import streamlit as st
import random
st.title("📊 Demo App")
st.markdown("Random number and color demo")
col1, col2 = st.columns(2)
with col1:
    num = st.number_input("Number:", 1, 100, 10)
    mul = st.slider("Multiplier:", 1, 10, 2)
    if st.button("Calc"):
        st.write(num * mul)
with col2:
    color = st.color_picker("Pick color","#ff0000")
    st.markdown(f'<div style="background:{color};padding:10px;">Color</div>', unsafe_allow_html=True)
''' # noqa
def extract_python_code(md: str) -> list:
    """Return the bodies of all Python fenced code blocks found in *md*.

    A block starts with a fence tagged ``python`` or ``py`` (any letter case,
    optionally followed by ``3``) and ends at the next triple backtick. The
    tag must be a whole word, so fences such as ``pythonic`` or ``pytest``
    are not treated as Python. Whitespace directly after the tag is skipped.

    Args:
        md: Markdown text to scan.

    Returns:
        A list of code strings in document order; empty if none are found.
    """
    fence = r"```(?:python|py)3?(?![\w-])\s*(.*?)```"
    return re.findall(fence, md, re.DOTALL | re.IGNORECASE)
def execute_code(code: str) -> tuple:
    """Run *code* with ``exec`` and capture what it prints.

    NOTE(security): this executes arbitrary Python inside the app process
    with full privileges. It is only acceptable when the person entering the
    code is trusted.

    Args:
        code: Python source to execute.

    Returns:
        ``(output, None)`` on success, where ``output`` is the captured
        standard output (possibly empty); ``(None, message)`` if the code
        raised, where ``message`` is ``"<ExceptionType>: <text>"`` and is
        therefore never empty.
    """
    buf = io.StringIO()
    # A single dict is used as both globals and locals. With two separate
    # dicts the code runs as if it were a class body, and functions defined by
    # the code cannot see its own top-level imports and variables.
    namespace = {}
    try:
        # Redirect stdout to capture print statements
        with redirect_stdout(buf):
            exec(code, namespace)
    except Exception as e:
        # Include the exception type so that errors raised without a message
        # (e.g. ``raise ValueError``) still yield a non-empty string.
        return None, f"{type(e).__name__}: {e}"
    return buf.getvalue(), None
up = st.file_uploader("Upload .py or .md", type=['py', 'md'])

# Initialize session state for code if it doesn't exist
if 'code' not in st.session_state:
    st.session_state.code = DEFAULT_CODE

if up:
    text = up.getvalue().decode("utf-8", errors="replace")
    # Decide by file extension: the MIME type reported for .md uploads varies
    # between browsers, and a markdown file must never be run as Python.
    if Path(up.name).suffix.lower() == '.md':
        codes = extract_python_code(text)
        if codes:
            # Take the first python code block found
            st.session_state.code = codes[0].strip()
        else:
            st.warning("No Python code block found in the markdown file.")
            st.session_state.code = ''  # Clear code if no block found
    else:  # .py file
        st.session_state.code = text.strip()
    # Display the code after upload (read-only)
    st.code(st.session_state.code, language='python')
else:
    # Editable text area when no file is uploaded
    st.session_state.code = st.text_area("💻 Code Editor", value=st.session_state.code, height=400)

c1, c2 = st.columns([1, 1])
if c1.button("▶️ Run Code"):
    if st.session_state.code.strip():
        out, err = execute_code(st.session_state.code)
        # Compare against None: an error with an empty message is still an error.
        if err is not None:
            st.error(f"Execution Error:\n{err}")
        elif out:
            st.subheader("Output:")
            st.code(out)
        else:
            st.success("Executed with no standard output.")
    else:
        st.warning("No code to run.")
if c2.button("🗑️ Clear Code"):
    st.session_state.code = ''
    st.rerun()