Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Sleeping

App Files Files Community

Pillow-PyMuPDF-ReportLab / app.py

awacke1

Update app.py

fba4d1f verified 7 months ago

raw

history blame

8.94 kB

	import streamlit as st
	import base64
	from reportlab.lib.pagesizes import A4
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib import colors
	import io
	import re

	# Markdown source for the ML outline rendered into the PDF.
	# Layout of the text: one '# ' title line, '## ' section headings, numbered
	# items ("1.", "2.", ...) and '- ' sub-bullets under each numbered item.
	# The UI code at the bottom of the file passes this string to create_main_pdf().
	ml_markdown = """# Cutting-Edge ML Outline

	## Core ML Techniques
	1. 🌟 Mixture of Experts (MoE)
	- Conditional computation techniques
	- Sparse gating mechanisms
	- Training specialized sub-models

	2. 🔥 Supervised Fine-Tuning (SFT) using PyTorch
	- Loss function customization
	- Gradient accumulation strategies
	- Learning rate schedulers

	3. 🤖 Large Language Models (LLM) using Transformers
	- Attention mechanisms
	- Tokenization strategies
	- Position encodings

	## Training Methods
	4. 📊 Self-Rewarding Learning using NPS 0-10 and Verbatims
	- Custom reward functions
	- Feedback categorization
	- Signal extraction from text

	5. 👍 Reinforcement Learning from Human Feedback (RLHF)
	- Preference datasets
	- PPO implementation
	- KL divergence constraints

	6. 🔗 MergeKit: Merging Models to Same Embedding Space
	- TIES merging
	- Task arithmetic
	- SLERP interpolation

	## Optimization & Deployment
	7. 📏 DistillKit: Model Size Reduction with Spectrum Analysis
	- Knowledge distillation
	- Quantization techniques
	- Model pruning strategies

	8. 🧠 Agentic RAG Agents using Document Inputs
	- Vector database integration
	- Query planning
	- Self-reflection mechanisms

	9. ⏳ Longitudinal Data Summarization from Multiple Docs
	- Multi-document compression
	- Timeline extraction
	- Entity tracking

	## Knowledge Representation
	10. 📑 Knowledge Extraction using Markdown Knowledge Graphs
	- Entity recognition
	- Relationship mapping
	- Hierarchical structuring

	11. 🗺️ Knowledge Mapping with Mermaid Diagrams
	- Flowchart generation
	- Sequence diagram creation
	- State diagrams

	12. 💻 ML Code Generation with Streamlit/Gradio/HTML5+JS
	- Code completion
	- Unit test generation
	- Documentation synthesis
	"""

	# Process multilevel markdown for PDF output
	def markdown_to_pdf_content(markdown_text):
	    """Convert a markdown outline into two columns of PDF-ready entries.

	    The text is scanned line by line (blank lines are skipped):

	    * ``# `` title lines are ignored.
	    * ``## `` headings become the string ``"<b>heading</b>"``.
	    * Lines starting with ``<digits>.`` open a numbered item; the ``- ``
	      lines that follow are collected as its sub-items and the pair is
	      emitted as ``[item_text, [sub_item_text, ...]]``. An item with no
	      sub-items is emitted with an empty list instead of being dropped.
	    * Any other line is emitted verbatim when no numbered item is open,
	      and ignored while one is open.

	    Args:
	        markdown_text: The markdown outline as a single string.

	    Returns:
	        A ``(left_column, right_column)`` tuple. The entry list is split at
	        ``len(entries) // 2``, so the right column receives the extra entry
	        when the count is odd. Both lists are empty for empty input.
	    """
	    entries = []
	    current_item = None
	    sub_items = []
	    in_list_item = False

	    for raw_line in markdown_text.strip().split('\n'):
	        line = raw_line.strip()
	        # Blank lines and the top-level title contribute nothing.
	        if not line or line.startswith('# '):
	            continue

	        if line.startswith('## '):
	            # A new section closes whatever numbered item is still open,
	            # even if that item collected no sub-bullets.
	            if current_item is not None:
	                entries.append([current_item, sub_items])
	            current_item, sub_items = None, []
	            # Slice off only the leading marker so a heading that itself
	            # contains "## " keeps its text intact.
	            entries.append(f"<b>{line[3:].strip()}</b>")
	            in_list_item = False
	        elif re.match(r'^\d+\.', line):
	            if current_item is not None:
	                entries.append([current_item, sub_items])
	            # Fresh list per item so already-emitted entries are never mutated.
	            current_item, sub_items = line, []
	            in_list_item = True
	        elif in_list_item:
	            # Only bullet lines belong to the open item; other text under an
	            # item is ignored.
	            if line.startswith('- '):
	                sub_items.append(line)
	        else:
	            entries.append(line)

	    # Emit the item that was still open when the text ended.
	    if current_item is not None:
	        entries.append([current_item, sub_items])

	    mid_point = len(entries) // 2
	    return entries[:mid_point], entries[mid_point:]

	# Main PDF creation using ReportLab
	def _column_to_cells(column, section_style, item_style, subitem_style):
	    """Turn one column of outline entries into a flat list of Paragraph cells."""
	    cells = []
	    for entry in column:
	        if isinstance(entry, str) and entry.startswith('<b>'):
	            # Section headings arrive wrapped in <b>...</b>; the section
	            # style supplies the emphasis, so the tags are stripped.
	            heading = entry.replace('<b>', '').replace('</b>', '')
	            cells.append(Paragraph(heading, section_style))
	        elif isinstance(entry, list):
	            main_item, sub_items = entry
	            cells.append(Paragraph(main_item, item_style))
	            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
	        else:
	            cells.append(Paragraph(entry, item_style))
	    return cells


	def create_main_pdf(markdown_text):
	    """Create a landscape A4 PDF with the outline laid out in two columns.

	    The markdown is parsed with ``markdown_to_pdf_content``; every heading,
	    item and sub-item becomes one table row cell. Font sizes shrink as the
	    number of rows grows (``200 / rows`` clamped to 6-11 pt) to help the
	    outline fit the page; nothing here enforces a single page.

	    Args:
	        markdown_text: The markdown outline as a single string.

	    Returns:
	        The generated PDF as ``bytes``. When the markdown yields no entries
	        the PDF contains only the title.
	    """
	    buffer = io.BytesIO()
	    doc = SimpleDocTemplate(
	        buffer,
	        pagesize=(A4[1], A4[0]),  # Landscape A4: 841.89 x 595.27 points
	        leftMargin=36,
	        rightMargin=36,
	        topMargin=36,
	        bottomMargin=36,
	    )

	    styles = getSampleStyleSheet()
	    spacer_height = 10

	    # Process columns first: the row count drives the font sizes below.
	    left_column, right_column = markdown_to_pdf_content(markdown_text)

	    # One row per heading/plain line, plus one per item and per sub-item.
	    total_items = sum(
	        1 + len(entry[1]) if isinstance(entry, list) else 1
	        for column in (left_column, right_column)
	        for entry in column
	    )

	    # Dynamic font sizes; the divisor is clamped so empty input cannot
	    # raise ZeroDivisionError.
	    base_font_size = max(6, min(11, 200 / max(total_items, 1)))
	    item_font_size = base_font_size
	    subitem_font_size = base_font_size * 0.9
	    section_font_size = base_font_size * 1.2

	    # Create custom styles
	    title_style = ParagraphStyle(
	        'TitleStyle',
	        parent=styles['Heading1'],
	        textColor=colors.darkblue,
	        alignment=1,  # centered
	        fontSize=min(16, base_font_size * 1.5),
	    )

	    section_style = ParagraphStyle(
	        'SectionStyle',
	        parent=styles['Heading2'],
	        textColor=colors.darkblue,
	        fontSize=section_font_size,
	        leading=section_font_size * 1.2,
	        spaceAfter=2,
	    )

	    item_style = ParagraphStyle(
	        'ItemStyle',
	        parent=styles['Normal'],
	        fontSize=item_font_size,
	        leading=item_font_size * 1.2,
	        fontName='Helvetica-Bold',
	        spaceAfter=1,
	    )

	    subitem_style = ParagraphStyle(
	        'SubItemStyle',
	        parent=styles['Normal'],
	        fontSize=subitem_font_size,
	        leading=subitem_font_size * 1.2,
	        leftIndent=10,
	        spaceAfter=1,
	    )

	    # Add title
	    story = [
	        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
	        Spacer(1, spacer_height),
	    ]

	    # Prepare data for table
	    left_cells = _column_to_cells(left_column, section_style, item_style, subitem_style)
	    right_cells = _column_to_cells(right_column, section_style, item_style, subitem_style)

	    # Make columns equal length so zip() below does not truncate the longer one.
	    max_cells = max(len(left_cells), len(right_cells))
	    left_cells.extend([""] * (max_cells - len(left_cells)))
	    right_cells.extend([""] * (max_cells - len(right_cells)))

	    # Create table data
	    table_data = list(zip(left_cells, right_cells))

	    # Skip the table when there are no rows: ReportLab rejects an empty
	    # table under its default settings.
	    if table_data:
	        # Each column gets half of the page width left between the margins.
	        col_width = (A4[1] - 72) / 2.0

	        # Create and style table
	        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
	        table.setStyle(TableStyle([
	            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
	            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
	            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
	            ('GRID', (0, 0), (-1, -1), 0, colors.white),
	            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
	            ('LEFTPADDING', (0, 0), (-1, -1), 2),
	            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
	            ('TOPPADDING', (0, 0), (-1, -1), 1),
	            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
	        ]))
	        story.append(table)

	    doc.build(story)
	    return buffer.getvalue()

	# Streamlit UI: a page title and a button that builds the outline PDF on demand.
	# NOTE(review): indentation was lost in this copy of the file, so which of the
	# statements below sit inside the spinner block cannot be confirmed from here.
	st.title("🚀 Cutting-Edge ML Outline Generator")

	if st.button("Generate Main PDF"):
	with st.spinner("Generating PDF..."):
	# Render the module-level ml_markdown outline to PDF bytes.
	pdf_bytes = create_main_pdf(ml_markdown)
	# Offer the generated bytes as a download named ml_outline.pdf.
	st.download_button(
	label="Download Main PDF",
	data=pdf_bytes,
	file_name="ml_outline.pdf",
	mime="application/pdf"
	)
	# Inline preview: the PDF bytes are base64-encoded into a data: URI and
	# placed in an <embed> tag; unsafe_allow_html=True is passed so the raw
	# HTML is handed to st.markdown as markup rather than text.
	base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
	pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
	st.markdown(pdf_display, unsafe_allow_html=True)
	st.success("PDF generated successfully!")