# Spaces: Sleeping  (page-scrape header text; not part of the program)
import base64
import io
import re
from xml.sax.saxutils import escape

import streamlit as st
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
# The ML outline as a markdown string: one "# " title, "## " section headings,
# numbered items, and "- " bullets under each item.
# NOTE(review): the symbols in front of each numbered item look like
# mis-decoded emoji; they are kept exactly as found - confirm against the
# upstream file before changing them.
ml_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
1. π **Mixture of Experts (MoE)**
- Conditional computation techniques
- Sparse gating mechanisms
- Training specialized sub-models
2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**
- Loss function customization
- Gradient accumulation strategies
- Learning rate schedulers
3. π€ **Large Language Models (LLM) using Transformers**
- Attention mechanisms
- Tokenization strategies
- Position encodings
## Training Methods
4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**
- Custom reward functions
- Feedback categorization
- Signal extraction from text
5. π **Reinforcement Learning from Human Feedback (RLHF)**
- Preference datasets
- PPO implementation
- KL divergence constraints
6. π **MergeKit: Merging Models to Same Embedding Space**
- TIES merging
- Task arithmetic
- SLERP interpolation
## Optimization & Deployment
7. π **DistillKit: Model Size Reduction with Spectrum Analysis**
- Knowledge distillation
- Quantization techniques
- Model pruning strategies
8. π§ **Agentic RAG Agents using Document Inputs**
- Vector database integration
- Query planning
- Self-reflection mechanisms
9. β³ **Longitudinal Data Summarization from Multiple Docs**
- Multi-document compression
- Timeline extraction
- Entity tracking
## Knowledge Representation
10. π **Knowledge Extraction using Markdown Knowledge Graphs**
- Entity recognition
- Relationship mapping
- Hierarchical structuring
11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**
- Flowchart generation
- Sequence diagram creation
- State diagrams
12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
- Code completion
- Unit test generation
- Documentation synthesis
"""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Parse an outline written in markdown and split it into two columns.

    Recognised line types (after stripping surrounding whitespace):

    * ``# title``   - skipped.
    * ``## section`` - emitted as the string ``"<b>section</b>"``.
    * ``N. item``   - starts a numbered item, emitted as
      ``[item_text, [sub_item, ...]]`` once the item ends.
    * ``- bullet``  - appended to the current numbered item's sub-item list
      (the leading ``"- "`` is kept).
    * anything else - emitted as a plain string, unless a numbered item is
      open, in which case the line is ignored.

    Args:
        markdown_text: The markdown source.

    Returns:
        A ``(left_column, right_column)`` tuple; the parsed entries are split
        at ``len(entries) // 2``, so the right column gets the extra entry
        when the count is odd.
    """
    pdf_content = []
    current_item = None
    sub_items = []
    in_list_item = False

    def flush_item():
        # Emit the open numbered item, even when it has no bullets; the
        # previous guard required at least one bullet and so silently
        # dropped bullet-less items.
        nonlocal current_item, sub_items
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        # Blank lines and the top-level "# " title contribute nothing.
        if not line or line.startswith('# '):
            continue
        if line.startswith('## '):
            flush_item()
            # Slice off only the prefix so a "## " later in the heading
            # text survives.
            pdf_content.append(f"<b>{line[3:].strip()}</b>")
            in_list_item = False
        elif re.match(r'\d+\.', line):
            flush_item()
            current_item = line
            in_list_item = True
        elif line.startswith('- ') and in_list_item:
            sub_items.append(line)
        elif not in_list_item:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
# Main PDF creation using ReportLab
def _outline_markup(text):
    """Turn one outline line into markup that is safe to hand to Paragraph."""
    # Escape &, < and > first so literal characters in the outline are not
    # parsed as tags/entities, then map markdown **bold** onto <b> tags.
    return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', escape(text))


def _column_to_cells(column, section_style, item_style, subitem_style):
    """Flatten one parsed column into a list of Paragraph cells."""
    cells = []
    for entry in column:
        if isinstance(entry, list):
            # Numbered item followed by its bullets.
            main_item, sub_items = entry
            cells.append(Paragraph(_outline_markup(main_item), item_style))
            cells.extend(
                Paragraph(_outline_markup(sub_item), subitem_style)
                for sub_item in sub_items
            )
        elif entry.startswith('<b>'):
            # Section headings arrive wrapped in <b>...</b>; the section
            # style already conveys emphasis, so the wrapper is removed.
            heading = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(_outline_markup(heading), section_style))
        else:
            cells.append(Paragraph(_outline_markup(entry), item_style))
    return cells


def create_main_pdf(markdown_text):
    """Render the markdown outline as a landscape A4 PDF in two columns.

    The font size shrinks as the number of outline lines grows (clamped to
    6-11 pt) to help the content fit; nothing here enforces a single page.

    Args:
        markdown_text: Outline source understood by
            ``markdown_to_pdf_content``.

    Returns:
        The generated PDF as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # Landscape A4: 841.89 x 595.27 points
        leftMargin=36,
        rightMargin=36,
        topMargin=36,
        bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    spacer_height = 10

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # Count every line that will be drawn: a numbered item contributes
    # itself plus each of its bullets.
    total_items = sum(
        1 + len(entry[1]) if isinstance(entry, list) else 1
        for column in (left_column, right_column)
        for entry in column
    )

    # Dynamic font sizes; max(..., 1) avoids dividing by zero when the
    # outline is empty.
    base_font_size = max(6, min(11, 200 / max(total_items, 1)))
    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2

    # Derive the title style instead of mutating the stylesheet's Heading1.
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # centred
        fontSize=min(16, base_font_size * 1.5),
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        fontName='Helvetica-Bold',
        spaceAfter=1,
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1,
    )

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, spacer_height),
    ]

    left_cells = _column_to_cells(
        left_column, section_style, item_style, subitem_style
    )
    right_cells = _column_to_cells(
        right_column, section_style, item_style, subitem_style
    )

    # Pad the shorter column with empty cells so zip() keeps every row.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # An empty Table is rejected by ReportLab's default emptyTableAction,
    # so only add the table when there is something to show.
    if table_data:
        col_width = (A4[1] - 72) / 2.0  # page width minus both margins
        table = Table(
            table_data, colWidths=[col_width, col_width], hAlign='CENTER'
        )
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            # Thin rule separating the two columns.
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
# Streamlit UI
# NOTE(review): the original indentation was lost, so the nesting below is a
# reconstruction - confirm which statements belong inside the spinner block.
st.title("π Cutting-Edge ML Outline Generator")
if st.button("Generate Main PDF"):
    with st.spinner("Generating PDF..."):
        pdf_bytes = create_main_pdf(ml_markdown)
    # Offer the generated bytes as a file download.
    st.download_button(
        label="Download Main PDF",
        data=pdf_bytes,
        file_name="ml_outline.pdf",
        mime="application/pdf"
    )
    # Inline preview: embed the PDF as a base64 data URI.
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
    st.markdown(pdf_display, unsafe_allow_html=True)
    st.success("PDF generated successfully!")