Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Sleeping

App Files Files Community

awacke1 committed on Apr 1

Commit

3febaf2

verified ·

1 Parent(s): db0a51e

Create app.py

Browse files

Files changed (1) hide show

app.py +245 -0

app.py ADDED Viewed

	@@ -0,0 +1,245 @@

+import streamlit as st
+import base64
+from reportlab.lib.pagesizes import A4
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib import colors
+import io
+import re
+# Markdown source of the outline ("## " sections, "N." numbered items, "- " sub-bullets); the UI below passes it to create_main_pdf().
+ml_markdown = """# Cutting-Edge ML Outline
+## Core ML Techniques
+1. 🌟 **Mixture of Experts (MoE)**
+   - Conditional computation techniques
+   - Sparse gating mechanisms
+   - Training specialized sub-models
+2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
+   - Loss function customization
+   - Gradient accumulation strategies
+   - Learning rate schedulers
+3. 🤖 **Large Language Models (LLM) using Transformers**
+   - Attention mechanisms
+   - Tokenization strategies
+   - Position encodings
+## Training Methods
+4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
+   - Custom reward functions
+   - Feedback categorization
+   - Signal extraction from text
+5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
+   - Preference datasets
+   - PPO implementation
+   - KL divergence constraints
+6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
+   - TIES merging
+   - Task arithmetic
+   - SLERP interpolation
+## Optimization & Deployment
+7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
+   - Knowledge distillation
+   - Quantization techniques
+   - Model pruning strategies
+8. 🧠 **Agentic RAG Agents using Document Inputs**
+   - Vector database integration
+   - Query planning
+   - Self-reflection mechanisms
+9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
+   - Multi-document compression
+   - Timeline extraction
+   - Entity tracking
+## Knowledge Representation
+10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
+    - Entity recognition
+    - Relationship mapping
+    - Hierarchical structuring
+11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
+    - Flowchart generation
+    - Sequence diagram creation
+    - State diagrams
+12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
+    - Code completion
+    - Unit test generation
+    - Documentation synthesis
+"""
+# Process multilevel markdown for PDF output
+def markdown_to_pdf_content(markdown_text):
+    """Parse the outline markdown into entries and split them into two columns.
+
+    Each line is stripped and classified:
+      * ``# `` title       -> skipped.
+      * ``## `` heading    -> the string ``"<b>heading</b>"``.
+      * ``N.`` numbered    -> starts a ``[item_text, sub_items]`` entry.
+      * ``- `` bullet      -> appended (dash included) to the current numbered
+                              item's ``sub_items``.
+      * anything else      -> kept verbatim when no numbered item is open,
+                              ignored while one is open.
+
+    A numbered item is emitted even when it has no sub-bullets (as
+    ``[item_text, []]``), and an open item is always closed when a heading or
+    the next numbered item starts.
+
+    Returns:
+        ``(left_column, right_column)``: the entries split at the midpoint by
+        entry count; when the count is odd the right column gets the extra one.
+    """
+    pdf_content = []
+    current_item = None
+    sub_items = []
+    in_list_item = False
+    def flush_item():
+        # Emit the open numbered item (if any) and reset the accumulator.
+        nonlocal current_item, sub_items
+        if current_item is not None:
+            pdf_content.append([current_item, sub_items])
+        current_item = None
+        sub_items = []
+    for raw_line in markdown_text.strip().split('\n'):
+        line = raw_line.strip()
+        if not line:
+            continue
+        if line.startswith('# '):
+            # Document title: not part of the column content.
+            continue
+        if line.startswith('## '):
+            flush_item()
+            # Drop only the leading marker; a later "## " in the text is kept.
+            section = line[len('## '):].strip()
+            pdf_content.append(f"<b>{section}</b>")
+            in_list_item = False
+        elif re.match(r'^\d+\.', line):
+            flush_item()
+            current_item = line
+            in_list_item = True
+        elif line.startswith('- ') and in_list_item:
+            sub_items.append(line)
+        elif not in_list_item:
+            pdf_content.append(line)
+    flush_item()
+    mid_point = len(pdf_content) // 2
+    return pdf_content[:mid_point], pdf_content[mid_point:]
+# Main PDF creation using ReportLab
+def create_main_pdf(markdown_text):
+    """Render the markdown outline as a two-column landscape A4 PDF.
+
+    The page title is the fixed string "Cutting-Edge ML Outline (ReportLab)".
+    The outline body comes from ``markdown_to_pdf_content(markdown_text)``,
+    whose two columns are laid out side by side in a borderless table with a
+    grey rule between them.
+
+    Args:
+        markdown_text: Outline markdown as accepted by
+            ``markdown_to_pdf_content``.
+
+    Returns:
+        The generated PDF document as ``bytes``.
+    """
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(
+        buffer,
+        pagesize=(A4[1], A4[0]),  # A4 with width/height swapped = landscape
+        leftMargin=50,
+        rightMargin=50,
+        topMargin=50,
+        bottomMargin=50
+    )
+    styles = getSampleStyleSheet()
+    # Every style is derived from a sample-sheet entry rather than editing the
+    # entry in place, so all four are built the same way.
+    title_style = ParagraphStyle(
+        'TitleStyle',
+        parent=styles['Heading1'],
+        textColor=colors.darkblue,
+        alignment=1  # Center alignment
+    )
+    section_style = ParagraphStyle(
+        'SectionStyle',
+        parent=styles['Heading2'],
+        textColor=colors.darkblue,
+        spaceAfter=6
+    )
+    item_style = ParagraphStyle(
+        'ItemStyle',
+        parent=styles['Normal'],
+        fontSize=11,
+        leading=14,
+        fontName='Helvetica-Bold'
+    )
+    subitem_style = ParagraphStyle(
+        'SubItemStyle',
+        parent=styles['Normal'],
+        fontSize=10,
+        leading=12,
+        leftIndent=20
+    )
+    story = [
+        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
+        Spacer(1, 20),
+    ]
+    left_column, right_column = markdown_to_pdf_content(markdown_text)
+    left_cells = _outline_cells(left_column, section_style, item_style, subitem_style)
+    right_cells = _outline_cells(right_column, section_style, item_style, subitem_style)
+    # A table row needs a cell in both columns: pad the shorter one with "".
+    row_count = max(len(left_cells), len(right_cells))
+    left_cells.extend([""] * (row_count - len(left_cells)))
+    right_cells.extend([""] * (row_count - len(right_cells)))
+    table_data = list(zip(left_cells, right_cells))
+    # Skip the table for an empty outline: ReportLab's Table rejects data with
+    # no rows under its default settings, which would abort the whole build.
+    if table_data:
+        # Page width minus 120pt, halved. NOTE(review): 120 is presumably the
+        # two 50pt margins plus slack for frame padding - confirm.
+        col_width = (A4[1] - 120) / 2.0
+        table = Table(table_data, colWidths=[col_width, col_width])
+        table.setStyle(TableStyle([
+            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
+            ('ALIGN', (1, 0), (1, -1), 'LEFT'),
+            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
+            # White grid lines are invisible on the white background.
+            ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
+            # Grey vertical rule separating the two columns.
+            ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
+        ]))
+        story.append(table)
+    doc.build(story)
+    return buffer.getvalue()
+def _outline_cells(entries, section_style, item_style, subitem_style):
+    """Convert one column of parsed outline entries into a flat list of Paragraphs."""
+    cells = []
+    for entry in entries:
+        if isinstance(entry, str) and entry.startswith('<b>'):
+            # Section heading: the <b> wrapper is only a marker, the style does the formatting.
+            text = entry.replace('<b>', '').replace('</b>', '')
+            cells.append(Paragraph(text, section_style))
+        elif isinstance(entry, list):
+            main_item, sub_items = entry
+            cells.append(Paragraph(main_item, item_style))
+            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
+        else:
+            cells.append(Paragraph(entry, item_style))
+    return cells
+# Streamlit UI
+# Page header, then a button that builds the PDF on demand.
+st.title("🚀 Cutting-Edge ML Outline Generator")
+generate_clicked = st.button("Generate Main PDF")
+if generate_clicked:
+    with st.spinner("Generating PDF..."):
+        pdf_bytes = create_main_pdf(ml_markdown)
+        # Base64 form of the same bytes, for the inline preview below.
+        encoded_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
+        preview_html = f'<embed src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="400px" type="application/pdf">'
+        # 1) Offer the file for download.
+        st.download_button(
+            label="Download Main PDF",
+            data=pdf_bytes,
+            file_name="ml_outline.pdf",
+            mime="application/pdf"
+        )
+        # 2) Show it inline as a data-URI <embed>; raw HTML must be allowed for that.
+        st.markdown(preview_html, unsafe_allow_html=True)
+        # 3) Confirm completion.
+        st.success("PDF generated successfully!")