Spaces:

awacke1
/

Pillow-PyMuPDF-ReportLab

Sleeping

App Files Files Community

awacke1 committed on Apr 1

Commit

fba4d1f

verified ·

1 Parent(s): 15f6774

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -3

app.py CHANGED Viewed

@@ -1,4 +1,130 @@
 def create_main_pdf(markdown_text):
     buffer = io.BytesIO()
     doc = SimpleDocTemplate(
         buffer,
@@ -25,7 +151,7 @@ def create_main_pdf(markdown_text):
     for col in (left_column, right_column):
         for item in col:
             if isinstance(item, list):
-                main_item, sub_items = item  # Unpack here
                 total_items += 1 + len(sub_items)
             else:
                 total_items += 1
@@ -39,7 +165,7 @@ def create_main_pdf(markdown_text):
     # Create custom styles
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
-    title_style.alignment = 1
     title_style.fontSize = min(16, base_font_size * 1.5)
     section_style = ParagraphStyle(
@@ -128,4 +254,21 @@ def create_main_pdf(markdown_text):
     story.append(table)
     doc.build(story)
     buffer.seek(0)
-    return buffer.getvalue()

+import streamlit as st
+import base64
+from reportlab.lib.pagesizes import A4
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib import colors
+import io
+import re
+# Define the ML outline as a markdown string
+ml_markdown = """# Cutting-Edge ML Outline
+## Core ML Techniques
+1. 🌟 **Mixture of Experts (MoE)**
+   - Conditional computation techniques
+   - Sparse gating mechanisms
+   - Training specialized sub-models
+2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
+   - Loss function customization
+   - Gradient accumulation strategies
+   - Learning rate schedulers
+3. 🤖 **Large Language Models (LLM) using Transformers**
+   - Attention mechanisms
+   - Tokenization strategies
+   - Position encodings
+## Training Methods
+4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
+   - Custom reward functions
+   - Feedback categorization
+   - Signal extraction from text
+5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
+   - Preference datasets
+   - PPO implementation
+   - KL divergence constraints
+6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
+   - TIES merging
+   - Task arithmetic
+   - SLERP interpolation
+## Optimization & Deployment
+7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
+   - Knowledge distillation
+   - Quantization techniques
+   - Model pruning strategies
+8. 🧠 **Agentic RAG Agents using Document Inputs**
+   - Vector database integration
+   - Query planning
+   - Self-reflection mechanisms
+9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
+   - Multi-document compression
+   - Timeline extraction
+   - Entity tracking
+## Knowledge Representation
+10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
+    - Entity recognition
+    - Relationship mapping
+    - Hierarchical structuring
+11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
+    - Flowchart generation
+    - Sequence diagram creation
+    - State diagrams
+12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
+    - Code completion
+    - Unit test generation
+    - Documentation synthesis
+"""
+# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Convert a markdown outline into two columns of PDF-ready entries.

    The text is scanned line by line (each line stripped of surrounding
    whitespace; blank lines ignored):

    * ``# `` title lines are skipped.
    * ``## `` section headings become the string ``"<b>heading</b>"``.
    * Lines starting with ``<digits>.`` open a numbered item. Each item is
      emitted as ``[item_text, sub_items]`` where ``sub_items`` is the list
      of ``- `` bullet lines that follow it (possibly empty).
    * Inside a numbered item, lines that are not ``- `` bullets are ignored.
    * Outside a numbered item, any other line is emitted as a plain string.

    Args:
        markdown_text: The markdown source as a single string.

    Returns:
        A ``(left_column, right_column)`` tuple: the entry list split at
        ``len(entries) // 2``, so the right column receives the extra entry
        when the count is odd.
    """
    pdf_content = []
    current_item = None
    sub_items = []
    in_list_item = False

    def flush_item():
        # Emit the pending numbered item, if any. Items without sub-bullets
        # are kept (with an empty list) instead of being silently dropped,
        # and the state is always reset so nothing leaks across sections.
        nonlocal current_item, sub_items
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item = None
        sub_items = []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            continue

        if line.startswith('## '):
            flush_item()
            # Slice off only the leading marker; a later "## " inside the
            # heading text is part of the heading and must be preserved.
            pdf_content.append(f"<b>{line[3:].strip()}</b>")
            in_list_item = False
        elif re.match(r'^\d+\.', line):
            flush_item()
            current_item = line
            in_list_item = True
        elif in_list_item:
            if line.startswith('- '):
                sub_items.append(line)
        else:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
+# Main PDF creation using ReportLab
 def create_main_pdf(markdown_text):
+    """Create a single-page landscape PDF with the outline in two columns"""
     buffer = io.BytesIO()
     doc = SimpleDocTemplate(
         buffer,
     for col in (left_column, right_column):
         for item in col:
             if isinstance(item, list):
+                main_item, sub_items = item
                 total_items += 1 + len(sub_items)
             else:
                 total_items += 1
     # Create custom styles
     title_style = styles['Heading1']
     title_style.textColor = colors.darkblue
+    title_style.alignment = 1
     title_style.fontSize = min(16, base_font_size * 1.5)
     section_style = ParagraphStyle(
     story.append(table)
     doc.build(story)
     buffer.seek(0)
+    return buffer.getvalue()
+# Streamlit UI
st.title("🚀 Cutting-Edge ML Outline Generator")

# st.button() is True only on the script run triggered by its own click, and
# clicking the download button triggers another rerun. Keep the generated
# bytes in session state so the download button and preview survive it.
just_generated = st.button("Generate Main PDF")
if just_generated:
    # The spinner wraps only the slow step, not the widgets rendered below.
    with st.spinner("Generating PDF..."):
        st.session_state["main_pdf_bytes"] = create_main_pdf(ml_markdown)

if "main_pdf_bytes" in st.session_state:
    pdf_bytes = st.session_state["main_pdf_bytes"]
    st.download_button(
        label="Download Main PDF",
        data=pdf_bytes,
        file_name="ml_outline.pdf",
        mime="application/pdf",
    )
    # Inline preview: embed the PDF as a base64 data URI.
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
    st.markdown(pdf_display, unsafe_allow_html=True)
    if just_generated:
        st.success("PDF generated successfully!")