awacke1 committed on
Commit
3febaf2
·
verified ·
1 Parent(s): db0a51e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -0
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ from reportlab.lib.pagesizes import A4
4
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
5
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
6
+ from reportlab.lib import colors
7
+ import io
8
+ import re
9
+
10
# The ML outline rendered by this app, as a markdown string.
#
# Structure expected by markdown_to_pdf_content(): one "# " title line,
# "## " section headings, numbered items ("1. ..."), and "- " sub-bullets
# under each numbered item.
#
# NOTE(review): several emoji arrived mis-encoded (UTF-8 bytes decoded with a
# legacy code page) and were restored from their byte sequences. The emoji on
# item 7 had lost a byte and could not be recovered unambiguously; the ruler
# is a best guess - confirm against the original file.
# NOTE(review): sub-bullet indentation was lost in the copy this was restored
# from; three spaces are assumed. The PDF parser strips each line, so the
# generated PDF does not depend on it.
ml_markdown = """# Cutting-Edge ML Outline

## Core ML Techniques
1. 🌟 **Mixture of Experts (MoE)**
   - Conditional computation techniques
   - Sparse gating mechanisms
   - Training specialized sub-models

2. 🔥 **Supervised Fine-Tuning (SFT) using PyTorch**
   - Loss function customization
   - Gradient accumulation strategies
   - Learning rate schedulers

3. 🤖 **Large Language Models (LLM) using Transformers**
   - Attention mechanisms
   - Tokenization strategies
   - Position encodings

## Training Methods
4. 📊 **Self-Rewarding Learning using NPS 0-10 and Verbatims**
   - Custom reward functions
   - Feedback categorization
   - Signal extraction from text

5. 👍 **Reinforcement Learning from Human Feedback (RLHF)**
   - Preference datasets
   - PPO implementation
   - KL divergence constraints

6. 🔗 **MergeKit: Merging Models to Same Embedding Space**
   - TIES merging
   - Task arithmetic
   - SLERP interpolation

## Optimization & Deployment
7. 📏 **DistillKit: Model Size Reduction with Spectrum Analysis**
   - Knowledge distillation
   - Quantization techniques
   - Model pruning strategies

8. 🧠 **Agentic RAG Agents using Document Inputs**
   - Vector database integration
   - Query planning
   - Self-reflection mechanisms

9. ⏳ **Longitudinal Data Summarization from Multiple Docs**
   - Multi-document compression
   - Timeline extraction
   - Entity tracking

## Knowledge Representation
10. 📑 **Knowledge Extraction using Markdown Knowledge Graphs**
   - Entity recognition
   - Relationship mapping
   - Hierarchical structuring

11. 🗺️ **Knowledge Mapping with Mermaid Diagrams**
   - Flowchart generation
   - Sequence diagram creation
   - State diagrams

12. 💻 **ML Code Generation with Streamlit/Gradio/HTML5+JS**
   - Code completion
   - Unit test generation
   - Documentation synthesis
"""
77
+
78
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Parse the outline markdown into two halves for a two-column layout.

    Recognised line types (after stripping surrounding whitespace):

    * ``# title``      - skipped.
    * ``## section``   - emitted as the string ``"<b>section</b>"``.
    * ``N. item``      - starts a numbered item; emitted as
      ``[item_text, [sub_bullet, ...]]`` once the item is complete.
    * ``- bullet``     - appended to the current numbered item; outside a
      numbered item it is emitted as a plain string.
    * anything else    - emitted as a plain string outside a numbered item,
      ignored inside one.

    Args:
        markdown_text: The markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple. The parsed entries are split
        at ``len(entries) // 2``, so the right column receives the extra
        entry when the count is odd.
    """
    pdf_content = []
    current_item = None  # text of the numbered item being collected, if any
    sub_items = []

    def flush_item():
        # Emit the pending numbered item even when it has no sub-bullets;
        # requiring sub-bullets here would silently drop bare items.
        nonlocal current_item, sub_items
        if current_item is not None:
            pdf_content.append([current_item, sub_items])
        current_item, sub_items = None, []

    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('# '):
            continue

        if line.startswith('## '):
            flush_item()
            # Slice off only the leading marker so a later "## " inside the
            # heading text is preserved.
            pdf_content.append(f"<b>{line[3:].strip()}</b>")
        elif re.match(r'\d+\.', line):
            flush_item()
            current_item = line
        elif current_item is not None:
            if line.startswith('- '):
                sub_items.append(line)
        else:
            pdf_content.append(line)

    flush_item()

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
124
+
125
# Main PDF creation using ReportLab
def _markdown_bold_to_markup(text):
    """Replace markdown ``**bold**`` spans with ReportLab ``<b>`` markup."""
    return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)


def _outline_cells(entries, section_style, item_style, subitem_style):
    """Turn one column of parsed outline entries into a flat list of Paragraphs.

    Args:
        entries: One column as returned by ``markdown_to_pdf_content``:
            ``"<b>section</b>"`` strings, ``[item, [sub_items]]`` lists, or
            plain strings.
        section_style: Style for section headings.
        item_style: Style for numbered items and plain strings.
        subitem_style: Style for sub-bullets.

    Returns:
        A list of ``Paragraph`` flowables, one per heading, item and sub-bullet.
    """
    cells = []
    for entry in entries:
        if isinstance(entry, list):
            main_item, sub_items = entry
            cells.append(Paragraph(_markdown_bold_to_markup(main_item), item_style))
            cells.extend(
                Paragraph(_markdown_bold_to_markup(sub_item), subitem_style)
                for sub_item in sub_items
            )
        elif entry.startswith('<b>'):
            # Section headings arrive wrapped in <b>; the heading style is
            # used instead, so the tags are dropped.
            text = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(text, section_style))
        else:
            cells.append(Paragraph(_markdown_bold_to_markup(entry), item_style))
    return cells


def create_main_pdf(markdown_text):
    """Create a landscape A4 PDF with the outline laid out in two columns.

    The markdown is parsed with ``markdown_to_pdf_content``; each half becomes
    one column of a two-column table, with a grey rule between the columns.

    NOTE(review): the built-in Helvetica faces used here presumably have no
    emoji glyphs, so emoji in the outline may not render - confirm.

    Args:
        markdown_text: The outline markdown to render.

    Returns:
        The generated PDF document as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # Landscape: swap A4 width and height
        leftMargin=50,
        rightMargin=50,
        topMargin=50,
        bottomMargin=50,
    )

    styles = getSampleStyleSheet()

    # Derive every custom style from the sample sheet instead of editing its
    # entries in place, so all four styles are built the same way.
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # Center alignment
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        spaceAfter=6,
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=11,
        leading=14,
        fontName='Helvetica-Bold',
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=10,
        leading=12,
        leftIndent=20,
    )

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, 20),
    ]

    # Process markdown content into one list of cells per column
    left_column, right_column = markdown_to_pdf_content(markdown_text)
    left_cells = _outline_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _outline_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column with empty cells so zip() keeps every row
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # Landscape page width minus the two 50pt margins and 20pt of slack,
    # shared equally by both columns
    col_width = (A4[1] - 120) / 2.0

    table = Table(table_data, colWidths=[col_width, col_width])
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('ALIGN', (1, 0), (1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.white),
        # Grey vertical rule separating the two columns
        ('LINEAFTER', (0, 0), (0, -1), 1, colors.grey),
    ]))

    story.append(table)
    doc.build(story)
    return buffer.getvalue()
229
+
230
# Streamlit UI
st.title("🚀 Cutting-Edge ML Outline Generator")

# Streamlit reruns the whole script on every widget interaction, and
# st.button() is True only on the run triggered by its own click. Keeping the
# generated bytes in session state lets the download button and the preview
# survive the rerun caused by clicking "Download Main PDF".
just_generated = False
if st.button("Generate Main PDF"):
    # Show the spinner only while the PDF is actually being built.
    with st.spinner("Generating PDF..."):
        st.session_state["main_pdf_bytes"] = create_main_pdf(ml_markdown)
    just_generated = True

pdf_bytes = st.session_state.get("main_pdf_bytes")
if pdf_bytes is not None:
    st.download_button(
        label="Download Main PDF",
        data=pdf_bytes,
        file_name="ml_outline.pdf",
        mime="application/pdf",
    )

    # Inline preview: embed the PDF in the page as a base64 data URI.
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
    st.markdown(pdf_display, unsafe_allow_html=True)

    # Report success only on the run that produced the PDF, not on reruns.
    if just_generated:
        st.success("PDF generated successfully!")