Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import base64 | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib import colors | |
| import io | |
| import re | |
# Outline rendered into the PDF, written as markdown.
# Structure relied on by markdown_to_pdf_content():
#   "# "   -> document title (skipped by the parser)
#   "## "  -> section heading
#   "N. "  -> numbered item
#   "- "   -> sub-item of the preceding numbered item
# NOTE(review): the characters right after each item number (e.g. 'π', 'π₯')
# look like mis-encoded emoji -- confirm against the original file's encoding.
ml_markdown = """# Cutting-Edge ML Outline
## Core ML Techniques
1. π **Mixture of Experts (MoE)**
- Conditional computation techniques
- Sparse gating mechanisms
- Training specialized sub-models
2. π₯ **Supervised Fine-Tuning (SFT) using PyTorch**
- Loss function customization
- Gradient accumulation strategies
- Learning rate schedulers
3. π€ **Large Language Models (LLM) using Transformers**
- Attention mechanisms
- Tokenization strategies
- Position encodings
## Training Methods
4. π **Self-Rewarding Learning using NPS 0-10 and Verbatims**
- Custom reward functions
- Feedback categorization
- Signal extraction from text
5. π **Reinforcement Learning from Human Feedback (RLHF)**
- Preference datasets
- PPO implementation
- KL divergence constraints
6. π **MergeKit: Merging Models to Same Embedding Space**
- TIES merging
- Task arithmetic
- SLERP interpolation
## Optimization & Deployment
7. π **DistillKit: Model Size Reduction with Spectrum Analysis**
- Knowledge distillation
- Quantization techniques
- Model pruning strategies
8. π§ **Agentic RAG Agents using Document Inputs**
- Vector database integration
- Query planning
- Self-reflection mechanisms
9. β³ **Longitudinal Data Summarization from Multiple Docs**
- Multi-document compression
- Timeline extraction
- Entity tracking
## Knowledge Representation
10. π **Knowledge Extraction using Markdown Knowledge Graphs**
- Entity recognition
- Relationship mapping
- Hierarchical structuring
11. πΊοΈ **Knowledge Mapping with Mermaid Diagrams**
- Flowchart generation
- Sequence diagram creation
- State diagrams
12. π» **ML Code Generation with Streamlit/Gradio/HTML5+JS**
- Code completion
- Unit test generation
- Documentation synthesis
"""
# Process multilevel markdown for PDF output
def markdown_to_pdf_content(markdown_text):
    """Parse a markdown outline into two columns of PDF-ready entries.

    Each line is stripped and classified:

    * ``# Title``    -- skipped; the caller renders its own title.
    * ``## Section`` -- emitted as the string ``"<b>Section</b>"``.
    * ``N. Item``    -- emitted as ``[item_text, sub_items]``; every
      following ``- `` line is appended to ``sub_items`` (the ``- `` prefix
      is kept). An item with no sub-items is emitted with an empty list.
    * anything else  -- emitted verbatim as a plain string, unless it
      appears inside a numbered item, in which case it is skipped.

    Args:
        markdown_text: The markdown source to parse.

    Returns:
        A ``(left_column, right_column)`` tuple of lists. The parsed
        entries are split at the midpoint of the top-level entry count;
        when that count is odd the right column gets the extra entry.
    """
    pdf_content = []
    # The numbered item currently collecting sub-items, or None when the
    # parser is not inside a numbered item.
    current_entry = None

    for raw_line in markdown_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('# '):
            # Document title: rendered separately by the PDF builder.
            continue

        if line.startswith('## '):
            # Slice off only the leading marker; str.replace would also
            # eat any later "## " inside the heading text.
            section = line[3:].strip()
            pdf_content.append(f"<b>{section}</b>")
            current_entry = None
        elif re.match(r'^\d+\.', line):
            # Register the item immediately so that it is kept even when
            # no sub-items follow; sub-items are appended in place below.
            current_entry = [line, []]
            pdf_content.append(current_entry)
        elif current_entry is not None:
            if line.startswith('- '):
                current_entry[1].append(line)
            # Other text inside a numbered item is intentionally skipped.
        else:
            pdf_content.append(line)

    mid_point = len(pdf_content) // 2
    return pdf_content[:mid_point], pdf_content[mid_point:]
# Main PDF creation using ReportLab
def _build_column_cells(column, section_style, item_style, subitem_style):
    """Flatten one column of parsed outline entries into Paragraph cells."""
    cells = []
    for entry in column:
        if isinstance(entry, list):
            # Numbered item followed by its sub-items, one cell each.
            main_item, sub_items = entry
            cells.append(Paragraph(main_item, item_style))
            cells.extend(Paragraph(sub_item, subitem_style) for sub_item in sub_items)
        elif entry.startswith('<b>'):
            # Section headings arrive wrapped in <b>...</b>; the section
            # style supplies the emphasis, so the tags are removed.
            heading = entry.replace('<b>', '').replace('</b>', '')
            cells.append(Paragraph(heading, section_style))
        else:
            cells.append(Paragraph(entry, item_style))
    return cells


def create_main_pdf(markdown_text):
    """Render the markdown outline as a two-column landscape A4 PDF.

    The outline is parsed with ``markdown_to_pdf_content`` and laid out as a
    two-column table under a fixed title. Font sizes shrink as the number of
    outline lines grows (base size clamped to 6-11 pt) to help the content
    fit on one page; nothing enforces a single page.

    Args:
        markdown_text: Markdown outline to render.

    Returns:
        The generated PDF document as ``bytes``. An outline with no entries
        yields a PDF containing only the title.
    """
    margin = 36
    spacer_height = 10

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=(A4[1], A4[0]),  # Landscape A4: 841.89 x 595.27 points
        leftMargin=margin,
        rightMargin=margin,
        topMargin=margin,
        bottomMargin=margin
    )
    styles = getSampleStyleSheet()

    left_column, right_column = markdown_to_pdf_content(markdown_text)

    # One rendered line per section/plain entry, and one per numbered item
    # plus each of its sub-items.
    total_items = sum(
        1 + len(entry[1]) if isinstance(entry, list) else 1
        for entry in left_column + right_column
    )

    # Dynamic font sizes; max(..., 1) avoids dividing by zero when the
    # outline is empty.
    base_font_size = max(6, min(11, 200 / max(total_items, 1)))
    item_font_size = base_font_size
    subitem_font_size = base_font_size * 0.9
    section_font_size = base_font_size * 1.2

    # Derive custom styles instead of mutating the stylesheet's own entries.
    title_style = ParagraphStyle(
        'TitleStyle',
        parent=styles['Heading1'],
        textColor=colors.darkblue,
        alignment=1,  # centred
        fontSize=min(16, base_font_size * 1.5)
    )
    section_style = ParagraphStyle(
        'SectionStyle',
        parent=styles['Heading2'],
        textColor=colors.darkblue,
        fontSize=section_font_size,
        leading=section_font_size * 1.2,
        spaceAfter=2
    )
    item_style = ParagraphStyle(
        'ItemStyle',
        parent=styles['Normal'],
        fontSize=item_font_size,
        leading=item_font_size * 1.2,
        fontName='Helvetica-Bold',
        spaceAfter=1
    )
    subitem_style = ParagraphStyle(
        'SubItemStyle',
        parent=styles['Normal'],
        fontSize=subitem_font_size,
        leading=subitem_font_size * 1.2,
        leftIndent=10,
        spaceAfter=1
    )

    story = [
        Paragraph("Cutting-Edge ML Outline (ReportLab)", title_style),
        Spacer(1, spacer_height),
    ]

    left_cells = _build_column_cells(left_column, section_style, item_style, subitem_style)
    right_cells = _build_column_cells(right_column, section_style, item_style, subitem_style)

    # Pad the shorter column with empty cells so the rows pair up.
    max_cells = max(len(left_cells), len(right_cells))
    left_cells.extend([""] * (max_cells - len(left_cells)))
    right_cells.extend([""] * (max_cells - len(right_cells)))
    table_data = list(zip(left_cells, right_cells))

    # ReportLab rejects a table without rows by default, so the table is
    # only added when there is something to show.
    if table_data:
        col_width = (A4[1] - 2 * margin) / 2.0
        table = Table(table_data, colWidths=[col_width, col_width], hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            # Thin rule between the two columns.
            ('LINEAFTER', (0, 0), (0, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    return buffer.getvalue()
# Streamlit UI
st.title("π Cutting-Edge ML Outline Generator")

# Streamlit re-runs the whole script on every widget interaction, and
# st.button() is only True during the run triggered by its own click.
# Keeping the generated bytes in session state lets the download button and
# the preview survive the rerun caused by clicking "Download Main PDF".
just_generated = False
if st.button("Generate Main PDF"):
    with st.spinner("Generating PDF..."):
        st.session_state["main_pdf_bytes"] = create_main_pdf(ml_markdown)
    just_generated = True

if "main_pdf_bytes" in st.session_state:
    pdf_bytes = st.session_state["main_pdf_bytes"]
    st.download_button(
        label="Download Main PDF",
        data=pdf_bytes,
        file_name="ml_outline.pdf",
        mime="application/pdf"
    )
    # Inline preview: the PDF is embedded in the page as a base64 data URI.
    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="400px" type="application/pdf">'
    st.markdown(pdf_display, unsafe_allow_html=True)
    # Show the confirmation only on the run that actually built the PDF.
    if just_generated:
        st.success("PDF generated successfully!")