import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

st.set_page_config(page_title="Post-Training Techniques for LLMs", layout="centered")

st.title("🔧 Beyond Pretraining: Post-Training Techniques for LLMs")
st.subheader("Distillation, Preference Optimization, and RLHF — Visualized")

st.markdown("""
This Streamlit app hosts a visual guide to help navigate post-training strategies for language models,
with real-world examples like **SmolLM3**, **Tulu**, and **DeepSeek-R1**.

📎 Download the full slide deck:
👉 [Click here to download (PDF)](https://huggingface.co/spaces/bird-of-paradise/post-training-techniques-guide/blob/main/src/Post%20Training%20Techniques.pdf)

---

🧠 **Topics covered:**
- Tradeoffs between SFT, DPO/APO/GRPO, and PPO
- Subjective vs. rule-based rewards
- How real open-source models chose their strategy

Made with ❤️ by Jen Wei
""")

# Optional: Slide preview
st.image(
    "src/Post_Training_Techniques_preview_2.png",
    caption="Slide 1: Tradeoffs between Optimization Paths",
    use_container_width=True,
)
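
# Optional extension (a sketch, not part of the original app): offer the slide deck
# as an in-app download in addition to the Hugging Face link above. The local path
# "src/Post Training Techniques.pdf" is an assumption based on the linked repo path;
# adjust it if the file lives elsewhere, or drop this block entirely.
try:
    with open("src/Post Training Techniques.pdf", "rb") as f:
        st.download_button(
            label="📥 Download the slide deck (PDF)",
            data=f.read(),
            file_name="Post Training Techniques.pdf",
            mime="application/pdf",
        )
except FileNotFoundError:
    # Fall back silently to the markdown link if the PDF is not bundled with the app.
    st.info("Slide deck PDF not found locally — use the download link above.")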