Spaces:

metacritical
/

DeepSeekPapers

Running

App Files Community

metacritical committed on Feb 18

Commit

5852be1

verified ·

1 Parent(s): 63c5240

default more papers.

Browse files

Files changed (1) hide show

index.html +31 -44

index.html CHANGED Viewed

@@ -10,35 +10,6 @@
   <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
-  <style>
-    .publication-title {
-      color: #363636;
-    }
-    .paper-card {
-      margin-bottom: 2rem;
-      transition: transform 0.2s;
-    }
-    .paper-card:hover {
-      transform: translateY(-5px);
-    }
-    .coming-soon-badge {
-      background-color: #3273dc;
-      color: white;
-      padding: 0.25rem 0.75rem;
-      border-radius: 4px;
-      font-size: 0.8rem;
-      margin-left: 1rem;
-    }
-    .paper-description {
-      color: #4a4a4a;
-      margin-top: 0.5rem;
-    }
-    .release-date {
-      color: #7a7a7a;
-      font-size: 0.9rem;
-    }
-  </style>
 </head>
 <body>
@@ -61,32 +32,32 @@
       <div class="columns is-centered">
         <div class="column is-10">
-          <!-- DeepSeekLLM -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
-                DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
-              <p class="release-date">Released: November 29, 2023</p>
               <p class="paper-description">
-                This foundational paper explores scaling laws and the trade-offs between data and model size,
-                establishing the groundwork for subsequent models.
               </p>
             </div>
           </div>
-          <!-- DeepSeek-V2 -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
-                DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
-              <p class="release-date">Released: May 2024</p>
               <p class="paper-description">
-                Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
-                training costs by 42%. Emphasizes strong performance characteristics and efficiency improvements.
               </p>
             </div>
           </div>
@@ -106,17 +77,17 @@
             </div>
           </div>
-          <!-- DeepSeek-R1 -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
-                DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
-              <p class="release-date">Released: January 20, 2025</p>
               <p class="paper-description">
-                The R1 model builds on previous work to enhance reasoning capabilities through large-scale
-                reinforcement learning, competing directly with leading models like OpenAI's o1.
               </p>
             </div>
           </div>
@@ -136,6 +107,22 @@
             </div>
           </div>
           <!-- DeepSeek-Prover -->
           <div class="card paper-card">
             <div class="card-content">

   <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
 </head>
 <body>
       <div class="columns is-centered">
         <div class="column is-10">
+          <!-- Native Sparse Attention -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
+                <a href="https://arxiv.org/abs/2502.11089">Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention</a>
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
+              <p class="release-date">Released: February 2025</p>
               <p class="paper-description">
+                Introduces a new approach to sparse attention that is both hardware-efficient and natively trainable,
+                improving the performance of large language models.
               </p>
             </div>
           </div>
+          <!-- DeepSeek-R1 -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
+                DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
+              <p class="release-date">Released: January 20, 2025</p>
               <p class="paper-description">
+                The R1 model builds on previous work to enhance reasoning capabilities through large-scale
+                reinforcement learning, competing directly with leading models like OpenAI's o1.
               </p>
             </div>
           </div>
             </div>
           </div>
+          <!-- DeepSeek-V2 -->
           <div class="card paper-card">
             <div class="card-content">
               <h3 class="title is-4">
+                DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
                 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
               </h3>
+              <p class="release-date">Released: May 2024</p>
               <p class="paper-description">
+                Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
+                training costs by 42%. Emphasizes strong performance characteristics and efficiency improvements.
               </p>
             </div>
           </div>
             </div>
           </div>
+          <!-- DeepSeekLLM -->
+          <div class="card paper-card">
+            <div class="card-content">
+              <h3 class="title is-4">
+                DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism
+                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
+              </h3>
+              <p class="release-date">Released: November 29, 2023</p>
+              <p class="paper-description">
+                This foundational paper explores scaling laws and the trade-offs between data and model size,
+                establishing the groundwork for subsequent models.
+              </p>
+            </div>
+          </div>
+          <!-- Papers without specific dates -->
           <!-- DeepSeek-Prover -->
           <div class="card paper-card">
             <div class="card-content">