Spaces:
Runtime error
Runtime error
Update results.py
Browse files- results.py +32 -0
results.py
CHANGED
|
@@ -6,6 +6,37 @@ from plotly import graph_objects as go
|
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
#Perplexity Across Different Buckets (global)
|
| 10 |
# The data you provided
|
| 11 |
DATA = [
|
|
@@ -687,6 +718,7 @@ upsampling_exp = Div(
|
|
| 687 |
P("To account for differing dataset sizes, the evaluation scores represent the final evaluation score after the entire dataset has been processed."),
|
| 688 |
H3("Training Evaluations"),
|
| 689 |
P("We also conducted full scale training using TxT360 and FineWeb-1.5T. Below are plots of the training and validation loss curves for each dataset. We can see that TxT360 achieves a lower training and validation loss compared to FineWeb-1.5T. "),
|
|
|
|
| 690 |
)
|
| 691 |
|
| 692 |
perp1_div = Div(
|
|
|
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
| 8 |
|
| 9 |
+
|
| 10 |
+
# Upsampling experiment: validation loss graph (TxT360 vs FineWeb-1.5T)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Load the CSV file
|
| 14 |
+
data = pd.read_csv('data/validation_loss _txt360_vs_fineweb.csv')
|
| 15 |
+
|
| 16 |
+
# Create a plotly line chart
|
| 17 |
+
fig = go.Figure()
|
| 18 |
+
|
| 19 |
+
# Add the line for FineWeb-1.5T
|
| 20 |
+
fig.add_trace(go.Scatter(x=data['Step'], y=data['FineWeb-1.5T'], mode='lines', name='FineWeb-1.5T'))
|
| 21 |
+
|
| 22 |
+
# Add the line for TxT360
|
| 23 |
+
fig.add_trace(go.Scatter(x=data['Step'], y=data['TxT360'], mode='lines', name='TxT360'))
|
| 24 |
+
|
| 25 |
+
# Update layout
|
| 26 |
+
fig.update_layout(
|
| 27 |
+
title='Validation Loss Comparison: TxT360 vs FineWeb-1.5T',
|
| 28 |
+
xaxis_title='Steps',
|
| 29 |
+
yaxis_title='Validation Loss',
|
| 30 |
+
legend_title='Dataset',
|
| 31 |
+
template='plotly_white'
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# Keep the figure under a module-level name so the page layout can embed it (nothing is displayed here)
|
| 35 |
+
validation_loss_graph = fig
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
#Perplexity Across Different Buckets (global)
|
| 41 |
# The data you provided
|
| 42 |
DATA = [
|
|
|
|
| 718 |
P("To account for differing dataset sizes, the evaluation scores represent the final evaluation score after the entire dataset has been processed."),
|
| 719 |
H3("Training Evaluations"),
|
| 720 |
P("We also conducted full scale training using TxT360 and FineWeb-1.5T. Below are plots of the training and validation loss curves for each dataset. We can see that TxT360 achieves a lower training and validation loss compared to FineWeb-1.5T. "),
|
| 721 |
+
validation_loss_graph,
|
| 722 |
)
|
| 723 |
|
| 724 |
perp1_div = Div(
|