Spaces:
Runtime error
Runtime error
Update curated.py
Browse files- curated.py +50 -0
curated.py
CHANGED
|
@@ -856,6 +856,55 @@ fig.update_layout(
|
|
| 856 |
# Show the plot
|
| 857 |
diff_stacked_bar = fig
|
| 858 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
|
| 860 |
def curated(request):
|
| 861 |
|
|
@@ -992,6 +1041,7 @@ def curated(request):
|
|
| 992 |
plotly2fasthtml(get_chart_new()),
|
| 993 |
plotly2fasthtml(stacked_bar),
|
| 994 |
plotly2fasthtml(diff_stacked_bar),
|
|
|
|
| 995 |
H2("Curated Sources Processing"),
|
| 996 |
filtering_process,
|
| 997 |
data_preparation_div,
|
|
|
|
| 856 |
# Show the plot
|
| 857 |
diff_stacked_bar = fig
|
| 858 |
|
| 859 |
+
# Per-dataset line counts at each filter stage. Every list is positionally
# aligned with the stage names under "Filter" (four entries each).
# NOTE(review): values look like lines remaining after each stage is
# applied in order -- confirm against the pipeline that produced them.
data = {
    "Filter": [
        "Downloaded Lines",
        "Language Filter",
        "Min Word Count",
        "Unigram Log Probability",
    ],
    "Wikipedia": [61614907, 61614907, 60468491, 60468491],
    "Freelaw": [75971288, 73690766, 68171834, 68123174],
    "DM Maths": [112559888, 112559888, 112559888, 112559888],
    "USPTO": [6880276, 6878964, 6749922, 6749389],
    "PG19": [28752, 28683, 28682, 28632],
    "Hackernews": [2064931, 2010802, 2010488, 2003636],
    "Ubuntu IRC": [37966, 23501, 23468, 23205],
    "Europarl": [69814, 69814, 69814, 69814],
    "StackExchange": [23246548, 23246548, 23246352, 23246352],
    "Arxiv": [1911867, 1869441, 1763840, 1762661],
    "S2ORC": [12963563, 12963563, 12963563, 12963563],
    "S2ORC Abstract": [102324176, 83867601, 82889293, 82777912],
    "Pubmed Central": [5230932, 4830486, 4768310, 4767474],
    "Pubmed Abstract": [25787474, 25784374, 25747955, 25746724],
    "Phil Papers": [49389, 39175, 39175, 39128],
}

# Tabular view: one row per filter stage, one column per dataset.
df = pd.DataFrame(data)

# Start from an empty figure and attach one bar series per dataset column;
# the "Filter" column only supplies the shared x-axis categories.
fig = go.Figure()
for dataset in df.columns.drop("Filter"):
    fig.add_trace(go.Bar(x=df["Filter"], y=df[dataset], name=dataset))

# Stack the per-dataset bars within each filter stage and label the axes.
fig.update_layout(
    title="Stacked Bar Chart of Line Reductions by Filter for Each Dataset",
    xaxis_title="Filter",
    yaxis_title="Number of Lines",
    legend_title="Dataset",
    barmode="stack",
    width=1000,
    height=600,
)

# Keep a dedicated module-level handle for this chart; `fig` is a scratch
# name (it was also used for `diff_stacked_bar` just above this section).
diff2_stacked_bar = fig
|
| 906 |
+
|
| 907 |
+
|
| 908 |
|
| 909 |
def curated(request):
|
| 910 |
|
|
|
|
| 1041 |
plotly2fasthtml(get_chart_new()),
|
| 1042 |
plotly2fasthtml(stacked_bar),
|
| 1043 |
plotly2fasthtml(diff_stacked_bar),
|
| 1044 |
+
plotly2fasthtml(diff2_stacked_bar),
|
| 1045 |
H2("Curated Sources Processing"),
|
| 1046 |
filtering_process,
|
| 1047 |
data_preparation_div,
|