# Spaces:
# Sleeping
# Sleeping
import csv
import json

import cohere
import numpy as np
import pandas as pd
import requests
import streamlit as st
import tqdm
from bertopic import BERTopic
from sklearn.cluster import KMeans
from topically import Topically
# OpenReview venue id; passed to get_conference_notes() as the invitation prefix.
venue = 'ICLR.cc/2023/Conference'
# Short slug for the same venue (not referenced in the visible part of this script).
venue_short = 'iclr2023'
def get_conference_notes(venue, blind_submission=False):
    """
    Get all notes of a conference (data) from the OpenReview API.

    Pages through ``https://api.openreview.net/notes`` by increasing the
    ``offset`` query parameter until the API returns an empty ``notes`` page.

    Args:
        venue: Venue id used as the invitation prefix,
            e.g. ``'ICLR.cc/2023/Conference'``.
        blind_submission: If results are not final, set this to True so the
            ``-/Blind_Submission`` invitation is queried.

    Returns:
        A list containing every entry of the ``notes`` array from all pages,
        in the order received.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If a single request takes longer than 30 seconds.
    """
    blind_param = '-/Blind_Submission' if blind_submission else ''
    offset = 0
    notes = []
    while True:
        # Progress trace: where the next page starts and how much is collected.
        print('Offset:', offset, 'Data:', len(notes))
        url = f'https://api.openreview.net/notes?invitation={venue}/{blind_param}&offset={offset}'
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(url, timeout=30)
        # Fail loudly on HTTP errors instead of a KeyError on 'notes' below.
        response.raise_for_status()
        batch = response.json()['notes']
        if not batch:
            break
        # Advance by what was actually received rather than an assumed page
        # size, so no records are skipped if the server returns shorter pages.
        offset += len(batch)
        notes.extend(batch)
    return notes
# Download every submission note for the venue via the blind-submission invitation.
# NOTE(review): Streamlit re-executes the script on widget interaction, so this
# download presumably repeats whenever the slider below moves; consider caching
# the result -- confirm against the installed Streamlit version.
raw_notes = get_conference_notes(venue, blind_submission=True)
st.write("Number of submissions at ICLR 2023:", len(raw_notes))

# Flatten the nested note dicts into a table with dotted column names
# (e.g. "content.venue", "content.title").
df_raw = pd.json_normalize(raw_notes)
# set index as first column
# df_raw.set_index(df_raw.columns[0], inplace=True)

# Keep only rows whose venue string is one of the accepted categories.
accepted_venues = ['ICLR 2023 poster', 'ICLR 2023 notable top 5%', 'ICLR 2023 notable top 25%']
df = df_raw[df_raw["content.venue"].isin(accepted_venues)]
st.write("Number of submissions accepted at ICLR 2023:", len(df))

# Narrow the table to the id, title, keywords and abstract columns.
df_filtered = df[['id', 'content.title', 'content.keywords', 'content.abstract']]
df = df_filtered

# NOTE(review): despite its name this list holds the paper *titles*
# ("content.title"), not the abstracts -- confirm which column is intended.
list_of_abstracts = list(df["content.title"].values)

# Demo widget: show a slider and the square of the selected value.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)