import streamlit as st
import json
import requests
import csv
import pandas as pd
import tqdm

import cohere

from topically import Topically
from bertopic import BERTopic
from sklearn.cluster import KMeans
import numpy as np

venue = 'ICLR.cc/2023/Conference'
venue_short = 'iclr2023'

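# Cache the API response across Streamlit reruns so widget interactions below
# do not re-download every submission (assumes Streamlit >= 1.18, where
# st.cache_data is available).
@st.cache_data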
def get_conference_notes(venue, blind_submission=False):
    """
    Fetch all notes (submission records) of a conference from the OpenReview API.
    While decisions are not yet final, submissions are listed under the
    Blind_Submission invitation, so set blind_submission=True in that case.
    """

    blind_param = '/-/Blind_Submission' if blind_submission else ''
    offset = 0
    notes = []
    while True:
        print('Offset:', offset, 'Notes so far:', len(notes))
        url = f'https://api.openreview.net/notes?invitation={venue}{blind_param}&offset={offset}'
        response = requests.get(url)
        data = response.json()
        if len(data['notes']) == 0:
            break
        offset += 1000  # the API returns at most 1,000 notes per request
        notes.extend(data['notes'])
    return notes
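
# Alternative sketch (not used here): the official openreview-py client handles
# pagination itself; this assumes the `openreview` package and its API v1 client:
#
#   import openreview
#   client = openreview.Client(baseurl='https://api.openreview.net')
#   notes = client.get_all_notes(invitation=f'{venue}/-/Blind_Submission')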

raw_notes = get_conference_notes(venue, blind_submission=True)
st.write("Number of submissions at ICLR 2023:", len(raw_notes))

df_raw = pd.json_normalize(raw_notes)
# set index as first column
# df_raw.set_index(df_raw.columns[0], inplace=True)
accepted_venues = ['ICLR 2023 poster', 'ICLR 2023 notable top 5%', 'ICLR 2023 notable top 25%']
df = df_raw[df_raw["content.venue"].isin(accepted_venues)]
st.write("Number of submissions accepted at ICLR 2023:", len(df))

# Keep only the fields used downstream.
df = df[['id', 'content.title', 'content.keywords', 'content.abstract']]

list_of_abstracts = list(df["content.abstract"].values)



# Simple interactive demo widget.
x = st.slider('Select a value')
st.write(x, 'squared is', x * x)
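
# A minimal commented-out sketch of how the accepted papers' abstracts could be
# embedded with Cohere and clustered with KMeans (using the cohere, numpy, and
# sklearn imports above). It assumes a Cohere API key stored under the
# hypothetical Streamlit secret name "cohere_api_key":
#
#   co = cohere.Client(st.secrets["cohere_api_key"])
#   embeddings = np.array(co.embed(texts=list_of_abstracts).embeddings)
#   cluster_labels = KMeans(n_clusters=10, random_state=0).fit_predict(embeddings)
#   df = df.assign(cluster=cluster_labels)
#   st.write(df.groupby("cluster").size())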