# scholar/apps/h_index.py
# giswqs: "Update st.cache" (b1fc4cc)
import dimcli
import pandas as pd
import sys
import os
import streamlit as st
import scholarpy
# Build the scholarpy DSL client only when this Streamlit session has not
# stored one yet; app() later reads it back from st.session_state["dsl"].
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()
@st.cache_data
def the_H_function(sorted_citations_list, n=1):
    """Return the h-index for a list of citation counts.

    Walks the list from the front and counts how many leading entries have a
    citation count at least as large as their 1-based rank.

    Args:
        sorted_citations_list: Citation counts, one per publication, ordered
            from most cited to least cited. The list is not re-sorted here.
        n: Rank assigned to the first element of the list. Callers normally
            leave this at 1; with another value the result is offset the same
            way, i.e. ``n - 1`` plus the number of leading entries that pass.

    Returns:
        The largest rank ``r`` such that every entry up to rank ``r`` has at
        least as many citations as its rank, or ``n - 1`` if the first entry
        already fails (including the empty list).

    Examples:
        >>> the_H_function([10, 8, 5, 4, 3])
        4
        >>> the_H_function([25, 8, 5, 3, 3])
        3
        >>> the_H_function([1000, 20])
        2
    """
    # Iterate rather than recurse: one stack frame and one list copy per
    # counted publication would hit the interpreter's recursion limit on long
    # lists and makes the walk quadratic.
    rank = n
    for citation_count in sorted_citations_list:
        if citation_count < rank:
            break
        rank += 1
    return rank - 1
def dim_login(key=None, endpoint=None):
    """Log in to Dimensions through dimcli and return a query client.

    Args:
        key: API key passed to ``dimcli.login``. When ``None``, the value of
            the ``DIM_TOKEN`` environment variable is used (which may itself
            be unset, in which case ``None`` is passed through).
        endpoint: API endpoint passed to ``dimcli.login``. When ``None``,
            ``"https://app.dimensions.ai"`` is used.

    Returns:
        A new ``dimcli.Dsl()`` instance created after the login call.

    Raises:
        Exception: If the login call or the client construction fails. The
            underlying error is attached as ``__cause__``.
    """
    # Bind the defaults onto the parameters themselves so that explicitly
    # supplied values are honoured instead of leaving the names undefined.
    if key is None:
        key = os.environ.get("DIM_TOKEN")
    if endpoint is None:
        endpoint = "https://app.dimensions.ai"

    try:
        dimcli.login(key=key, endpoint=endpoint)
        return dimcli.Dsl()
    except Exception as err:
        # Keep the original exception type and message for callers, but
        # chain the cause so the real failure is visible in the traceback.
        raise Exception("Failed to login to Dimensions") from err
@st.cache_data
def get_pubs_df(dsl, researcher_id):
    """Fetch a researcher's publications as a DataFrame.

    Args:
        dsl: Client object exposing ``query()``; ``app()`` in this file passes
            the object stored in ``st.session_state["dsl"]``.
        researcher_id: Researcher identifier substituted into the
            ``researchers.id`` filter of the query.

    Returns:
        Whatever ``as_dataframe()`` returns for the query result. The query
        requests the ``id``, ``title``, ``doi`` and ``times_cited`` fields,
        sorted by ``times_cited`` and limited to 1000 records.
    """
    # NOTE(review): researcher_id is interpolated into the query text as-is;
    # confirm whether quoting/escaping is needed for user-supplied values.
    query_template = """search publications where researchers.id = "{}" return publications[id+title+doi+times_cited] sort by times_cited limit 1000"""
    query_text = query_template.format(researcher_id)
    result = dsl.query(query_text)
    return result.as_dataframe()
@st.cache_data
def get_citations(df):
    """Extract the ``times_cited`` column of *df* as a plain list.

    Missing values in the column are replaced with 0 before conversion.

    Args:
        df: DataFrame of publications, as produced by ``get_pubs_df``.

    Returns:
        A list with one citation count per row, in row order. An empty list
        is returned when *df* has no ``times_cited`` column.
    """
    # Presumably a query with no matching publications yields a frame without
    # this column (verify against the client); treat that as "no citations"
    # rather than failing with KeyError.
    if "times_cited" not in df.columns:
        return []
    # Fill only the column that is actually read instead of the whole frame.
    return list(df["times_cited"].fillna(0))
def app():
    """Render the h-index page.

    Reads the DSL client from the Streamlit session, asks for a researcher
    ID, shows that researcher's publications table, and writes the h-index
    computed from the ``times_cited`` values.
    """
    client = st.session_state["dsl"]

    # The text input is pre-filled with a sample researcher ID.
    researcher_id = st.text_input("Enter researcher ID:", "ur.013632443777.66")

    publications = get_pubs_df(client, researcher_id)
    st.dataframe(publications)

    citation_counts = get_citations(publications)
    h_index = the_H_function(citation_counts)
    st.write(f"H-index: {h_index}")