scholar / apps /publication.py
giswqs's picture
Update year
cc5bd12
raw
history blame
3.59 kB
import os
import scholarpy
import pandas as pd
import streamlit as st
import leafmap.foliumap as leafmap
import plotly.express as px
import datetime
# Calendar year (local time) at import; used for the year slider's bounds
# and default range in app().
current_year = datetime.date.today().year

# Build the scholarpy Dsl object only when the session does not already hold
# one, so an existing instance under the "dsl" key is reused.
if "dsl" not in st.session_state:
    st.session_state.dsl = scholarpy.Dsl()
def app() -> None:
    """Render the "Search Publications" page.

    Draws a row of search controls (keyword, exact-match toggle, search
    scope, year range, result limit). Once a keyword has been entered, the
    search is run through the object stored in ``st.session_state["dsl"]``
    and the page shows:

    * the matching publications (optionally narrowed to one journal), with a
      tab-separated download button;
    * a table of publication counts per journal, with its own download;
    * a box plot of ``country_count`` against ``year``.

    When the search yields no rows, a warning is shown and nothing else is
    rendered.
    """
    st.title("Search Publications")
    dsl = st.session_state["dsl"]

    # Two rows of five columns sharing the same relative widths.
    (
        row1_col1,
        row1_col2,
        row1_col3,
        row1_col4,
        row1_col5,
    ) = st.columns([1, 0.7, 1, 1, 1])
    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        [1, 0.7, 1, 1, 1]
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,  # default to "title_only"
        )

    with row1_col4:
        years = st.slider(
            "Select the start and end year:",
            1950,
            current_year + 6,
            (1980, current_year),
        )

    with row1_col5:
        limit = st.slider("Select the number of publications to return", 1, 1000, 100)

    # Nothing to search for until the user types a keyword.
    if not keywords:
        return

    result = dsl.search_pubs_by_keyword(
        keywords,
        exact_match,
        scope,
        start_year=years[0],
        end_year=years[1],
        limit=limit,
    )
    df = scholarpy.json_to_df(result)

    # The original code only tested `df is not None` after df had already been
    # merged and indexed, so an empty search would fail before reaching it.
    # NOTE(review): presumably json_to_df returns None when there are no rows;
    # an empty frame is handled as well to be safe.
    if df is None or df.empty:
        st.warning("No publications found for the given search criteria.")
        return

    # Per-publication array of unique affiliation countries, joined onto the
    # publication table by publication id.
    affiliations = result.as_dataframe_authors_affiliations()
    country_df = affiliations.groupby(["pub_id"])["aff_country"].unique()
    df = df.merge(country_df, left_on="id", right_on="pub_id")

    # Count only truthy entries so that empty country strings are not counted.
    df["country_count"] = [c[c.astype(bool)].size for c in df["aff_country"]]

    # Taken before any journal filter is applied, so the summary table below
    # always covers the whole result set.
    journal_counts = df["journal.title"].value_counts()

    # Never report more publications than the search actually matched.
    returned = min(limit, result.count_total)
    markdown = f"\nReturned Publications: {returned} (total = {result.count_total})\n"

    with row2_col1:
        st.markdown(markdown)

    with row2_col2:
        filter_by_journal = st.checkbox("Filter by journal")

    if filter_by_journal:
        # Cast to str so missing titles become a selectable, sortable value.
        df["journal.title"] = df["journal.title"].astype(str)
        journals = sorted(df["journal.title"].unique())
        with row2_col3:
            journal = st.selectbox("Select a journal", journals)
        df = df[df["journal.title"] == journal]

    # Empty placeholders for the unused cells of the second row.
    with row2_col4:
        st.write("")

    with row2_col5:
        st.write("")

    st.dataframe(df)
    leafmap.st_download_button("Download data", df, csv_sep="\t")

    summary = pd.DataFrame(
        {"Journal": journal_counts.index, "Count": journal_counts}
    ).reset_index(drop=True)
    markdown = f"\n- Total number of journals: **{len(summary)}**\n"

    row3_col1, row3_col2 = st.columns([1, 1])

    with row3_col1:
        st.markdown(markdown)
        st.dataframe(summary)
        leafmap.st_download_button("Download data", summary, csv_sep="\t")

    with row3_col2:
        fig = px.box(df, x="year", y="country_count", title="Country Counts")
        st.plotly_chart(fig)