File size: 3,591 Bytes
1da1c98 cc5bd12 1da1c98 cc5bd12 1da1c98 cc5bd12 1da1c98 cc5bd12 1da1c98 cc5bd12 1da1c98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import os
import scholarpy
import pandas as pd
import streamlit as st
import leafmap.foliumap as leafmap
import plotly.express as px
import datetime
# Calendar year at import time; app() uses it to bound the year-range slider.
current_year = datetime.date.today().year

# Build the scholarpy Dsl client only when the session state does not already
# hold one, so an existing client is reused rather than constructed again.
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()
def app():
    """Render the "Search Publications" page.

    Draws the search controls (keyword, exact-match toggle, search scope,
    year range and result limit). Once a keyword has been entered, the
    ``Dsl`` client stored in ``st.session_state["dsl"]`` is queried and the
    page shows:

    * the matching publications, optionally narrowed to a single journal,
      with a tab-separated download button;
    * a per-journal publication count table with its own download button;
    * a box plot of the number of affiliation countries per publication,
      grouped by year.

    Nothing below the controls is rendered while the keyword box is empty.
    If the search yields no publication table, a warning is shown instead
    of the result widgets.
    """
    st.title("Search Publications")
    dsl = st.session_state["dsl"]

    # Both control rows share the same relative column widths.
    column_widths = [1, 0.7, 1, 1, 1]
    row1_col1, row1_col2, row1_col3, row1_col4, row1_col5 = st.columns(
        column_widths
    )
    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        column_widths
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,  # default to "title_only"
        )

    with row1_col4:
        years = st.slider(
            "Select the start and end year:",
            1950,
            current_year + 6,
            (1980, current_year),
        )

    with row1_col5:
        limit = st.slider("Select the number of publications to return", 1, 1000, 100)

    # Guard clause: no keyword means there is nothing to search for.
    if not keywords:
        return

    result = dsl.search_pubs_by_keyword(
        keywords,
        exact_match,
        scope,
        start_year=years[0],
        end_year=years[1],
        limit=limit,
    )

    df = scholarpy.json_to_df(result)
    # Check for a missing/empty table BEFORE using it: the merge and column
    # lookups below would fail on it.
    # NOTE(review): presumably json_to_df returns None for an empty result
    # set -- confirm against scholarpy.
    if df is None or df.empty:
        st.warning("No publications found for the given search.")
        return

    # One row per publication holding the array of its distinct affiliation
    # countries, joined onto the publication table by publication id.
    affiliations = result.as_dataframe_authors_affiliations()
    country_df = affiliations.groupby(["pub_id"])["aff_country"].unique()
    df = df.merge(country_df, left_on="id", right_on="pub_id")

    # Count only truthy entries so empty-string countries are not counted.
    df["country_count"] = [c[c.astype(bool)].size for c in df["aff_country"]]

    # Taken before any journal filter so the summary table always reflects
    # the full result set. value_counts() does not mutate, so no copy needed.
    journal_counts = df["journal.title"].value_counts()

    # The search cannot return more publications than exist in total.
    returned = min(limit, result.count_total)

    with row2_col1:
        st.markdown(
            f"\nReturned Publications: {returned} (total = {result.count_total})\n"
        )

    with row2_col2:
        filter_by_journal = st.checkbox("Filter by journal")

    if filter_by_journal:
        # Cast to str so missing journal titles sort and compare like the rest.
        df["journal.title"] = df["journal.title"].astype(str)
        journals = sorted(df["journal.title"].unique())
        with row2_col3:
            journal = st.selectbox("Select a journal", journals)
        df = df[df["journal.title"] == journal]

    # Empty writes keep the remaining columns of the second row occupied.
    with row2_col4:
        st.write("")

    with row2_col5:
        st.write("")

    st.dataframe(df)
    leafmap.st_download_button("Download data", df, csv_sep="\t")

    summary = pd.DataFrame(
        {"Journal": journal_counts.index, "Count": journal_counts}
    ).reset_index(drop=True)

    row3_col1, row3_col2 = st.columns([1, 1])

    with row3_col1:
        st.markdown(f"\n- Total number of journals: **{len(summary)}**\n")
        st.dataframe(summary)
        leafmap.st_download_button("Download data", summary, csv_sep="\t")

    with row3_col2:
        fig = px.box(df, x="year", y="country_count", title="Country Counts")
        st.plotly_chart(fig)
|