File size: 3,591 Bytes
1da1c98
 
 
 
 
 
cc5bd12
 
 
1da1c98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5bd12
 
 
 
 
 
1da1c98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5bd12
 
1da1c98
cc5bd12
 
1da1c98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5bd12
1da1c98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import scholarpy
import pandas as pd
import streamlit as st
import leafmap.foliumap as leafmap
import plotly.express as px
import datetime

# Calendar year (local time) captured when the module is loaded; app() uses it
# to set the default and maximum values of the year-range slider.
current_year = datetime.date.today().year

# Create the scholarpy Dsl object only when the session state does not already
# hold one, so an existing object is reused instead of being rebuilt.
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()


def app() -> None:
    """Render the "Search Publications" page.

    The page collects a keyword, an exact-match flag, a search scope, a year
    range and a result limit, then calls ``dsl.search_pubs_by_keyword`` with
    those values.  The returned publications are shown as a table together
    with a per-journal count summary and a box plot of the number of
    affiliation countries per publication, grouped by year.

    Nothing is queried or rendered below the input widgets until a keyword
    has been entered.
    """
    st.title("Search Publications")
    dsl = st.session_state["dsl"]

    # Two rows of five columns with identical relative widths: the first row
    # holds the search inputs, the second the result count and journal filter.
    (
        row1_col1,
        row1_col2,
        row1_col3,
        row1_col4,
        row1_col5,
    ) = st.columns([1, 0.7, 1, 1, 1])

    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        [1, 0.7, 1, 1, 1]
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,  # default to "title_only"
        )

    with row1_col4:
        years = st.slider(
            "Select the start and end year:",
            1950,
            current_year + 6,
            (1980, current_year),
        )

    with row1_col5:
        limit = st.slider("Select the number of publications to return", 1, 1000, 100)

    if keywords:
        result = dsl.search_pubs_by_keyword(
            keywords,
            exact_match,
            scope,
            start_year=years[0],
            end_year=years[1],
            limit=limit,
        )

        df = scholarpy.json_to_df(result)
        affiliations = result.as_dataframe_authors_affiliations()
        # One array of distinct affiliation countries per publication id.
        country_df = affiliations.groupby(["pub_id"])["aff_country"].unique()
        # merge() defaults to an inner join, so publications without any row
        # in the affiliations table are dropped here.
        df = df.merge(country_df, left_on="id", right_on="pub_id")

        # Count only entries that are truthy after bool conversion, which
        # excludes empty country strings.
        countries = [c[c.astype(bool)].size for c in df["aff_country"]]
        df["country_count"] = countries

        # Computed before the optional journal filter below, so the summary
        # table always covers every journal in the merged result.
        journal_counts = df["journal.title"].value_counts()
        # Never report more publications than the search matched in total.
        limit = min(limit, result.count_total)
        markdown = f"""
        Returned Publications: {limit} (total = {result.count_total})        
        
        """

        with row2_col1:
            st.markdown(markdown)

        with row2_col2:
            filter_by_journal = st.checkbox("Filter by journal")

        if filter_by_journal:
            # Cast to str so missing titles sort alongside real ones.
            df["journal.title"] = df["journal.title"].astype(str)
            journals = df["journal.title"].unique()
            journals.sort()
            with row2_col3:
                journal = st.selectbox("Select a journal", journals)
            df = df[df["journal.title"] == journal]

        # Placeholders for the two unused cells of the second row.
        with row2_col4:
            st.write("")

        with row2_col5:
            st.write("")

        if df.empty:
            # Tell the user explicitly instead of rendering an empty table,
            # an empty summary and an empty chart.
            st.warning("No publications found for the given search criteria.")
        else:
            st.dataframe(df)
            leafmap.st_download_button("Download data", df, csv_sep="\t")

            summary = pd.DataFrame(
                {"Journal": journal_counts.index, "Count": journal_counts}
            ).reset_index(drop=True)
            markdown = f"""
            - Total number of journals: **{len(summary)}**
            """

            row3_col1, row3_col2 = st.columns([1, 1])

            with row3_col1:
                st.markdown(markdown)
                st.dataframe(summary)
                leafmap.st_download_button("Download data", summary, csv_sep="\t")

            with row3_col2:
                fig = px.box(df, x="year", y="country_count", title="Country Counts")
                st.plotly_chart(fig)