| import os | |
| import scholarpy | |
| import pandas as pd | |
| import streamlit as st | |
| import leafmap.foliumap as leafmap | |
| import plotly.express as px | |
| import datetime | |
# Calendar year at import time; the year-range slider in app() uses it for
# its default upper bound and its maximum.
current_year = datetime.date.today().year

# Build the scholarpy client only when the session does not hold one yet, so
# later reruns of the script reuse the stored instance instead of creating a
# new one.
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()
def app() -> None:
    """Render the "Search Publications" page.

    Lays out a row of search controls (keyword, exact-match toggle, search
    scope, year range and result limit). Once a keyword has been entered,
    publications are fetched through the client stored in
    ``st.session_state["dsl"]`` and shown as:

    * the number of returned publications versus the total match count,
    * an optional per-journal filter,
    * the (possibly filtered) publication table with a download button,
    * a per-journal count summary with a download button, and
    * a box plot of affiliation-country counts per publication year.

    The function returns early, rendering nothing below the controls, when
    no keyword has been entered. When the search yields no rows it shows only
    the returned/total count line and then returns.
    """
    st.title("Search Publications")
    dsl = st.session_state["dsl"]

    # Both widget rows use the same column proportions.
    column_widths = [1, 0.7, 1, 1, 1]
    (
        row1_col1,
        row1_col2,
        row1_col3,
        row1_col4,
        row1_col5,
    ) = st.columns(column_widths)
    (
        row2_col1,
        row2_col2,
        row2_col3,
        row2_col4,
        row2_col5,
    ) = st.columns(column_widths)

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")
    with row1_col2:
        exact_match = st.checkbox("Exact match", True)
    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,  # "title_only" is the preselected scope
        )
    with row1_col4:
        years = st.slider(
            "Select the start and end year:",
            1950,
            current_year + 6,
            (1980, current_year),
        )
    with row1_col5:
        limit = st.slider(
            "Select the number of publications to return", 1, 1000, 100
        )

    # Nothing to search for until the user has typed a keyword.
    if not keywords:
        return

    result = dsl.search_pubs_by_keyword(
        keywords,
        exact_match,
        scope,
        start_year=years[0],
        end_year=years[1],
        limit=limit,
    )

    # The displayed count is capped at the total number of matches; the
    # slider value itself is left untouched.
    returned = min(limit, result.count_total)
    count_markdown = (
        f"\nReturned Publications: {returned} (total = {result.count_total})\n"
    )
    with row2_col1:
        st.markdown(count_markdown)

    df = scholarpy.json_to_df(result)
    # Guard BEFORE touching the frame: previously the `is not None` test came
    # only after `df` had already been merged and indexed, so it could never
    # prevent a failure on an empty search.
    # NOTE(review): assumes json_to_df yields None or an empty frame when the
    # search has no rows -- confirm against scholarpy.
    if df is None or df.empty:
        return

    # Attach, per publication, the array of distinct affiliation countries.
    affiliations = result.as_dataframe_authors_affiliations()
    country_df = affiliations.groupby(["pub_id"])["aff_country"].unique()
    df = df.merge(country_df, left_on="id", right_on="pub_id")

    # Count only truthy entries of each country array (e.g. empty strings
    # are excluded by the boolean mask).
    df["country_count"] = [c[c.astype(bool)].size for c in df["aff_country"]]

    # Computed before the optional journal filter below, so the summary
    # table always describes the full result set. value_counts() returns a
    # new Series, so no defensive copy of the frame is needed.
    journal_counts = df["journal.title"].value_counts()

    with row2_col2:
        filter_by_journal = st.checkbox("Filter by journal")
    if filter_by_journal:
        # Cast to str so missing titles sort alongside real ones.
        df["journal.title"] = df["journal.title"].astype(str)
        journals = df["journal.title"].unique()
        journals.sort()
        with row2_col3:
            journal = st.selectbox("Select a journal", journals)
            df = df[df["journal.title"] == journal]
        with row2_col4:
            st.write("")
        with row2_col5:
            st.write("")

    st.dataframe(df)
    leafmap.st_download_button("Download data", df, csv_sep="\t")

    summary = pd.DataFrame(
        {"Journal": journal_counts.index, "Count": journal_counts}
    ).reset_index(drop=True)
    summary_markdown = f"\n- Total number of journals: **{len(summary)}**\n"

    row3_col1, row3_col2 = st.columns([1, 1])
    with row3_col1:
        st.markdown(summary_markdown)
        st.dataframe(summary)
        leafmap.st_download_button("Download data", summary, csv_sep="\t")
    with row3_col2:
        fig = px.box(df, x="year", y="country_count", title="Country Counts")
        st.plotly_chart(fig)