# File size: 6,122 Bytes
# 92c0981
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# NOTE: An earlier version of this app (identical search flow, but without the
# publication-year filter) previously lived here as commented-out code; it was
# superseded by the implementation below and has been removed.

import streamlit as st
import requests
import xmltodict
from datetime import datetime

# arXiv API base URL
ARXIV_API_BASE = "http://export.arxiv.org/api/query"

def fetch_papers(query, max_results=10):
    """Fetch papers matching *query* from the arXiv API.

    Args:
        query: Free-text search terms. May contain spaces or special
            characters; they are percent-encoded by `requests`.
        max_results: Maximum number of entries to request (default 10).

    Returns:
        A list of dicts with keys 'title', 'summary', 'published',
        'authors' (list of author names), and 'link'. Returns an empty
        list on any error; the error is surfaced to the UI via st.error.
    """
    try:
        # Let requests build and percent-encode the query string. The
        # previous f-string URL broke on queries containing spaces, '&',
        # or other reserved characters.
        params = {
            'search_query': f'all:{query}',
            'start': 0,
            'max_results': max_results,
        }
        response = requests.get(
            ARXIV_API_BASE,
            params=params,
            headers={'Accept': 'application/xml'},
            timeout=10,  # don't hang the UI on a slow/unreachable API
        )
        response.raise_for_status()

        # Parse the Atom XML response into nested dicts.
        data = xmltodict.parse(response.text)
        entries = data.get('feed', {}).get('entry', [])

        # xmltodict yields a single dict (not a list) when there is
        # exactly one <entry>; normalize to a list.
        if not isinstance(entries, list):
            entries = [entries]

        # Extract the fields the UI displays.
        papers = []
        for entry in entries:
            author = entry.get('author')
            if isinstance(author, list):
                authors = [a['name'] for a in author]
            else:
                # Single author comes back as a dict; guard against None
                # so a malformed entry cannot crash the whole search.
                authors = [(author or {}).get('name')]
            papers.append({
                'title': entry.get('title'),
                'summary': entry.get('summary'),
                'published': entry.get('published'),
                'authors': authors,
                'link': entry.get('id'),
            })

        return papers
    except Exception as e:
        # Best effort: report the failure in the UI and return no results
        # instead of crashing the app.
        st.error(f"Error fetching papers: {e}")
        return []

def filter_papers_by_year(papers, start_year, end_year):
    """Return the subset of *papers* published within [start_year, end_year].

    Args:
        papers: List of paper dicts; each should carry a 'published' value
            shaped like an ISO timestamp ('YYYY-MM-DD...').
        start_year: Inclusive lower bound of the year range.
        end_year: Inclusive upper bound of the year range.

    Returns:
        A new list preserving input order. Papers whose 'published' field
        is missing, None, or not parseable as a year are silently skipped.
    """
    filtered_papers = []
    for paper in papers:
        try:
            # The first four characters of the ISO timestamp are the year.
            published_year = int(paper['published'][:4])
        except (KeyError, TypeError, ValueError):
            # KeyError: no 'published' field; TypeError: value is None
            # (fetch_papers uses .get, so this happens); ValueError:
            # non-numeric prefix. Skip the entry rather than crash.
            continue
        if start_year <= published_year <= end_year:
            filtered_papers.append(paper)
    return filtered_papers

# --- Streamlit UI ---------------------------------------------------------
st.title("arXiv Research Paper Search")
st.subheader("Find academic papers on your topic of interest")

# Search controls.
query = st.text_input("Enter a topic or keywords", placeholder="e.g., machine learning, quantum computing")
max_results = st.slider("Number of results", min_value=1, max_value=50, value=10)

# Publication-year range, shown side by side.
current_year = datetime.now().year
left_col, right_col = st.columns(2)
with left_col:
    start_year = st.number_input("Start Year", min_value=1900, max_value=current_year, value=2000, step=1)
with right_col:
    end_year = st.number_input("End Year", min_value=1900, max_value=current_year, value=current_year, step=1)

if st.button("Search"):
    if not query.strip():
        # Guard: nothing to search for.
        st.error("Please enter a topic or keywords to search.")
    else:
        st.info(f"Searching for papers on: **{query}**")
        # Fetch, then restrict to the selected year range.
        results = filter_papers_by_year(fetch_papers(query, max_results), start_year, end_year)

        if not results:
            st.warning(f"No papers found between {start_year} and {end_year}. Try a different query or adjust the year range.")
        else:
            st.success(f"Found {len(results)} papers between {start_year} and {end_year}!")
            # Render each paper as a small numbered card.
            for rank, paper in enumerate(results, start=1):
                st.write(f"### {rank}. {paper['title']}")
                st.write(f"**Authors**: {', '.join(paper['authors'])}")
                st.write(f"**Published**: {paper['published']}")
                st.write(f"[Read More]({paper['link']})")
                st.write("---")