# papersearch.py: arXiv paper search UI for the pdf-summarizer-app
import streamlit as st
import requests
import xmltodict
from datetime import datetime
from urllib.parse import quote_plus

# arXiv API base URL
ARXIV_API_BASE = "http://export.arxiv.org/api/query"
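
# After xmltodict.parse, the arXiv Atom response is roughly shaped like this
# (a sketch, not a verbatim payload; only the keys the parser below uses are shown):
#
#   {'feed': {'entry': [{'id': 'http://arxiv.org/abs/...',
#                        'title': '...',
#                        'summary': '...',
#                        'published': '2024-01-15T00:00:00Z',
#                        'author': [{'name': '...'}, {'name': '...'}]},
#                       ...]}}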

def fetch_papers(query, max_results=10):
    """Fetch papers matching the query from the arXiv API."""
    try:
        # Build the API query URL (URL-encode the user input so spaces and
        # special characters do not break the request)
        api_url = (
            f"{ARXIV_API_BASE}?search_query=all:{quote_plus(query)}"
            f"&start=0&max_results={max_results}"
        )
        # Make the API request
        response = requests.get(api_url, headers={'Accept': 'application/xml'})
        response.raise_for_status()
        # Parse the Atom XML response
        data = xmltodict.parse(response.text)
        entries = data.get('feed', {}).get('entry', [])
        if not isinstance(entries, list):  # A single result is returned as a dict
            entries = [entries]
        # Extract the fields the UI needs
        papers = []
        for entry in entries:
            authors = entry.get('author') or []
            if not isinstance(authors, list):  # A single author is returned as a dict
                authors = [authors]
            papers.append({
                'title': entry.get('title'),
                'summary': entry.get('summary'),
                'published': entry.get('published'),
                'authors': [author.get('name') for author in authors],
                'link': entry.get('id')
            })
        return papers
    except Exception as e:
        st.error(f"Error fetching papers: {e}")
        return []
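
# Each dict returned by fetch_papers has this shape (illustrative values,
# not real arXiv data):
#
#   {'title': 'An Example Paper Title',
#    'summary': 'Abstract text ...',
#    'published': '2023-06-01T12:00:00Z',
#    'authors': ['A. Author', 'B. Author'],
#    'link': 'http://arxiv.org/abs/...'}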


def filter_papers_by_year(papers, start_year, end_year):
    """Filter papers to those published within [start_year, end_year]."""
    filtered_papers = []
    for paper in papers:
        try:
            # 'published' is an ISO-8601 timestamp, so the first four characters are the year
            published_year = int(paper['published'][:4])
            if start_year <= published_year <= end_year:
                filtered_papers.append(paper)
        except (TypeError, ValueError):
            continue  # Skip entries with a missing or malformed publication date
    return filtered_papers
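
# Example (illustrative): given one paper published in 2019 and one in 2023,
# filter_papers_by_year(papers, 2020, 2024) keeps only the 2023 entry.
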
# Streamlit app UI
st.title("arXiv Research Paper Search")
st.subheader("Find academic papers on your topic of interest")
# Input fields
query = st.text_input("Enter a topic or keywords", placeholder="e.g., machine learning, quantum computing")
max_results = st.slider("Number of results", min_value=1, max_value=50, value=10)
# Year filter
col1, col2 = st.columns(2)
with col1:
start_year = st.number_input("Start Year", min_value=1900, max_value=datetime.now().year, value=2000, step=1)
with col2:
end_year = st.number_input("End Year", min_value=1900, max_value=datetime.now().year, value=datetime.now().year, step=1)

if st.button("Search"):
    if not query.strip():
        st.error("Please enter a topic or keywords to search.")
    elif start_year > end_year:
        st.error("Start Year cannot be later than End Year.")
    else:
st.info(f"Searching for papers on: **{query}**")
papers = fetch_papers(query, max_results)
# Filter papers by year
papers_filtered = filter_papers_by_year(papers, start_year, end_year)
if papers_filtered:
st.success(f"Found {len(papers_filtered)} papers between {start_year} and {end_year}!")
for idx, paper in enumerate(papers_filtered, start=1):
st.write(f"### {idx}. {paper['title']}")
st.write(f"**Authors**: {', '.join(paper['authors'])}")
st.write(f"**Published**: {paper['published']}")
st.write(f"[Read More]({paper['link']})")
st.write("---")
else:
st.warning(f"No papers found between {start_year} and {end_year}. Try a different query or adjust the year range.")