Spaces:
Paused
Paused
from typing import Any, Dict, List

import arxiv
class ArxivRetrievalService:
    """Fetch paper metadata from arXiv through the ``arxiv`` client library."""

    def __init__(self):
        """Create the shared arXiv client.

        The client is configured with ``delay_seconds=3`` and
        ``num_retries=3``; the same client instance is reused for every
        query issued through this service.
        """
        self.client = arxiv.Client(delay_seconds=3, num_retries=3)

    @staticmethod
    def _to_metadata(result: "arxiv.Result") -> Dict[str, Any]:
        """Convert one search result into a plain metadata dictionary.

        Args:
            result: An object exposing ``title``, ``authors`` (each with a
                ``name``), ``published``, ``updated``, ``pdf_url``,
                ``entry_id``, ``summary``, ``categories`` and
                ``primary_category``.

        Returns:
            A dict with the keys ``title``, ``authors``, ``published``,
            ``updated``, ``pdf_url``, ``entry_id``, ``summary``,
            ``categories``, ``primary_category`` and ``html_url``. The two
            timestamps are rendered with ``isoformat()``.
        """
        # The last path segment of entry_id is used as the paper identifier
        # when building the abstract-page URL.
        paper_id = result.entry_id.split('/')[-1]
        return {
            "title": result.title,
            "authors": [author.name for author in result.authors],
            "published": result.published.isoformat(),
            "updated": result.updated.isoformat(),
            "pdf_url": result.pdf_url,
            "entry_id": result.entry_id,
            "summary": result.summary,
            "categories": result.categories,
            "primary_category": result.primary_category,
            "html_url": f"http://arxiv.org/abs/{paper_id}",
        }

    def fetch_metadata(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Run an arXiv search and return metadata for each result.

        Args:
            query: Search query string passed to ``arxiv.Search``.
            max_results: Upper bound on the number of results requested.

        Returns:
            One metadata dict per result (see ``_to_metadata`` for the keys),
            in the order the client yields them. The search is sorted by
            ``arxiv.SortCriterion.SubmittedDate`` using the library's default
            sort order.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )
        return [self._to_metadata(result) for result in self.client.results(search)]
# Usage: | |
# arxiv_service = ArxivRetrievalService() | |
# metadata = arxiv_service.fetch_metadata("quantum computing", max_results=5) | |