retrieval_metadata / arxiv_retrieval_service.py
donb-hf's picture
adding components
97e8d87
import arxiv
from typing import List, Dict, Any
class ArxivRetrievalService:
    """Retrieve paper metadata from arXiv through the ``arxiv`` client library."""

    # Text that precedes the paper identifier inside an arXiv abstract URL.
    _ABS_MARKER = "arxiv.org/abs/"

    def __init__(self):
        """Create the API client that every ``fetch_metadata`` call reuses."""
        # One client per service instance, configured with a 3-second delay
        # setting and up to 3 retries (both handed straight to arxiv.Client).
        self.client = arxiv.Client(delay_seconds=3, num_retries=3)

    def fetch_metadata(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
        """Search arXiv and return one metadata dict per matching paper.

        Args:
            query: Search expression passed unchanged to ``arxiv.Search``.
            max_results: Upper bound on the number of results requested.

        Returns:
            A list of dicts, in the order yielded by the client (the search
            is sorted by submission date), each with the keys ``title``,
            ``authors``, ``published``, ``updated``, ``pdf_url``,
            ``entry_id``, ``summary``, ``categories``, ``primary_category``
            and ``html_url``.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.SubmittedDate,
        )
        return [self._to_metadata(result) for result in self.client.results(search)]

    @staticmethod
    def _short_id(entry_id: str) -> str:
        """Return the arXiv identifier embedded in an abstract URL.

        Old-style identifiers contain a slash (``quant-ph/0201082v1``), so
        keeping only the last path segment would drop the archive prefix and
        yield a URL that does not resolve. Everything after
        ``arxiv.org/abs/`` is kept instead.
        """
        _, marker, short_id = entry_id.partition(ArxivRetrievalService._ABS_MARKER)
        if marker:
            return short_id
        # Unexpected URL shape: fall back to the last path segment.
        return entry_id.split("/")[-1]

    @staticmethod
    def _to_metadata(result: Any) -> Dict[str, Any]:
        """Convert a single search result object into a plain metadata dict.

        Timestamps are rendered with ``isoformat()`` and authors are reduced
        to their ``name`` attribute; the remaining fields are copied as-is.
        """
        short_id = ArxivRetrievalService._short_id(result.entry_id)
        return {
            "title": result.title,
            "authors": [author.name for author in result.authors],
            "published": result.published.isoformat(),
            "updated": result.updated.isoformat(),
            "pdf_url": result.pdf_url,
            "entry_id": result.entry_id,
            "summary": result.summary,
            "categories": result.categories,
            "primary_category": result.primary_category,
            "html_url": f"http://arxiv.org/abs/{short_id}",
        }
# Usage:
# arxiv_service = ArxivRetrievalService()
# metadata = arxiv_service.fetch_metadata("quantum computing", max_results=5)