import os
import sys
from dotenv import load_dotenv

load_dotenv(verbose=True)

from pathlib import Path
import argparse
from mmengine import DictAction
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, Response
import httpx
from bs4 import BeautifulSoup
import json
import asyncio
import uvicorn

root = str(Path(__file__).parent)
sys.path.append(root)

from src.database import db
from src.logger import logger
from src.config import config
from src.crawl import HuggingFaceDailyPapers
from src.agents.evaluator import run_evaluation

app = FastAPI(title="PaperAgent")
# CORS: wide open for local development; tighten before deploying publicly
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
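# Note: browsers generally refuse the wildcard origin "*" in combination with
# credentials, so allow_credentials=True may not take effect here unless the
# framework echoes the request origin back. A stricter production setup might
# look like the sketch below (the origins are placeholders, not part of this
# project):
#
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["http://localhost:7860", "https://your-space.hf.space"],
#     allow_credentials=True,
#     allow_methods=["GET", "POST", "DELETE"],
#     allow_headers=["*"],
# )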
def parse_args():
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"),
                        help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args
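# Example invocation (assuming this file is app.py; the override keys below
# are illustrative and depend on what configs/paper_agent.py actually defines):
#
#   python app.py --config configs/paper_agent.py \
#       --cfg-options log_level=DEBUG db_path=cache/papers.db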
async def get_daily(date_str: Optional[str] = None, direction: Optional[str] = None) -> Dict[str, Any]:
    """Return the daily-papers listing for a date, preferring fresh cache.

    Rather than probing candidate dates ourselves, this leans on HuggingFace's
    redirect mechanism: requesting a date with no listing redirects to the
    nearest available one.
    """
    target_date = date_str or date.today().isoformat()
    hf_daily = HuggingFaceDailyPapers()
    # First, check whether we have a fresh cache entry for the requested date
    cached_data = await db.get_cached_papers(target_date)
    if cached_data and await db.is_cache_fresh(target_date):
        print(f"Using cached data for {target_date}")
        return {
            "date": target_date,
            "requested_date": target_date,
            "cards": cached_data['cards'],
            "fallback_used": False,
            "cached": True,
            "cached_at": cached_data['cached_at']
        }
    # Handle the different navigation directions
    if direction == "prev":
        # For previous navigation, use the redirect mechanism to find the
        # most recent available date
        try:
            actual_date, html = await hf_daily.fetch_daily_html(target_date)
            print(f"Previous navigation: fetched {actual_date} (requested {target_date})")
            # If we got redirected to a different date, that's our fallback
            if actual_date != target_date:
                print(f"Redirected from {target_date} to {actual_date}")
                # Check if the redirected date has a fresh cache entry
                cached_data = await db.get_cached_papers(actual_date)
                if cached_data and await db.is_cache_fresh(actual_date):
                    print(f"Using cached data for redirected date {actual_date}")
                    return {
                        "date": actual_date,
                        "requested_date": target_date,
                        "cards": cached_data['cards'],
                        "fallback_used": True,
                        "cached": True,
                        "cached_at": cached_data['cached_at']
                    }
                # Otherwise process the HTML we got and cache it under the
                # redirected date
                cards = hf_daily.parse_daily_cards(html)
                enriched_cards = await enrich_cards(cards)
                await db.cache_papers(actual_date, html, enriched_cards)
                return {
                    "date": actual_date,
                    "requested_date": target_date,
                    "cards": enriched_cards,
                    "fallback_used": True,
                    "cached": False
                }
            # We got the exact date we requested; process normally
            cards = hf_daily.parse_daily_cards(html)
            enriched_cards = await enrich_cards(cards)
            await db.cache_papers(actual_date, html, enriched_cards)
            return {
                "date": actual_date,
                "requested_date": target_date,
                "cards": enriched_cards,
                "fallback_used": False,
                "cached": False
            }
        except Exception as e:
            print(f"Failed to fetch {target_date} for previous navigation: {e}")
            # Fall back to cached data if available, even if stale
            cached_data = await db.get_cached_papers(target_date)
            if cached_data:
                return {
                    "date": target_date,
                    "requested_date": target_date,
                    "cards": cached_data['cards'],
                    "fallback_used": False,
                    "cached": True,
                    "cached_at": cached_data['cached_at']
                }
            raise HTTPException(status_code=503, detail="Unable to fetch papers and no cache available")
    elif direction == "next":
        # For next navigation we need to find the next available date.
        # First try the exact date.
        try:
            actual_date, html = await hf_daily.fetch_daily_html(target_date)
            print(f"Next navigation: fetched {actual_date} (requested {target_date})")
            # If we got exactly the date we requested, we're done
            if actual_date == target_date:
                cards = hf_daily.parse_daily_cards(html)
                enriched_cards = await enrich_cards(cards)
                await db.cache_papers(actual_date, html, enriched_cards)
                return {
                    "date": actual_date,
                    "requested_date": target_date,
                    "cards": enriched_cards,
                    "fallback_used": False,
                    "cached": False
                }
            # A redirect means the requested date doesn't exist, so search
            # forward for the next available date by incrementing
            print(f"Requested date {target_date} doesn't exist, searching for next available date")
            next_date = await find_next_available_date_forward(target_date)
            if next_date:
                cached_data = await db.get_cached_papers(next_date)
                if cached_data and await db.is_cache_fresh(next_date):
                    print(f"Using cached data for next available date {next_date}")
                    return {
                        "date": next_date,
                        "requested_date": target_date,
                        "cards": cached_data['cards'],
                        "fallback_used": True,
                        "cached": True,
                        "cached_at": cached_data['cached_at']
                    }
                # Fetch the next available date
                actual_date, html = await hf_daily.fetch_daily_html(next_date)
                cards = hf_daily.parse_daily_cards(html)
                enriched_cards = await enrich_cards(cards)
                await db.cache_papers(actual_date, html, enriched_cards)
                return {
                    "date": actual_date,
                    "requested_date": target_date,
                    "cards": enriched_cards,
                    "fallback_used": True,
                    "cached": False
                }
            # No next date found: return an empty listing
            return {
                "date": target_date,
                "requested_date": target_date,
                "cards": [],
                "fallback_used": False,
                "cached": False
            }
        except Exception as e:
            print(f"Failed to fetch {target_date} for next navigation: {e}")
            # Try to serve the next available date from cache
            next_date = await find_next_available_date_forward(target_date)
            if next_date:
                cached_data = await db.get_cached_papers(next_date)
                if cached_data:
                    return {
                        "date": next_date,
                        "requested_date": target_date,
                        "cards": cached_data['cards'],
                        "fallback_used": True,
                        "cached": True,
                        "cached_at": cached_data['cached_at']
                    }
            # No cache available either
            raise HTTPException(status_code=503, detail="Unable to fetch papers and no cache available")
    else:
        # No direction specified: try the exact date first
        try:
            actual_date, html = await hf_daily.fetch_daily_html(target_date)
            print(f"Direct fetch: fetched {actual_date} (requested {target_date})")
            # If we got redirected, that's our fallback
            if actual_date != target_date:
                print(f"Redirected from {target_date} to {actual_date}")
                # Check if the redirected date has a fresh cache entry
                cached_data = await db.get_cached_papers(actual_date)
                if cached_data and await db.is_cache_fresh(actual_date):
                    print(f"Using cached data for redirected date {actual_date}")
                    return {
                        "date": actual_date,
                        "requested_date": target_date,
                        "cards": cached_data['cards'],
                        "fallback_used": True,
                        "cached": True,
                        "cached_at": cached_data['cached_at']
                    }
                # Otherwise process the HTML we got and cache it under the
                # redirected date
                cards = hf_daily.parse_daily_cards(html)
                enriched_cards = await enrich_cards(cards)
                await db.cache_papers(actual_date, html, enriched_cards)
                return {
                    "date": actual_date,
                    "requested_date": target_date,
                    "cards": enriched_cards,
                    "fallback_used": True,
                    "cached": False
                }
            # We got the exact date we requested; process normally
            cards = hf_daily.parse_daily_cards(html)
            enriched_cards = await enrich_cards(cards)
            await db.cache_papers(actual_date, html, enriched_cards)
            return {
                "date": actual_date,
                "requested_date": target_date,
                "cards": enriched_cards,
                "fallback_used": False,
                "cached": False
            }
        except Exception as e:
            print(f"Failed to fetch {target_date}: {e}")
            # If everything fails, fall back to cached data if available
            cached_data = await db.get_cached_papers(target_date)
            if cached_data:
                return {
                    "date": target_date,
                    "requested_date": target_date,
                    "cards": cached_data['cards'],
                    "fallback_used": False,
                    "cached": True,
                    "cached_at": cached_data['cached_at']
                }
            # No cache available: report the failure
            raise HTTPException(status_code=503, detail="Unable to fetch papers and no cache available")
async def find_next_available_date_forward(start_date: str, max_attempts: int = 30) -> Optional[str]:
    """Find the next available date by incrementing day by day and probing."""
    current_date = datetime.strptime(start_date, "%Y-%m-%d")
    hf_daily = HuggingFaceDailyPapers()
    for i in range(max_attempts):
        current_date += timedelta(days=1)
        date_str = current_date.strftime("%Y-%m-%d")
        # A cache hit means the date exists; no network probe needed
        cached_data = await db.get_cached_papers(date_str)
        if cached_data:
            return date_str
        # Probe the date over the network, but keep the check quick:
        # fetch_daily_html manages its own HTTP client, so bound the whole
        # call with a 5-second asyncio timeout
        try:
            actual_date, html = await asyncio.wait_for(
                hf_daily.fetch_daily_html(date_str), timeout=5
            )
            if actual_date == date_str:
                return date_str
        except Exception as e:
            print(f"Failed to check {date_str}: {e}")
            continue
    return None
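# Illustrative behavior (dates are hypothetical): if "2025-01-10" has no
# listing and the next one is Monday "2025-01-13", then
#   await find_next_available_date_forward("2025-01-10")  ->  "2025-01-13"
# and None is returned if nothing is found within max_attempts days.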
async def enrich_cards(cards):
    """Enrich cards in place with paper details from the database"""
    for c in cards:
        arxiv_id = c.get("arxiv_id")
        if not arxiv_id:
            c["has_eval"] = False
            c["is_evaluated"] = False
            continue
        paper = await db.get_paper(arxiv_id)
        if not paper:
            c["has_eval"] = False
            c["is_evaluated"] = False
            continue
        # Add evaluation status
        c["has_eval"] = paper.get('is_evaluated', False)
        c["is_evaluated"] = paper.get('is_evaluated', False)
        # Add evaluation details if available
        if paper.get('is_evaluated'):
            c["evaluation_score"] = paper.get('evaluation_score')
            c["overall_score"] = paper.get('overall_score')
            c["evaluation_date"] = paper.get('evaluation_date')
            c["evaluation_tags"] = paper.get('evaluation_tags')
        # Fill in paper details from the database where the card lacks them
        if not c.get("title") and paper.get("title"):
            c["title"] = paper["title"]
        if not c.get("authors") and paper.get("authors"):
            c["authors"] = paper["authors"]
        if not c.get("abstract") and paper.get("abstract"):
            c["abstract"] = paper["abstract"]
    return cards
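# Sketch of an enriched card after this pass (field values are illustrative,
# not real data):
#
#   {
#       "arxiv_id": "2401.00001",
#       "title": "Example Paper Title",
#       "authors": "A. Author, B. Author",
#       "has_eval": True,
#       "is_evaluated": True,
#       "evaluation_score": 2.5,
#       "overall_score": 2.5,
#       "evaluation_date": "2024-01-02",
#       "evaluation_tags": "nlp,agents",
#   }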
async def list_evals() -> Dict[str, Any]:
    """List all evaluated papers from the database"""
    evaluated_papers = await db.get_evaluated_papers()
    items: List[Dict[str, Any]] = []
    for paper in evaluated_papers:
        items.append({
            "arxiv_id": paper['arxiv_id'],
            "title": paper['title'],
            "authors": paper['authors'],
            "evaluation_date": paper['evaluation_date'],
            "evaluation_score": paper['evaluation_score'],
            "evaluation_tags": paper['evaluation_tags']
        })
    return {"count": len(items), "items": items}

async def has_eval(paper_id: str) -> Dict[str, bool]:
    """Report whether an evaluation exists for the given paper"""
    paper = await db.get_paper(paper_id)
    exists = paper is not None and paper.get('is_evaluated', False)
    return {"exists": exists}
async def get_paper_details(paper_id: str) -> Dict[str, Any]:
    """Get detailed paper information from the database"""
    paper = await db.get_paper(paper_id)
    if not paper:
        raise HTTPException(status_code=404, detail="Paper not found")
    return {
        "arxiv_id": paper.get('arxiv_id'),
        "title": paper.get('title'),
        "authors": paper.get('authors'),
        "abstract": paper.get('abstract'),
        "categories": paper.get('categories'),
        "published_date": paper.get('published_date'),
        "is_evaluated": paper.get('is_evaluated', False),
        "evaluation_date": paper.get('evaluation_date'),
        "created_at": paper.get('created_at'),
        "updated_at": paper.get('updated_at')
    }
async def get_paper_score(paper_id: str) -> Dict[str, Any]:
    """Return the paper's overall score, computed from its stored evaluation"""
    paper = await db.get_paper(paper_id)
    print(f"Paper data for {paper_id}:", paper)
    if not paper or not paper.get('is_evaluated', False):
        print(f"Paper {paper_id} not found or not evaluated")
        return {"has_score": False}
    # Compute the overall score as the average of all dimensions that were
    # actually scored (the same aggregation the radar chart uses)
    try:
        evaluation_content = paper.get('evaluation_content')
        if evaluation_content:
            evaluation_json = json.loads(evaluation_content)
            if 'scores' in evaluation_json:
                scores = evaluation_json['scores']
                values = [
                    scores.get('task_formalization', 0),
                    scores.get('data_resource_availability', 0),
                    scores.get('input_output_complexity', 0),
                    scores.get('real_world_interaction', 0),
                    scores.get('existing_ai_coverage', 0),
                    scores.get('human_originality', 0),
                    scores.get('safety_ethics', 0),
                    scores.get('technical_maturity_needed', 0),
                    # Feasibility is stored as a percentage; dividing by 25
                    # maps 0-100 onto the 0-4 scale the other dimensions use
                    scores.get('three_year_feasibility_pct', 0) / 25,
                    scores.get('overall_automatability', 0)
                ]
                valid_scores = [v for v in values if v > 0]
                overall_score = sum(valid_scores) / len(valid_scores) if valid_scores else 0
                print(f"Calculated overall score: {overall_score}")
                return {
                    "has_score": True,
                    "score": overall_score,
                    "evaluation_date": paper.get('evaluation_date')
                }
    except Exception as e:
        print(f"Error calculating overall score: {e}")
    # Fall back to the values stored on the paper row
    overall_score = paper.get('overall_score')
    evaluation_score = paper.get('evaluation_score')
    print(f"Fallback - Overall score: {overall_score}, Evaluation score: {evaluation_score}")
    return {
        "has_score": True,
        "score": overall_score if overall_score is not None else evaluation_score,
        "evaluation_date": paper.get('evaluation_date')
    }
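# Worked example of the aggregation above (numbers are made up): with
#   task_formalization=3, data_resource_availability=2, input_output_complexity=4,
#   real_world_interaction=1, existing_ai_coverage=2, human_originality=3,
#   safety_ethics=2, technical_maturity_needed=3, overall_automatability=2,
#   three_year_feasibility_pct=75  ->  75 / 25 = 3.0 on the 0-4 scale,
# all ten values are > 0, so
#   overall = (3 + 2 + 4 + 1 + 2 + 3 + 2 + 3 + 3.0 + 2) / 10 = 25.0 / 10 = 2.5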
async def get_eval(paper_id: str) -> Any:
    """Return the stored evaluation for a paper, parsed as JSON when possible"""
    paper = await db.get_paper(paper_id)
    if not paper or not paper.get('is_evaluated', False):
        raise HTTPException(status_code=404, detail="Evaluation not found")
    evaluation_content = paper['evaluation_content']
    try:
        evaluation_json = json.loads(evaluation_content)
    except json.JSONDecodeError:
        # Not JSON: wrap the raw content in a simple structure
        evaluation_json = {
            "evaluation_content": evaluation_content,
            "arxiv_id": paper_id,
            "evaluation_date": paper['evaluation_date'],
            "evaluation_score": paper['evaluation_score'],
            "evaluation_tags": paper['evaluation_tags']
        }
    return evaluation_json
async def get_available_dates() -> Dict[str, Any]:
    """Get the list of available dates in the cache (most recent first)"""
    async with db.get_connection() as conn:
        cursor = await conn.cursor()
        await cursor.execute('SELECT date_str FROM papers_cache ORDER BY date_str DESC LIMIT 30')
        rows = await cursor.fetchall()
        dates = [row['date_str'] for row in rows]
        return {
            "available_dates": dates,
            "count": len(dates)
        }
async def get_cache_status() -> Dict[str, Any]:
    """Get cache status and statistics"""
    async with db.get_connection() as conn:
        cursor = await conn.cursor()
        # Total number of cached dates
        await cursor.execute('SELECT COUNT(*) as count FROM papers_cache')
        total_cached = (await cursor.fetchone())['count']
        # Latest cached date
        await cursor.execute('SELECT date_str, updated_at FROM latest_date WHERE id = 1')
        latest_info = await cursor.fetchone()
        # Cache age distribution
        await cursor.execute('''
            SELECT
                CASE
                    WHEN updated_at > datetime('now', '-1 hour') THEN '1 hour'
                    WHEN updated_at > datetime('now', '-24 hours') THEN '24 hours'
                    WHEN updated_at > datetime('now', '-7 days') THEN '7 days'
                    ELSE 'older'
                END as age_group,
                COUNT(*) as count
            FROM papers_cache
            GROUP BY age_group
        ''')
        rows = await cursor.fetchall()
        age_distribution = {row['age_group']: row['count'] for row in rows}
        return {
            "total_cached_dates": total_cached,
            "latest_cached_date": latest_info['date_str'] if latest_info else None,
            "latest_updated": latest_info['updated_at'] if latest_info else None,
            "age_distribution": age_distribution
        }
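# Illustrative response shape (values are made up):
#
#   {
#       "total_cached_dates": 42,
#       "latest_cached_date": "2025-01-13",
#       "latest_updated": "2025-01-13 08:00:00",
#       "age_distribution": {"1 hour": 1, "24 hours": 3, "7 days": 10, "older": 28}
#   }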
async def get_papers_status() -> Dict[str, Any]:
    """Get papers database status and statistics"""
    papers_count = await db.get_papers_count()
    # Summarize the 10 most recent evaluations
    recent_papers = await db.get_evaluated_papers()
    recent_evaluations = []
    for paper in recent_papers[:10]:
        recent_evaluations.append({
            "arxiv_id": paper['arxiv_id'],
            "title": paper['title'],
            "evaluation_date": paper['evaluation_date'],
            "evaluation_score": paper['evaluation_score']
        })
    return {
        "papers_count": papers_count,
        "recent_evaluations": recent_evaluations
    }
async def insert_paper(paper_data: Dict[str, Any]) -> Dict[str, Any]:
    """Insert a new paper into the database"""
    try:
        required_fields = ['arxiv_id', 'title', 'authors']
        for field in required_fields:
            if field not in paper_data:
                raise HTTPException(status_code=400, detail=f"Missing required field: {field}")
        await db.insert_paper(
            arxiv_id=paper_data['arxiv_id'],
            title=paper_data['title'],
            authors=paper_data['authors'],
            abstract=paper_data.get('abstract'),
            categories=paper_data.get('categories'),
            published_date=paper_data.get('published_date')
        )
        return {"message": f"Paper {paper_data['arxiv_id']} inserted successfully"}
    except HTTPException:
        # Re-raise as-is so the 400 above isn't swallowed into a 500
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to insert paper: {str(e)}")
# Global task tracker for concurrent background evaluations
evaluation_tasks: Dict[str, asyncio.Task] = {}
async def evaluate_paper(arxiv_id: str, force_reevaluate: bool = False) -> Dict[str, Any]:
    """Kick off a background evaluation for a paper by its arxiv_id"""
    try:
        # The paper must already exist in the database
        paper = await db.get_paper(arxiv_id)
        if not paper:
            raise HTTPException(status_code=404, detail="Paper not found in database")
        # Skip if already evaluated (unless force_reevaluate is True)
        if not force_reevaluate and paper.get('is_evaluated', False):
            return {"message": f"Paper {arxiv_id} already evaluated", "status": "already_evaluated"}
        # Skip if an evaluation is already running
        if arxiv_id in evaluation_tasks and not evaluation_tasks[arxiv_id].done():
            return {"message": f"Evaluation already running for {arxiv_id}", "status": "already_running"}
        # Build the PDF URL from the arxiv_id
        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"

        async def run_eval():
            """Background task: run the evaluation and track its status"""
            try:
                await db.update_paper_status(arxiv_id, "evaluating")
                logger.info(f"Started {'re-' if force_reevaluate else ''}evaluation for {arxiv_id}")
                await run_evaluation(
                    pdf_path=pdf_url,
                    arxiv_id=arxiv_id,
                    api_key=os.getenv("ANTHROPIC_API_KEY")
                )
                await db.update_paper_status(arxiv_id, "completed")
                logger.info(f"{'Re-' if force_reevaluate else ''}evaluation completed for {arxiv_id}")
            except Exception as e:
                await db.update_paper_status(arxiv_id, "failed")
                logger.error(f"{'Re-' if force_reevaluate else ''}evaluation failed for {arxiv_id}: {str(e)}")
            finally:
                # Drop the finished task from the tracker
                if arxiv_id in evaluation_tasks:
                    del evaluation_tasks[arxiv_id]

        # Start the evaluation in the background and track the task
        task = asyncio.create_task(run_eval())
        evaluation_tasks[arxiv_id] = task
        return {
            "message": f"{'Re-' if force_reevaluate else ''}evaluation started for paper {arxiv_id}",
            "status": "started",
            "pdf_url": pdf_url,
            "concurrent_tasks": len(evaluation_tasks),
            "is_reevaluate": force_reevaluate
        }
    except HTTPException:
        # Re-raise as-is so the 404 above isn't swallowed into a 500
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to evaluate paper: {str(e)}")
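# Sketch of the intended fire-and-forget flow (the arxiv id is a placeholder
# and the handlers are called directly here; route registration is not shown
# in this file):
#
#   resp = await evaluate_paper("2401.00001")
#   # -> {"status": "started", "pdf_url": "https://arxiv.org/pdf/2401.00001.pdf", ...}
#   status = await get_evaluation_status("2401.00001")
#   # poll until status["is_running"] is False, then read the stored result
#   # via get_eval("2401.00001")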
async def get_evaluation_status(arxiv_id: str) -> Dict[str, Any]:
    """Get the evaluation status for a paper"""
    try:
        paper = await db.get_paper(arxiv_id)
        if not paper:
            raise HTTPException(status_code=404, detail="Paper not found")
        status = paper.get('evaluation_status', 'not_started')
        is_evaluated = paper.get('is_evaluated', False)
        # Check whether a background task is currently running for this paper
        is_running = arxiv_id in evaluation_tasks and not evaluation_tasks[arxiv_id].done()
        return {
            "arxiv_id": arxiv_id,
            "status": status,
            "is_evaluated": is_evaluated,
            "is_running": is_running,
            "evaluation_date": paper.get('evaluation_date'),
            "evaluation_score": paper.get('evaluation_score')
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get evaluation status: {str(e)}")
async def reevaluate_paper(arxiv_id: str) -> Dict[str, Any]:
    """Re-evaluate a paper by its arxiv_id (evaluate_paper with force_reevaluate=True)"""
    return await evaluate_paper(arxiv_id, force_reevaluate=True)
async def get_active_evaluation_tasks() -> Dict[str, Any]:
    """Get the list of currently running evaluation tasks"""
    active_tasks = {}
    for arxiv_id, task in evaluation_tasks.items():
        if not task.done():
            active_tasks[arxiv_id] = {
                "status": "running",
                "done": task.done(),
                "cancelled": task.cancelled()
            }
    return {
        "active_tasks": active_tasks,
        "total_active": len(active_tasks),
        "total_tracked": len(evaluation_tasks)
    }

async def clear_cache() -> Dict[str, str]:
    """Clear all cached daily-papers data"""
    async with db.get_connection() as conn:
        cursor = await conn.cursor()
        await cursor.execute('DELETE FROM papers_cache')
        await conn.commit()
    return {"message": "Cache cleared successfully"}
async def refresh_cache(date_str: str) -> Dict[str, Any]:
    """Force-refresh the cache for a specific date"""
    try:
        hf_daily = HuggingFaceDailyPapers()
        # Fetch fresh data, following any redirect to the actual date
        actual_date, html = await hf_daily.fetch_daily_html(date_str)
        cards = hf_daily.parse_daily_cards(html)
        # Cache the results under the date we actually got
        await db.cache_papers(actual_date, html, cards)
        return {
            "message": f"Cache refreshed for {actual_date}",
            "cards_count": len(cards)
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to refresh cache: {str(e)}")

async def get_favicon():
    """Serve a favicon to prevent 404 errors"""
    # Return a simple inline SVG favicon
    favicon_svg = '''<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
    <rect width="100" height="100" fill="#3b82f6"/>
    <text x="50" y="65" font-family="Arial, sans-serif" font-size="50" text-anchor="middle" fill="white">π</text>
    </svg>'''
    return Response(content=favicon_svg, media_type="image/svg+xml")

async def get_styles():
    """Serve CSS with no-cache headers to avoid stale styles during development"""
    response = FileResponse("frontend/styles.css", media_type="text/css")
    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"
    return response
async def main():
    # Parse command-line arguments
    args = parse_args()
    # Initialize the configuration
    config.init_config(args.config, args)
    # Initialize the logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")
    # Initialize the database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    # Mount the frontend
    os.makedirs(config.frontend_path, exist_ok=True)
    app.mount("/", StaticFiles(directory=config.frontend_path, html=True), name="static")
    logger.info(f"| Frontend initialized at: {config.frontend_path}")
    # Port 7860 is the default for Hugging Face Spaces; the same port is used
    # for local development
    config_uvicorn = uvicorn.Config(app, host="0.0.0.0", port=7860)
    server = uvicorn.Server(config_uvicorn)
    await server.serve()

if __name__ == "__main__":
    asyncio.run(main())