Spaces:
Running
Running
File size: 33,301 Bytes
21fd477 6c5cf21 583741e 21fd477 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e d3ff7fa 583741e d3ff7fa 583741e 21fd477 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e 78f6650 583741e a1a8349 583741e 21fd477 78f6650 583741e 78f6650 583741e 36a31d0 78f6650 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 |
import os
import sys
from dotenv import load_dotenv
load_dotenv(verbose=True)
from pathlib import Path
import argparse
from mmengine import DictAction
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
import httpx
from bs4 import BeautifulSoup
import json
import asyncio
import uvicorn
root = str(Path(__file__).parent)
sys.path.append(root)
from src.database import db
from src.logger import logger
from src.config import config
from src.crawl import HuggingFaceDailyPapers
from src.agents.evaluator import run_evaluation
app = FastAPI(title="PaperAgent")
# CORS is fully open: every origin, method and header is accepted, and
# credentials are allowed.
# NOTE(review): this is broader than a "same-origin and localhost only"
# development policy - restrict allow_origins before exposing the service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def parse_args():
    """Parse the command-line options of the server.

    Options:
        --config       Path of the config file; defaults to
                       ``<root>/configs/paper_agent.py``.
        --cfg-options  One or more ``key=value`` overrides, collected by
                       ``DictAction``.

    Returns:
        The ``argparse.Namespace`` produced by the parser.
    """
    default_config = os.path.join(root, "configs", "paper_agent.py")
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.'
    )

    cli = argparse.ArgumentParser(description='main')
    cli.add_argument("--config", default=default_config, help="config file path")
    cli.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help=cfg_options_help,
    )
    return cli.parse_args()
# Date fallback for "prev"/direct requests relies on HuggingFace's redirect mechanism; forward ("next") search uses find_next_available_date_forward.
def _cached_daily_response(
    shown_date: str,
    requested_date: str,
    cached_data: Dict[str, Any],
    fallback_used: bool,
) -> Dict[str, Any]:
    """Build the /api/daily payload for cards served from the cache."""
    return {
        "date": shown_date,
        "requested_date": requested_date,
        "cards": cached_data['cards'],
        "fallback_used": fallback_used,
        "cached": True,
        "cached_at": cached_data['cached_at']
    }


async def _process_and_cache_daily(
    hf_daily,
    actual_date: str,
    requested_date: str,
    html: str,
    fallback_used: bool,
) -> Dict[str, Any]:
    """Parse fetched HTML, enrich the cards, cache them and build the payload."""
    cards = hf_daily.parse_daily_cards(html)
    enriched_cards = await enrich_cards(cards)
    await db.cache_papers(actual_date, html, enriched_cards)
    return {
        "date": actual_date,
        "requested_date": requested_date,
        "cards": enriched_cards,
        "fallback_used": fallback_used,
        "cached": False
    }


async def _daily_following_redirect(hf_daily, target_date: str, is_prev: bool) -> Dict[str, Any]:
    """Serve a date, accepting whatever date the upstream fetch resolves to.

    Shared by "prev" navigation and requests without a direction; the two
    only differ in their log messages.
    """
    try:
        actual_date, html = await hf_daily.fetch_daily_html(target_date)
        fetch_label = "Previous navigation" if is_prev else "Direct fetch"
        print(f"{fetch_label}: fetched {actual_date} (requested {target_date})")

        # A differing date means the fetch was redirected: that date is the fallback.
        redirected = actual_date != target_date
        if redirected:
            print(f"Redirected from {target_date} to {actual_date}")
            cached_data = await db.get_cached_papers(actual_date)
            if cached_data and await db.is_cache_fresh(actual_date):
                print(f"Using cached data for redirected date {actual_date}")
                return _cached_daily_response(actual_date, target_date, cached_data, True)

        return await _process_and_cache_daily(hf_daily, actual_date, target_date, html, redirected)
    except Exception as e:
        failure_suffix = " for previous navigation" if is_prev else ""
        print(f"Failed to fetch {target_date}{failure_suffix}: {e}")
        # Last resort: serve the cache entry for the requested date, even if stale.
        cached_data = await db.get_cached_papers(target_date)
        if cached_data:
            return _cached_daily_response(target_date, target_date, cached_data, False)
        raise HTTPException(status_code=503, detail="Unable to fetch papers and no cache available") from e


async def _daily_next(hf_daily, target_date: str) -> Dict[str, Any]:
    """Serve "next" navigation: the requested date, or the next available one."""
    try:
        actual_date, html = await hf_daily.fetch_daily_html(target_date)
        print(f"Next navigation: fetched {actual_date} (requested {target_date})")
        if actual_date == target_date:
            return await _process_and_cache_daily(hf_daily, actual_date, target_date, html, False)

        # A redirect means the requested date does not exist; search forward instead.
        print(f"Requested date {target_date} doesn't exist, searching for next available date")
        next_date = await find_next_available_date_forward(target_date)
        if not next_date:
            # Nothing found ahead: answer with an empty page for the requested date.
            return {
                "date": target_date,
                "requested_date": target_date,
                "cards": [],
                "fallback_used": False,
                "cached": False
            }

        cached_data = await db.get_cached_papers(next_date)
        if cached_data and await db.is_cache_fresh(next_date):
            print(f"Using cached data for next available date {next_date}")
            return _cached_daily_response(next_date, target_date, cached_data, True)

        actual_date, html = await hf_daily.fetch_daily_html(next_date)
        return await _process_and_cache_daily(hf_daily, actual_date, target_date, html, True)
    except Exception as e:
        print(f"Failed to fetch {target_date} for next navigation: {e}")
        # Fall back to any cached entry for the next available date.
        next_date = await find_next_available_date_forward(target_date)
        if next_date:
            cached_data = await db.get_cached_papers(next_date)
            if cached_data:
                return _cached_daily_response(next_date, target_date, cached_data, True)
        raise HTTPException(status_code=503, detail="Unable to fetch papers and no cache available") from e


@app.get("/api/daily")
async def get_daily(date_str: Optional[str] = None, direction: Optional[str] = None) -> Dict[str, Any]:
    """Return the daily paper cards for a date.

    Args:
        date_str: Requested date (ISO ``YYYY-MM-DD``); defaults to today.
        direction: ``"prev"``, ``"next"`` or ``None``. ``"next"`` searches
            forward for the next available date when the requested one is
            missing; any other value accepts the date the fetch resolves to.

    Returns:
        A dict with ``date``, ``requested_date``, ``cards``,
        ``fallback_used`` and ``cached`` (plus ``cached_at`` when the cards
        came from the cache).

    Raises:
        HTTPException: 503 when fetching fails and no cache entry can be used.
    """
    target_date = date_str or date.today().isoformat()
    hf_daily = HuggingFaceDailyPapers()

    # A fresh cache entry for the requested date short-circuits everything else.
    cached_data = await db.get_cached_papers(target_date)
    if cached_data and await db.is_cache_fresh(target_date):
        print(f"Using cached data for {target_date}")
        return _cached_daily_response(target_date, target_date, cached_data, False)

    if direction == "next":
        return await _daily_next(hf_daily, target_date)
    return await _daily_following_redirect(hf_daily, target_date, is_prev=(direction == "prev"))
async def find_next_available_date_forward(start_date: str, max_attempts: int = 30) -> Optional[str]:
    """Find the first date after ``start_date`` that has papers.

    Each following day is checked in turn: a cache entry counts as available,
    otherwise the day is fetched and counts only if the fetch resolves to
    that exact date.

    Args:
        start_date: Date to start from, formatted ``YYYY-MM-DD`` (exclusive).
        max_attempts: Maximum number of days to look ahead.

    Returns:
        The first available date as ``YYYY-MM-DD``, or ``None`` if none was
        found within ``max_attempts`` days.

    Raises:
        ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` format.
    """
    current_date = datetime.strptime(start_date, "%Y-%m-%d")
    hf_daily = None  # created lazily: cache hits never need the crawler

    for _ in range(max_attempts):
        current_date += timedelta(days=1)
        date_str = current_date.strftime("%Y-%m-%d")

        # A cached day is available by definition.
        if await db.get_cached_papers(date_str):
            return date_str

        try:
            if hf_daily is None:
                hf_daily = HuggingFaceDailyPapers()
            # Enforce the short per-probe timeout; previously an unused
            # httpx client carried the timeout, so it never applied.
            actual_date, _html = await asyncio.wait_for(
                hf_daily.fetch_daily_html(date_str), timeout=5
            )
        except Exception as e:
            # Best effort: a failed or timed-out probe just moves on to the next day.
            print(f"Failed to check {date_str}: {e}")
            continue

        if actual_date == date_str:
            return date_str

    return None
async def enrich_cards(cards):
    """Annotate each card in place with data stored for its paper.

    Every card receives ``has_eval`` and ``is_evaluated``. Cards whose paper
    is evaluated also receive the stored score, date and tags, and empty
    ``title``/``authors``/``abstract`` fields are filled from the database.

    Returns:
        The same ``cards`` object that was passed in.
    """
    for card in cards:
        paper_id = card.get("arxiv_id")
        record = await db.get_paper(paper_id) if paper_id else None

        # No id, or no stored paper: the card is simply marked as not evaluated.
        if not record:
            card["has_eval"] = False
            card["is_evaluated"] = False
            continue

        evaluated = record.get('is_evaluated', False)
        card["has_eval"] = evaluated
        card["is_evaluated"] = evaluated

        if evaluated:
            for key in ("evaluation_score", "overall_score", "evaluation_date", "evaluation_tags"):
                card[key] = record.get(key)

        # Stored values only fill gaps; data already on the card wins.
        for key in ("title", "authors", "abstract"):
            if not card.get(key) and record.get(key):
                card[key] = record[key]

    return cards
@app.get("/api/evals")
async def list_evals() -> Dict[str, Any]:
    """List every evaluated paper with its evaluation summary fields."""
    summary_fields = (
        "arxiv_id",
        "title",
        "authors",
        "evaluation_date",
        "evaluation_score",
        "evaluation_tags",
    )
    items: List[Dict[str, Any]] = [
        {name: paper[name] for name in summary_fields}
        for paper in await db.get_evaluated_papers()
    ]
    return {"count": len(items), "items": items}
@app.get("/api/has-eval/{paper_id}")
async def has_eval(paper_id: str) -> Dict[str, bool]:
    """Report whether a stored, evaluated paper exists for ``paper_id``.

    Returns:
        ``{"exists": True}`` only when the paper is stored and flagged as
        evaluated; ``{"exists": False}`` otherwise.
    """
    paper = await db.get_paper(paper_id)
    # Coerce to a real bool: the stored flag may be None or an integer, which
    # would otherwise leak through the declared Dict[str, bool] contract.
    exists = bool(paper is not None and paper.get('is_evaluated', False))
    return {"exists": exists}
@app.get("/api/paper/{paper_id}")
async def get_paper_details(paper_id: str) -> Dict[str, Any]:
    """Return the stored metadata of one paper.

    Raises:
        HTTPException: 404 when no paper is stored under ``paper_id``.
    """
    record = await db.get_paper(paper_id)
    if not record:
        raise HTTPException(status_code=404, detail="Paper not found")

    leading_fields = ("arxiv_id", "title", "authors", "abstract", "categories", "published_date")
    trailing_fields = ("evaluation_date", "created_at", "updated_at")

    details: Dict[str, Any] = {name: record.get(name) for name in leading_fields}
    # Only this field has a non-None default.
    details["is_evaluated"] = record.get('is_evaluated', False)
    for name in trailing_fields:
        details[name] = record.get(name)
    return details
@app.get("/api/paper-score/{paper_id}")
async def get_paper_score(paper_id: str) -> Dict[str, Any]:
    """Return the overall score of an evaluated paper.

    The score is the mean of the positive dimension scores found in the
    stored evaluation JSON. When that JSON is missing, has no ``scores``
    entry, or cannot be processed, the stored ``overall_score`` (or, failing
    that, ``evaluation_score``) is returned instead.

    Returns:
        ``{"has_score": False}`` for unknown or unevaluated papers, otherwise
        ``has_score``, ``score`` and ``evaluation_date``.
    """
    record = await db.get_paper(paper_id)
    print(f"Paper data for {paper_id}:", record)

    if not (record and record.get('is_evaluated', False)):
        print(f"Paper {paper_id} not found or not evaluated")
        return {"has_score": False}

    # Dimensions that are used as stored.
    plain_dimensions = (
        'task_formalization',
        'data_resource_availability',
        'input_output_complexity',
        'real_world_interaction',
        'existing_ai_coverage',
        'human_originality',
        'safety_ethics',
        'technical_maturity_needed',
    )

    try:
        raw_evaluation = record.get('evaluation_content')
        if raw_evaluation:
            parsed = json.loads(raw_evaluation)
            if 'scores' in parsed:
                scores = parsed['scores']
                dimension_values = [scores.get(key, 0) for key in plain_dimensions]
                # Percentage is rescaled to the 0-4 range of the other dimensions.
                dimension_values.append(scores.get('three_year_feasibility_pct', 0) / 25)
                dimension_values.append(scores.get('overall_automatability', 0))

                positive = [value for value in dimension_values if value > 0]
                if positive:
                    overall_score = sum(positive) / len(positive)
                else:
                    overall_score = 0
                print(f"Calculated overall score: {overall_score}")
                return {
                    "has_score": True,
                    "score": overall_score,
                    "evaluation_date": record.get('evaluation_date')
                }
    except Exception as e:
        print(f"Error calculating overall score: {e}")

    # Fallback to the values stored alongside the paper.
    overall_score = record.get('overall_score')
    evaluation_score = record.get('evaluation_score')
    print(f"Fallback - Overall score: {overall_score}, Evaluation score: {evaluation_score}")
    if overall_score is not None:
        stored_score = overall_score
    else:
        stored_score = evaluation_score
    return {
        "has_score": True,
        "score": stored_score,
        "evaluation_date": record.get('evaluation_date')
    }
@app.get("/api/eval/{paper_id}")
async def get_eval(paper_id: str) -> Any:
    """Return the stored evaluation of a paper.

    The evaluation content is returned as parsed JSON when possible;
    otherwise it is wrapped, together with the stored evaluation fields, in a
    simple dict.

    Raises:
        HTTPException: 404 when the paper is unknown or not evaluated.
    """
    paper = await db.get_paper(paper_id)
    if not paper or not paper.get('is_evaluated', False):
        raise HTTPException(status_code=404, detail="Evaluation not found")

    evaluation_content = paper['evaluation_content']
    try:
        evaluation_json = json.loads(evaluation_content)
    except (json.JSONDecodeError, TypeError):
        # Not JSON. TypeError covers missing/None content, which json.loads
        # rejects before parsing and which used to surface as a 500.
        evaluation_json = {
            "evaluation_content": evaluation_content,
            "arxiv_id": paper_id,
            "evaluation_date": paper['evaluation_date'],
            "evaluation_score": paper['evaluation_score'],
            "evaluation_tags": paper['evaluation_tags']
        }
    return evaluation_json
@app.get("/api/available-dates")
async def get_available_dates() -> Dict[str, Any]:
    """Return up to 30 cached dates, newest first, with their count."""
    query = 'SELECT date_str FROM papers_cache ORDER BY date_str DESC LIMIT 30'
    async with db.get_connection() as conn:
        cursor = await conn.cursor()
        await cursor.execute(query)
        dates = [row['date_str'] for row in await cursor.fetchall()]
    return {"available_dates": dates, "count": len(dates)}
@app.get("/api/cache/status")
async def get_cache_status() -> Dict[str, Any]:
    """Summarise the paper cache.

    Returns:
        The number of cached dates, the latest cached date and its update
        time (``None`` when no such row exists), and the count of cache rows
        per age bucket.
    """
    # Buckets cache rows by the age of their updated_at timestamp.
    age_query = '''
SELECT
CASE
WHEN updated_at > datetime('now', '-1 hour') THEN '1 hour'
WHEN updated_at > datetime('now', '-24 hours') THEN '24 hours'
WHEN updated_at > datetime('now', '-7 days') THEN '7 days'
ELSE 'older'
END as age_group,
COUNT(*) as count
FROM papers_cache
GROUP BY age_group
'''
    async with db.get_connection() as conn:
        cursor = await conn.cursor()

        await cursor.execute('SELECT COUNT(*) as count FROM papers_cache')
        count_row = await cursor.fetchone()
        total_cached = count_row['count']

        await cursor.execute('SELECT date_str, updated_at FROM latest_date WHERE id = 1')
        latest_info = await cursor.fetchone()

        await cursor.execute(age_query)
        age_distribution = {}
        for row in await cursor.fetchall():
            age_distribution[row['age_group']] = row['count']

    if latest_info:
        latest_date, latest_updated = latest_info['date_str'], latest_info['updated_at']
    else:
        latest_date, latest_updated = None, None

    return {
        "total_cached_dates": total_cached,
        "latest_cached_date": latest_date,
        "latest_updated": latest_updated,
        "age_distribution": age_distribution
    }
@app.get("/api/papers/status")
async def get_papers_status() -> Dict[str, Any]:
    """Return the paper count and the first ten evaluated papers."""
    papers_count = await db.get_papers_count()
    evaluated = await db.get_evaluated_papers()

    shown_fields = ("arxiv_id", "title", "evaluation_date", "evaluation_score")
    # Only the first 10 entries returned by the database are reported.
    recent_evaluations = [
        {name: paper[name] for name in shown_fields}
        for paper in evaluated[:10]
    ]
    return {"papers_count": papers_count, "recent_evaluations": recent_evaluations}
@app.post("/api/papers/insert")
async def insert_paper(paper_data: Dict[str, Any]) -> Dict[str, Any]:
    """Insert a new paper into the database.

    Args:
        paper_data: Paper fields. ``arxiv_id``, ``title`` and ``authors`` are
            required; ``abstract``, ``categories`` and ``published_date`` are
            optional.

    Returns:
        A confirmation message naming the inserted paper.

    Raises:
        HTTPException: 400 when a required field is missing, 500 when the
            database insert fails.
    """
    # Validate outside the try block: the 400 raised here used to be caught
    # by the generic handler below and reported as a 500.
    required_fields = ['arxiv_id', 'title', 'authors']
    for field in required_fields:
        if field not in paper_data:
            raise HTTPException(status_code=400, detail=f"Missing required field: {field}")

    try:
        await db.insert_paper(
            arxiv_id=paper_data['arxiv_id'],
            title=paper_data['title'],
            authors=paper_data['authors'],
            abstract=paper_data.get('abstract'),
            categories=paper_data.get('categories'),
            published_date=paper_data.get('published_date')
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to insert paper: {str(e)}") from e

    return {"message": f"Paper {paper_data['arxiv_id']} inserted successfully"}
# Global task tracker for concurrent evaluations.
# Maps arxiv_id -> the asyncio task running that paper's evaluation; each task
# removes its own entry when it finishes.
evaluation_tasks: Dict[str, asyncio.Task] = {}
@app.post("/api/papers/evaluate/{arxiv_id}")
async def evaluate_paper(arxiv_id: str, force_reevaluate: bool = False) -> Dict[str, Any]:
    """Start a background evaluation of a stored paper.

    Args:
        arxiv_id: Identifier of the paper; also used to build the PDF URL.
        force_reevaluate: Evaluate again even if the paper is already
            flagged as evaluated.

    Returns:
        A status payload: ``already_evaluated``, ``already_running`` or
        ``started`` (the latter with the PDF URL and the number of tracked
        tasks).

    Raises:
        HTTPException: 404 when the paper is not in the database, 500 on any
            other failure while starting the evaluation.
    """
    try:
        paper = await db.get_paper(arxiv_id)
        if not paper:
            raise HTTPException(status_code=404, detail="Paper not found in database")

        if not force_reevaluate and paper.get('is_evaluated', False):
            return {"message": f"Paper {arxiv_id} already evaluated", "status": "already_evaluated"}

        # Only one evaluation per paper may be in flight.
        if arxiv_id in evaluation_tasks and not evaluation_tasks[arxiv_id].done():
            return {"message": f"Evaluation already running for {arxiv_id}", "status": "already_running"}

        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"

        async def run_eval():
            """Run the evaluation and keep the paper status in sync."""
            try:
                await db.update_paper_status(arxiv_id, "evaluating")
                logger.info(f"Started {'re-' if force_reevaluate else ''}evaluation for {arxiv_id}")
                await run_evaluation(
                    pdf_path=pdf_url,
                    arxiv_id=arxiv_id,
                    api_key=os.getenv("ANTHROPIC_API_KEY")
                )
                await db.update_paper_status(arxiv_id, "completed")
                logger.info(f"{'Re-' if force_reevaluate else ''}evaluation completed for {arxiv_id}")
            except Exception as e:
                # Log first so the cause survives even if the status update fails.
                logger.error(f"{'Re-' if force_reevaluate else ''}evaluation failed for {arxiv_id}: {str(e)}")
                await db.update_paper_status(arxiv_id, "failed")
            finally:
                # Clean up task from tracker
                evaluation_tasks.pop(arxiv_id, None)

        # Start evaluation in background and track it
        task = asyncio.create_task(run_eval())
        evaluation_tasks[arxiv_id] = task

        return {
            "message": f"{'Re-' if force_reevaluate else ''}evaluation started for paper {arxiv_id}",
            "status": "started",
            "pdf_url": pdf_url,
            "concurrent_tasks": len(evaluation_tasks),
            "is_reevaluate": force_reevaluate
        }
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must not be rewrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to evaluate paper: {str(e)}") from e
@app.get("/api/papers/evaluate/{arxiv_id}/status")
async def get_evaluation_status(arxiv_id: str) -> Dict[str, Any]:
    """Get evaluation status for a paper.

    Returns:
        The stored status (``not_started`` when none is stored), the
        evaluated flag, whether a tracked task is still running, and the
        stored evaluation date and score.

    Raises:
        HTTPException: 404 when the paper is unknown, 500 on any other
            failure.
    """
    try:
        paper = await db.get_paper(arxiv_id)
        if not paper:
            raise HTTPException(status_code=404, detail="Paper not found")

        # Check if task is currently running
        is_running = arxiv_id in evaluation_tasks and not evaluation_tasks[arxiv_id].done()
        return {
            "arxiv_id": arxiv_id,
            "status": paper.get('evaluation_status', 'not_started'),
            "is_evaluated": paper.get('is_evaluated', False),
            "is_running": is_running,
            "evaluation_date": paper.get('evaluation_date'),
            "evaluation_score": paper.get('evaluation_score')
        }
    except HTTPException:
        # Keep the 404 above intact instead of rewrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get evaluation status: {str(e)}") from e
@app.post("/api/papers/reevaluate/{arxiv_id}")
async def reevaluate_paper(arxiv_id: str) -> Dict[str, Any]:
    """Start a background re-evaluation of a stored paper.

    Unlike a first evaluation, this runs even when the paper is already
    flagged as evaluated.

    Returns:
        A status payload: ``already_running`` or ``started`` (the latter
        with the PDF URL and the number of tracked tasks).

    Raises:
        HTTPException: 404 when the paper is not in the database, 500 on any
            other failure while starting the re-evaluation.
    """
    try:
        paper = await db.get_paper(arxiv_id)
        if not paper:
            raise HTTPException(status_code=404, detail="Paper not found in database")

        # Only one evaluation per paper may be in flight.
        if arxiv_id in evaluation_tasks and not evaluation_tasks[arxiv_id].done():
            return {"message": f"Evaluation already running for {arxiv_id}", "status": "already_running"}

        pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"

        async def run_reeval():
            """Run the re-evaluation and keep the paper status in sync."""
            try:
                await db.update_paper_status(arxiv_id, "evaluating")
                logger.info(f"Started re-evaluation for {arxiv_id}")
                await run_evaluation(
                    pdf_path=pdf_url,
                    arxiv_id=arxiv_id,
                    api_key=os.getenv("ANTHROPIC_API_KEY")
                )
                await db.update_paper_status(arxiv_id, "completed")
                logger.info(f"Re-evaluation completed for {arxiv_id}")
            except Exception as e:
                # Log first so the cause survives even if the status update fails.
                logger.error(f"Re-evaluation failed for {arxiv_id}: {str(e)}")
                await db.update_paper_status(arxiv_id, "failed")
            finally:
                # Clean up task from tracker
                evaluation_tasks.pop(arxiv_id, None)

        # Start re-evaluation in background and track it
        task = asyncio.create_task(run_reeval())
        evaluation_tasks[arxiv_id] = task

        return {
            "message": f"Re-evaluation started for paper {arxiv_id}",
            "status": "started",
            "pdf_url": pdf_url,
            "concurrent_tasks": len(evaluation_tasks),
            "is_reevaluate": True
        }
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must not be rewrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to re-evaluate paper: {str(e)}") from e
@app.get("/api/papers/evaluate/active-tasks")
async def get_active_evaluation_tasks() -> Dict[str, Any]:
    """Report the evaluation tasks that have not finished yet.

    Returns:
        The unfinished tasks keyed by arxiv_id, their count, and the total
        number of tracked tasks.
    """
    active_tasks = {
        paper_id: {
            "status": "running",
            "done": running_task.done(),
            "cancelled": running_task.cancelled()
        }
        for paper_id, running_task in evaluation_tasks.items()
        if not running_task.done()
    }
    return {
        "active_tasks": active_tasks,
        "total_active": len(active_tasks),
        "total_tracked": len(evaluation_tasks)
    }
@app.post("/api/cache/clear")
async def clear_cache() -> Dict[str, str]:
    """Delete every row of the papers cache and confirm."""
    async with db.get_connection() as connection:
        db_cursor = await connection.cursor()
        await db_cursor.execute('DELETE FROM papers_cache')
        await connection.commit()
    confirmation = {"message": "Cache cleared successfully"}
    return confirmation
@app.post("/api/cache/refresh/{date_str}")
async def refresh_cache(date_str: str) -> Dict[str, Any]:
    """Force refresh cache for a specific date.

    The page is fetched again, its cards are parsed and enriched, and the
    result is cached under the date the fetch actually resolved to.

    Returns:
        A confirmation message and the number of cards cached.

    Raises:
        HTTPException: 500 when fetching, parsing or caching fails.
    """
    try:
        hf_daily = HuggingFaceDailyPapers()
        actual_date, html = await hf_daily.fetch_daily_html(date_str)
        cards = hf_daily.parse_daily_cards(html)
        # Cache enriched cards, as every other cache_papers call does;
        # cached responses are served without re-enrichment, so raw cards
        # would lack the has_eval / is_evaluated fields.
        enriched_cards = await enrich_cards(cards)
        await db.cache_papers(actual_date, html, enriched_cards)
        return {
            "message": f"Cache refreshed for {actual_date}",
            "cards_count": len(enriched_cards)
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to refresh cache: {str(e)}") from e
@app.get("/favicon.ico")
async def get_favicon():
    """Answer favicon requests with an inline SVG so they do not 404."""
    from fastapi.responses import Response

    # Blue square with a white pi glyph.
    svg_markup = '''<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<rect width="100" height="100" fill="#3b82f6"/>
<text x="50" y="65" font-family="Arial, sans-serif" font-size="50" text-anchor="middle" fill="white">π</text>
</svg>'''
    return Response(content=svg_markup, media_type="image/svg+xml")
@app.get("/styles.css")
async def get_styles():
    """Serve the stylesheet with headers that disable client-side caching."""
    no_cache_headers = (
        ("Cache-Control", "no-cache, no-store, must-revalidate"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    )
    stylesheet = FileResponse("frontend/styles.css", media_type="text/css")
    for header_name, header_value in no_cache_headers:
        stylesheet.headers[header_name] = header_value
    return stylesheet
async def main():
    """Initialise configuration, logging, database and frontend, then serve.

    The steps run in a fixed order: the config is built from the command-line
    arguments, the logger and the database are initialised from that config,
    the frontend directory is mounted as static files, and finally uvicorn
    serves the app until it stops.
    """
    # Parse command line arguments
    args = parse_args()
    # Initialize the configuration
    config.init_config(args.config, args)
    # Initialize the logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")
    # Initialize the database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    # Load Frontend: make sure the directory exists, then mount it at "/"
    # (html=True) as static files.
    os.makedirs(config.frontend_path, exist_ok=True)
    app.mount("/", StaticFiles(directory=config.frontend_path, html=True), name="static")
    logger.info(f"| Frontend initialized at: {config.frontend_path}")
    # Port 7860 is used both on Hugging Face Spaces and for local development;
    # the server listens on all interfaces.
    config_uvicorn = uvicorn.Config(app, host="0.0.0.0", port=7860)
    server = uvicorn.Server(config_uvicorn)
    await server.serve()
# Script entry point: run the async main() until the server stops.
# (The stray trailing "|" after the call was a syntax error and is removed.)
if __name__ == "__main__":
    asyncio.run(main())