Spaces:
Sleeping
Sleeping
Update genesis/literature.py
Browse files- genesis/literature.py +35 -54
genesis/literature.py
CHANGED
@@ -1,69 +1,50 @@
|
|
1 |
# genesis/literature.py
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
import requests
|
4 |
-
from typing import List, Dict
|
5 |
|
6 |
-
NCBI_API_KEY = os.getenv("NCBI_API_KEY")
|
7 |
NCBI_EMAIL = os.getenv("NCBI_EMAIL")
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
def search_pubmed(query
|
10 |
-
"""
|
11 |
-
Search PubMed for research articles.
|
12 |
-
|
13 |
-
sort: "relevance", "pub date", or "most recent"
|
14 |
-
Returns: List of dicts with title, authors, journal, year, and link.
|
15 |
-
"""
|
16 |
try:
|
17 |
-
|
18 |
-
search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
19 |
-
search_params = {
|
20 |
"db": "pubmed",
|
21 |
"term": query,
|
22 |
"retmax": max_results,
|
23 |
-
"
|
24 |
-
"email": NCBI_EMAIL
|
25 |
-
"sort": "relevance" if sort == "relevance" else "pub+date" if sort == "most recent" else "pub+date",
|
26 |
-
"retmode": "json"
|
27 |
}
|
28 |
-
r = requests.get(
|
29 |
r.raise_for_status()
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
# Step 2: Fetch summaries for IDs
|
35 |
-
summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
|
36 |
-
summary_params = {
|
37 |
-
"db": "pubmed",
|
38 |
-
"id": ",".join(ids),
|
39 |
-
"retmode": "json"
|
40 |
-
}
|
41 |
-
s = requests.get(summary_url, params=summary_params, timeout=10)
|
42 |
-
s.raise_for_status()
|
43 |
-
summaries = s.json().get("result", {})
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
"year": paper.get("pubdate", "N/A"),
|
54 |
-
"pmid": pmid,
|
55 |
-
"url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
|
56 |
-
})
|
57 |
-
return papers
|
58 |
except Exception as e:
|
59 |
-
|
60 |
-
return []
|
61 |
|
62 |
-
def
|
63 |
-
"""
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
1 |
# genesis/literature.py
|
2 |
+
"""
|
3 |
+
Literature intelligence for GENESIS-AI
|
4 |
+
Fetches biomedical, biotech, and synthetic biology publications.
|
5 |
+
"""
|
6 |
+
|
7 |
import os
|
8 |
import requests
|
|
|
9 |
|
|
|
10 |
NCBI_EMAIL = os.getenv("NCBI_EMAIL")
|
11 |
+
PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
12 |
+
PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
13 |
+
|
14 |
+
CHEMBL_API = "https://www.ebi.ac.uk/chembl/api/data/molecule"
|
15 |
|
16 |
+
def search_pubmed(query, max_results=10):
    """Run a PubMed ESearch query and return the decoded JSON response.

    Args:
        query: Search expression sent as the ESearch ``term`` parameter.
        max_results: Maximum number of IDs to request (ESearch ``retmax``).

    Returns:
        The parsed JSON body of the ESearch response on success, or
        ``{"error": "<message>"}`` if the request or JSON decoding fails.
        Note that the whole response is returned, not a bare ID list;
        the IDs are presumably under ``esearchresult.idlist`` -- verify
        against the E-utilities ESearch JSON format.
    """
    try:
        params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "retmode": "json",
            "email": NCBI_EMAIL,
        }
        # An explicit timeout keeps a stalled connection from blocking the
        # caller forever; requests applies no timeout by default.
        r = requests.get(PUBMED_SEARCH_URL, params=params, timeout=10)
        r.raise_for_status()
        return r.json()
    except Exception as e:
        # Best-effort API: failures are reported in-band as an error dict.
        return {"error": str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
def fetch_pubmed_details(id_list):
    """Fetch PubMed records for the given IDs via EFetch.

    Args:
        id_list: Iterable of PubMed IDs (strings or integers). A single
            string is treated as one ID rather than being split into
            characters.

    Returns:
        The raw XML response text on success, or ``{"error": "<message>"}``
        if building or sending the request fails.
    """
    try:
        # A bare string is itself iterable; joining it would yield
        # "1,2,3" for "123", so wrap it as a one-element list.
        if isinstance(id_list, str):
            id_list = [id_list]
        # Coerce each ID to str so integer PMIDs do not break the join.
        ids = ",".join(str(pmid) for pmid in id_list)
        params = {"db": "pubmed", "id": ids, "retmode": "xml"}
        # An explicit timeout keeps a stalled connection from blocking the
        # caller forever; requests applies no timeout by default.
        r = requests.get(PUBMED_FETCH_URL, params=params, timeout=10)
        r.raise_for_status()
        return r.text  # XML response
    except Exception as e:
        # Best-effort API: failures are reported in-band as an error dict.
        return {"error": str(e)}
|
|
|
42 |
|
43 |
+
def search_chembl(query):
    """Search ChEMBL molecules whose synonyms contain ``query``.

    Args:
        query: Free-text fragment to match against molecule synonyms.

    Returns:
        The parsed JSON response on success, or ``{"error": "<message>"}``
        if the request or JSON decoding fails.
    """
    try:
        params = {
            # NOTE(review): ChEMBL examples filter on
            # ``molecule_synonyms__molecule_synonym__icontains``; confirm
            # that this shorter filter name is accepted by the API.
            "molecule_synonyms__icontains": query,
            # ChEMBL serves XML unless a format is requested, which would
            # make ``r.json()`` fail.
            "format": "json",
        }
        # Passing ``params`` lets requests URL-encode the query, so
        # characters such as "&", "#" or spaces cannot corrupt the URL.
        r = requests.get(CHEMBL_API, params=params, timeout=10)
        r.raise_for_status()
        return r.json()
    except Exception as e:
        # Best-effort API: failures are reported in-band as an error dict.
        return {"error": str(e)}
|