mgbam committed on
Commit
cdc75a8
·
verified ·
1 Parent(s): ec0d077

Update genesis/api_clients/umls_api.py

Browse files
Files changed (1) hide show
  1. genesis/api_clients/umls_api.py +67 -68
genesis/api_clients/umls_api.py CHANGED
@@ -1,94 +1,93 @@
1
  # genesis/api_clients/umls_api.py
2
- import requests
3
  import os
4
- from urllib.parse import quote
5
 
6
- UMLS_API_KEY = os.getenv("UMLS_API_KEY") # Store in Hugging Face secrets
 
7
  UMLS_BASE_URL = "https://uts-ws.nlm.nih.gov/rest"
8
 
9
- if not UMLS_API_KEY:
10
- raise ValueError("UMLS_API_KEY not found in environment variables.")
11
 
12
- # Get UMLS authentication ticket
13
- def get_umls_ticket():
14
  """
15
- Retrieve a UMLS authentication ticket.
16
  """
17
- auth_url = f"https://utslogin.nlm.nih.gov/cas/v1/api-key"
18
- response = requests.post(auth_url, data={"apikey": UMLS_API_KEY})
19
- response.raise_for_status()
20
- return response.text.split("<form")[0].strip()
21
 
 
 
 
22
 
23
- def search_umls(term: str, page_size: int = 10):
 
24
  """
25
- Search UMLS for a term (CUI, concept name, or synonym).
26
  """
27
- ticket = get_umls_ticket()
28
- encoded_term = quote(term)
29
- url = f"{UMLS_BASE_URL}/search/current?string={encoded_term}&ticket={ticket}&pageSize={page_size}"
30
- response = requests.get(url)
31
- response.raise_for_status()
32
- results = response.json().get("result", {}).get("results", [])
33
- return [
34
- {
35
- "ui": r.get("ui"),
36
- "name": r.get("name"),
37
- "rootSource": r.get("rootSource"),
38
- }
39
- for r in results
40
- if r.get("ui") != "NONE"
41
- ]
42
 
43
 
44
- def get_concept_details(cui: str):
45
  """
46
- Retrieve details for a specific UMLS concept (CUI).
47
  """
48
- ticket = get_umls_ticket()
49
- url = f"{UMLS_BASE_URL}/content/current/CUI/{cui}?ticket={ticket}"
50
- response = requests.get(url)
51
- response.raise_for_status()
52
- return response.json().get("result", {})
53
 
 
 
 
 
54
 
55
- def get_concept_synonyms(cui: str):
56
- """
57
- Get synonyms for a given UMLS concept.
58
- """
59
- concept = get_concept_details(cui)
60
- synonyms = []
61
- for atom in concept.get("atoms", []):
62
- synonyms.append(atom.get("name"))
63
- return list(set(synonyms))
64
 
 
65
 
66
- def get_semantic_types(cui: str):
 
67
  """
68
- Retrieve semantic types for a UMLS concept.
69
  """
70
- ticket = get_umls_ticket()
71
- url = f"{UMLS_BASE_URL}/content/current/CUI/{cui}/definitions?ticket={ticket}"
72
- response = requests.get(url)
73
- response.raise_for_status()
74
- definitions = response.json().get("result", [])
75
- return [d.get("rootSource") for d in definitions]
 
 
76
 
77
 
78
- def cross_map_term(term: str):
79
  """
80
- Map a term across all major UMLS vocabularies.
81
  """
82
- mappings = {}
83
- search_results = search_umls(term, page_size=5)
84
- for result in search_results:
85
- cui = result["ui"]
86
- synonyms = get_concept_synonyms(cui)
87
- semantic_types = get_semantic_types(cui)
88
- mappings[cui] = {
89
- "name": result["name"],
90
- "rootSource": result["rootSource"],
91
- "synonyms": synonyms,
92
- "semantic_types": semantic_types,
93
- }
94
- return mappings
 
 
 
 
 
 
1
  # genesis/api_clients/umls_api.py
 
2
  import os
3
+ import requests
4
 
5
+ UMLS_API_KEY = os.getenv("UMLS_API_KEY")
6
+ UMLS_AUTH_ENDPOINT = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
7
  UMLS_BASE_URL = "https://uts-ws.nlm.nih.gov/rest"
8
 
 
 
9
 
10
def get_tgt():
    """
    Get Ticket-Granting Ticket (TGT) for UMLS API authentication.

    Posts the API key to ``UMLS_AUTH_ENDPOINT`` and reads the TGT URL from
    the ``action="..."`` attribute in the response body.

    Returns:
        str: The TGT URL. ``get_service_ticket`` posts to this URL.

    Raises:
        ValueError: If ``UMLS_API_KEY`` is unset or empty, or if the response
            body contains no ``action="..."`` attribute.
        requests.HTTPError: If the auth endpoint answers with an error status.
        requests.Timeout: If the auth endpoint does not answer in time.
    """
    if not UMLS_API_KEY:
        # Fail before the network round trip so a missing secret is reported
        # as a configuration problem rather than as an HTTP error.
        raise ValueError("UMLS_API_KEY not found in environment variables.")

    # A timeout keeps a stalled auth server from blocking the caller forever.
    r = requests.post(
        UMLS_AUTH_ENDPOINT,
        data={"apikey": UMLS_API_KEY},
        timeout=30,
    )
    r.raise_for_status()

    # Extract TGT from HTML response: the text between 'action="' and the
    # next double quote.
    _, marker, remainder = r.text.partition('action="')
    tgt_url = remainder.partition('"')[0]
    if not marker or not tgt_url:
        raise ValueError(
            "Could not find a TGT URL in the UMLS authentication response."
        )
    return tgt_url
21
 
22
+
23
def get_service_ticket(tgt):
    """
    Get Service Ticket (ST) from the TGT for making API requests.

    Args:
        tgt: The TGT URL returned by ``get_tgt``. The request is posted
            directly to this URL.

    Returns:
        str: The response body, which the other functions in this module
        pass as the ``ticket`` query parameter.

    Raises:
        requests.HTTPError: If the ticket endpoint answers with an error status.
        requests.Timeout: If the ticket endpoint does not answer in time.
    """
    # A timeout keeps a stalled ticket server from blocking the caller forever.
    r = requests.post(
        tgt,
        data={"service": "http://umlsks.nlm.nih.gov"},
        timeout=30,
    )
    r.raise_for_status()
    return r.text
 
 
 
 
 
 
 
 
 
 
 
31
 
32
 
33
def search_umls(term: str, max_results: int = 10):
    """
    Search UMLS for a term and return matching concepts.

    Args:
        term: Search string, sent as the ``string`` query parameter.
        max_results: Sent as the ``pageSize`` query parameter.

    Returns:
        list[dict]: One dict per entry under ``result.results`` in the
        response, with keys ``ui``, ``name`` and ``rootSource``. Missing
        fields default to ``""``. Entries are not filtered here;
        ``map_related_concepts`` skips those whose ``ui`` is ``"NONE"``.

    Raises:
        requests.HTTPError: If authentication or the search request fails.
        requests.Timeout: If the search endpoint does not answer in time.
    """
    # A fresh service ticket is obtained for every request.
    st = get_service_ticket(get_tgt())

    r = requests.get(
        f"{UMLS_BASE_URL}/search/current",
        params={"string": term, "ticket": st, "pageSize": max_results},
        timeout=30,  # do not hang indefinitely on a stalled server
    )
    r.raise_for_status()

    hits = r.json().get("result", {}).get("results", [])
    return [
        {
            "ui": hit.get("ui", ""),
            "name": hit.get("name", ""),
            "rootSource": hit.get("rootSource", ""),
        }
        for hit in hits
    ]
56
 
57
+
58
def get_concept_details(cui: str):
    """
    Retrieve detailed information for a concept by CUI (Concept Unique Identifier).

    Args:
        cui: Concept identifier, placed in the request path after
            percent-encoding.

    Returns:
        dict: The full decoded JSON response. ``map_related_concepts`` reads
        the concept from its ``"result"`` key.

    Raises:
        requests.HTTPError: If authentication or the concept request fails.
        requests.Timeout: If the content endpoint does not answer in time.
    """
    # Local import: the module's top-level imports do not include urllib.
    from urllib.parse import quote

    # A fresh service ticket is obtained for every request.
    st = get_service_ticket(get_tgt())

    # Percent-encode the identifier so characters such as "/" or "?" in a
    # caller-supplied value cannot change the request path or query.
    safe_cui = quote(cui, safe="")
    r = requests.get(
        f"{UMLS_BASE_URL}/content/current/CUI/{safe_cui}",
        params={"ticket": st},
        timeout=30,  # do not hang indefinitely on a stalled server
    )
    r.raise_for_status()
    return r.json()
70
 
71
 
72
def _atom_names(atoms):
    """Return the atom names for a concept's ``atoms`` field.

    The field is handled in whichever form it arrives: an inline list of atom
    dicts, a URL to fetch the atoms from, or a placeholder such as ``"NONE"``.
    """
    if isinstance(atoms, str):
        if not atoms.startswith("http"):
            # Placeholder value (e.g. "NONE"): no atoms to report.
            return []
        # The concept payload links to its atoms instead of embedding them.
        # Follow the link with a fresh service ticket.
        # NOTE(review): only the first page of atoms is read -- confirm whether
        # paging through all atoms is wanted.
        r = requests.get(
            atoms,
            params={"ticket": get_service_ticket(get_tgt())},
            timeout=30,
        )
        r.raise_for_status()
        atoms = r.json().get("result", [])

    if not isinstance(atoms, list):
        return []
    return [atom.get("name", "") for atom in atoms if isinstance(atom, dict)]


def map_related_concepts(term: str):
    """
    Search UMLS and map related concepts with synonyms and semantic types.

    Runs ``search_umls`` for up to 5 results, then loads each hit's concept
    record with ``get_concept_details``.

    Args:
        term: Search string passed to ``search_umls``.

    Returns:
        list[dict]: One dict per usable hit with keys ``cui``, ``name``,
        ``synonyms`` (atom names, de-duplicated, first-seen order),
        ``semantic_types`` (names from ``semanticTypes``) and ``rootSource``
        (taken from the search hit).

    Raises:
        requests.HTTPError: If any underlying UMLS request fails.
    """
    mapped = []

    for c in search_umls(term, max_results=5):
        cui = c["ui"]
        # Skip hits with an empty identifier or the "NONE" placeholder.
        if not cui or cui == "NONE":
            continue

        entity = get_concept_details(cui).get("result", {})

        # ``atoms`` may be a URL string rather than a list. Iterating a string
        # would yield characters and crash on ``.get``, so resolve it first.
        synonyms = list(dict.fromkeys(_atom_names(entity.get("atoms", []))))

        semantic_types = entity.get("semanticTypes", [])
        if not isinstance(semantic_types, list):
            # Placeholder value (e.g. "NONE") instead of a list of dicts.
            semantic_types = []

        mapped.append({
            "cui": cui,
            "name": entity.get("name", ""),
            "synonyms": synonyms,
            "semantic_types": [
                sem.get("name", "")
                for sem in semantic_types
                if isinstance(sem, dict)
            ],
            "rootSource": c.get("rootSource", ""),
        })

    return mapped