mgbam commited on
Commit
08c0325
·
verified ·
1 Parent(s): 2a448c1

Update mcp/orchestrator.py

Browse files
Files changed (1) hide show
  1. mcp/orchestrator.py +84 -135
mcp/orchestrator.py CHANGED
@@ -1,150 +1,99 @@
1
- #!/usr/bin/env python3
 
 
2
  """
3
- mcp/orchestrator.py β€” MedGenesis v5
4
- ───────────────────────────────────
5
- Asynchronously fan-outs across >10 open biomedical APIs, then returns
6
- one consolidated dictionary for the Streamlit UI.
7
-
8
- Public-key–free by default:
9
- β€’ MyGene.info, Ensembl REST, Open Targets GraphQL
10
- β€’ PubMed (E-utils), arXiv
11
- β€’ UMLS, openFDA, DisGeNET
12
- β€’ Expression Atlas, ClinicalTrials.gov (+ WHO ICTRP fallback)
13
- β€’ cBioPortal, DrugCentral, PubChem
14
-
15
- If you add secrets **MYGENE_KEY**, **OT_KEY**, **CBIO_KEY** or
16
- **NCBI_EUTILS_KEY**, they are auto-detected and used β€” otherwise the code
17
- runs key-less.
18
-
19
- Returned payload keys
20
- ─────────────────────
21
- papers, ai_summary, llm_used, umls, drug_safety,
22
- genes_rich, expr_atlas, drug_meta, chem_info,
23
- gene_disease, clinical_trials, cbio_variants
24
- """
25
-
26
- from __future__ import annotations
27
  import asyncio
28
  from typing import Dict, Any, List
29
 
30
- # ── Literature ──────────────────────────────────────────────────────
31
- from mcp.arxiv import fetch_arxiv
32
- from mcp.pubmed import fetch_pubmed
33
-
34
- # ── NLP + enrichment ────────────────────────────────────────────────
35
- from mcp.nlp import extract_keywords
36
- from mcp.umls import lookup_umls
37
- from mcp.openfda import fetch_drug_safety
38
- from mcp.disgenet import disease_to_genes
39
- from mcp.clinicaltrials import search_trials
40
-
41
- # Gene / expression modules
42
- from mcp.gene_hub import resolve_gene # MyGene β†’ Ensembl β†’ OT
43
- from mcp.atlas import fetch_expression
44
- from mcp.cbio import fetch_cbio # cancer variants
45
-
46
- # Drug metadata & chemistry
47
- from mcp.drugcentral_ext import fetch_drugcentral
48
- from mcp.pubchem_ext import fetch_compound
49
-
50
- # ── Large-language model helpers ────────────────────────────────────
51
- from mcp.openai_utils import ai_summarize, ai_qa
52
- from mcp.gemini import gemini_summarize, gemini_qa
53
-
54
- _LLM_DEFAULT = "openai"
55
-
56
- # ────────────────────────────────────────────────────────────────────
57
- # LLM router
58
- # ────────────────────────────────────────────────────────────────────
59
- def _llm_router(name: str):
60
- """Return (summarise_fn, qa_fn, engine_tag)."""
61
- if name.lower() == "gemini":
62
  return gemini_summarize, gemini_qa, "gemini"
63
  return ai_summarize, ai_qa, "openai"
64
 
65
 
66
- # ────────────────────────────────────────────────────────────────────
67
- # Main orchestrator
68
- # ────────────────────────────────────────────────────────────────────
69
- async def orchestrate_search(query: str,
70
- llm: str = _LLM_DEFAULT) -> Dict[str, Any]:
71
- """Run the complete async pipeline; always resolves without raising."""
72
- # 1 Literature ---------------------------------------------------
73
- arxiv_f = asyncio.create_task(fetch_arxiv(query, max_results=10))
74
- pubmed_f = asyncio.create_task(fetch_pubmed(query, max_results=10))
75
-
76
- papers: List[Dict] = []
77
- for res in await asyncio.gather(arxiv_f, pubmed_f, return_exceptions=True):
78
- if not isinstance(res, Exception):
79
- papers.extend(res)
80
-
81
- # 2 Keyword extraction ------------------------------------------
82
- corpus = " ".join(p.get("summary", "") for p in papers)
83
- keywords = extract_keywords(corpus)[:10]
84
-
85
- # 3 Parallel enrichment -----------------------------------------
86
- umls_jobs = [lookup_umls(k) for k in keywords]
87
- fda_jobs = [fetch_drug_safety(k) for k in keywords]
88
- gene_jobs = [resolve_gene(k) for k in keywords]
89
- expr_jobs = [fetch_expression(k) for k in keywords]
90
- drug_jobs = [fetch_drugcentral(k) for k in keywords]
91
- chem_jobs = [fetch_compound(k) for k in keywords]
92
-
93
- umls, fda, genes, exprs, drugs, chems = await asyncio.gather(
94
- asyncio.gather(*umls_jobs, return_exceptions=True),
95
- asyncio.gather(*fda_jobs, return_exceptions=True),
96
- asyncio.gather(*gene_jobs, return_exceptions=True),
97
- asyncio.gather(*expr_jobs, return_exceptions=True),
98
- asyncio.gather(*drug_jobs, return_exceptions=True),
99
- asyncio.gather(*chem_jobs, return_exceptions=True),
 
 
 
 
 
 
 
100
  )
101
 
102
- # filter out errors / empty payloads
103
- umls = [u for u in umls if isinstance(u, dict)]
104
- fda = [d for d in fda if d]
105
- genes = [g for g in genes if g]
106
- exprs = [e for e in exprs if e]
107
- drugs = [d for d in drugs if d]
108
- chems = [c for c in chems if c]
109
-
110
- # 4 Other single-shot APIs --------------------------------------
111
- gene_dis = await disease_to_genes(query)
112
- trials = await search_trials(query, max_studies=20)
113
-
114
- # Cancer variants for first 3 gene symbols (quota safety)
115
- cbio_jobs = [fetch_cbio(g.get("symbol", "")) for g in genes[:3]]
116
- cbio_vars = []
117
- if cbio_jobs:
118
- tmp = await asyncio.gather(*cbio_jobs, return_exceptions=True)
119
- cbio_vars = [v for v in tmp if v]
120
-
121
- # 5 AI summary ---------------------------------------------------
122
- summarise, _, engine_tag = _llm_router(llm)
123
- ai_summary = await summarise(corpus) if corpus else ""
124
-
125
- # 6 Return payload ----------------------------------------------
126
  return {
127
- "papers" : papers,
128
- "ai_summary" : ai_summary,
129
- "llm_used" : engine_tag,
130
- "umls" : umls,
131
- "drug_safety" : fda,
132
- "genes_rich" : genes,
133
- "expr_atlas" : exprs,
134
- "drug_meta" : drugs,
135
- "chem_info" : chems,
136
- "gene_disease" : gene_dis,
137
- "clinical_trials" : trials,
138
- "cbio_variants" : cbio_vars,
139
  }
140
 
141
 
142
- # ────────────────────────────────────────────────────────────────────
143
- # Follow-up question-answer
144
- # ────────────────────────────────────────────────────────────────────
145
- async def answer_ai_question(question: str, *,
146
- context: str,
147
- llm: str = _LLM_DEFAULT) -> Dict[str, str]:
148
- """Return {"answer": str} using chosen LLM."""
149
  _, qa_fn, _ = _llm_router(llm)
150
- return {"answer": await qa_fn(question, context=context)}
 
 
 
 
 
1
+ # ──────────────────────── mcp/orchestrator.py ─────────────────────────
2
+ """Dual‑LLM orchestrator coordinating literature ↔ annotation ↔ trials.
3
+ Adds gene/variant enrichment with MyGene.info β†’ Ensembl β†’ OpenTargets β†’ cBio.
4
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import asyncio
6
  from typing import Dict, Any, List
7
 
8
+ from mcp.arxiv import fetch_arxiv
9
+ from mcp.pubmed import fetch_pubmed
10
+ from mcp.nlp import extract_keywords
11
+ from mcp.umls import lookup_umls
12
+ from mcp.openfda import fetch_drug_safety
13
+ from mcp.clinicaltrials import search_trials
14
+ from mcp.gene_hub import resolve_gene # MyGene→Ensembl→OT
15
+ from mcp.cbio import fetch_cbio_variants
16
+
17
+ from mcp.openai_utils import ai_summarize, ai_qa
18
+ from mcp.gemini import gemini_summarize, gemini_qa
19
+
20
+ _DEF = "openai"
21
+
22
+ # ------------ light LLM router ------------
23
+
24
+ def _llm_router(llm: str):
25
+ if llm.lower() == "gemini":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  return gemini_summarize, gemini_qa, "gemini"
27
  return ai_summarize, ai_qa, "openai"
28
 
29
 
30
+ # ---------------- gene / variant enrichment --------------------------
31
+
32
+ async def _enrich_gene_block(keywords: List[str]) -> Dict[str, Any]:
33
+ out: List[Dict] = []
34
+ variants: Dict[str, List[Dict]] = {}
35
+ for kw in keywords:
36
+ g = await resolve_gene(kw)
37
+ if g:
38
+ out.append(g)
39
+ # fetch tumour variants – fire & forget (errors ignored)
40
+ try:
41
+ variants[kw] = await fetch_cbio_variants(kw)
42
+ except Exception:
43
+ variants[kw] = []
44
+ return {"genes": out, "variants": variants}
45
+
46
+
47
+ # ---------------- orchestrator entry‑points --------------------------
48
+
49
+ async def orchestrate_search(query: str, llm: str = _DEF) -> Dict[str, Any]:
50
+ """Run search, summarise and join annotations for the UI."""
51
+ # literature ------------------------------------------------------
52
+ arxiv_task = asyncio.create_task(fetch_arxiv(query, max_results=20))
53
+ pubmed_task = asyncio.create_task(fetch_pubmed(query, max_results=20))
54
+ papers = sum(await asyncio.gather(arxiv_task, pubmed_task), [])
55
+
56
+ # NLP keyword extraction -----------------------------------------
57
+ blob = " ".join(p.get("summary", "") for p in papers)[:60_000]
58
+ keywords = extract_keywords(blob)[:12]
59
+
60
+ # enrichment (in parallel) ---------------------------------------
61
+ umls_f = [lookup_umls(k) for k in keywords]
62
+ fda_f = [fetch_drug_safety(k) for k in keywords]
63
+ gene_block = asyncio.create_task(_enrich_gene_block(keywords))
64
+ trials_task = asyncio.create_task(search_trials(query, max_studies=20))
65
+
66
+ umls, fda, gene_data, trials = await asyncio.gather(
67
+ asyncio.gather(*umls_f, return_exceptions=True),
68
+ asyncio.gather(*fda_f, return_exceptions=True),
69
+ gene_block,
70
+ trials_task,
71
  )
72
 
73
+ # summarise via LLM ----------------------------------------------
74
+ summarise, _, engine_name = _llm_router(llm)
75
+ try:
76
+ summary = await summarise(blob)
77
+ except Exception:
78
+ summary = "LLM summarisation unavailable." # graceful fallback
79
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return {
81
+ "papers": papers,
82
+ "umls": umls,
83
+ "drug_safety": fda,
84
+ "genes": gene_data["genes"],
85
+ "variants": gene_data["variants"],
86
+ "clinical_trials": trials,
87
+ "ai_summary": summary,
88
+ "llm_used": engine_name,
 
 
 
 
89
  }
90
 
91
 
92
+ async def answer_ai_question(question: str, *, context: str, llm: str = _DEF) -> Dict[str, str]:
93
+ """Follow‑up Q&A via selected LLM."""
 
 
 
 
 
94
  _, qa_fn, _ = _llm_router(llm)
95
+ try:
96
+ answer = await qa_fn(question, context)
97
+ except Exception:
98
+ answer = "LLM unavailable or quota exceeded."
99
+ return {"answer": answer}