mgbam committed
Commit c30e46a · verified · 1 parent: 19e03c6

Update mcp/orchestrator.py

Files changed (1)
  1. mcp/orchestrator.py  +101 -88
mcp/orchestrator.py CHANGED
@@ -1,114 +1,127 @@
  """
- MedGenesis – dual-LLM orchestrator (OpenAI + Gemini)
- ----------------------------------------------------
- Returns a single dict the UI expects. New keys:
-
-   • variants      – mutation summaries from cBioPortal
-   • variant_count – quick count for empty-tab logic
  """

- import asyncio
- from typing import Dict, Any, List
-
- # literature + NLP
  from mcp.arxiv  import fetch_arxiv
  from mcp.pubmed import fetch_pubmed
- from mcp.nlp    import extract_keywords
-
- # enrichment
- from mcp.umls    import lookup_umls
- from mcp.openfda import fetch_drug_safety
  from mcp.ncbi    import search_gene, get_mesh_definition
- from mcp.disgenet import disease_to_genes
- from mcp.clinicaltrials import search_trials
  from mcp.mygene  import fetch_gene_info
  from mcp.ensembl import fetch_ensembl
  from mcp.opentargets import fetch_ot
- from mcp.cbio import fetch_cbio   # NEW
-
- # LLMs
  from mcp.openai_utils import ai_summarize, ai_qa
  from mcp.gemini       import gemini_summarize, gemini_qa

- _DEF = "openai"

- def _llm_router(llm: str):
-     llm = (llm or _DEF).lower()
-     if llm == "gemini":
-         return ("gemini", gemini_summarize, gemini_qa)
-     return ("openai", ai_summarize, ai_qa)

- # ---------------- gene meta helper ----------------
- async def _resolve_gene(sym: str) -> Dict[str, Any]:
-     for fn in (fetch_gene_info, fetch_ensembl, fetch_ot):
-         try:
-             data = await fn(sym)
-             if data:
-                 return data
-         except Exception:
-             continue
-     return {}
-
- # ---------------- orchestrator --------------------
  async def orchestrate_search(query: str, *, llm: str = _DEF) -> Dict[str, Any]:
-     # 1 literature ---------------------------------------------------
-     arxiv_f  = asyncio.create_task(fetch_arxiv(query))
-     pubmed_f = asyncio.create_task(fetch_pubmed(query))
-     papers   = sum(await asyncio.gather(arxiv_f, pubmed_f), [])
-
-     # 2 keywords -----------------------------------------------------
-     blob = " ".join(p["summary"] for p in papers)
-     keys = extract_keywords(blob)[:8] if blob else []
-
-     # 3 parallel enrichment -----------------------------------------
-     umls_f    = [lookup_umls(k)         for k in keys]
-     fda_f     = [fetch_drug_safety(k)   for k in keys]
-     ncbi_f    = [search_gene(k)         for k in keys]
-     mesh_f    = [get_mesh_definition(k) for k in keys]
-     gene_meta = [_resolve_gene(k)       for k in keys[:3]]   # cheap
-     trials_f  = asyncio.create_task(search_trials(query, max_studies=20))
-
-     # primary await
-     (
-         umls, fda, ncbi, meshes, gmeta, trials
-     ) = await asyncio.gather(
-         asyncio.gather(*umls_f, return_exceptions=True),
-         asyncio.gather(*fda_f,  return_exceptions=True),
-         asyncio.gather(*ncbi_f, return_exceptions=True),
-         asyncio.gather(*mesh_f, return_exceptions=True),
-         asyncio.gather(*gene_meta, return_exceptions=True),
-         trials_f,
      )

-     # 4 variants (fire & forget; don't fail whole run) --------------
-     var_jobs = [fetch_cbio(g.get("symbol") or k)
-                 for g, k in zip(gmeta, keys[:len(gmeta)])]
-     try:
-         variants = sum(await asyncio.gather(*var_jobs), [])
-     except Exception:
-         variants = []
-
-     # 5 LLM summary -------------------------------------------------
-     _, summarise, _ = _llm_router(llm)
-     summary = await summarise(blob) if blob else "No abstracts found."

      return {
          "papers"          : papers,
          "umls"            : umls,
          "drug_safety"     : fda,
-         "genes"           : sum(ncbi, []),
-         "mesh_defs"       : meshes,
-         "gene_meta"       : gmeta,
-         "gene_disease"    : await disease_to_genes(query) or [],
-         "clinical_trials" : trials,
-         "variants"        : variants,
-         "variant_count"   : len(variants),
          "ai_summary"      : summary,
-         "llm_used"        : llm.lower(),
      }

- # ---------------- follow-up QA --------------------
  async def answer_ai_question(question: str, *, context: str, llm: str = _DEF) -> Dict[str, str]:
-     _, _, qa_fn = _llm_router(llm)
-     ans = await qa_fn(question, context)
-     return {"answer": ans}
  """
+ MedGenesis – dual-LLM orchestrator
+ ----------------------------------
+ • Accepts llm = "openai" | "gemini"  (falls back to OpenAI)
+ • Returns one unified dict the UI can rely on.
  """
+ from __future__ import annotations
+ import asyncio, itertools, logging
+ from typing import Dict, Any, List, Tuple

  from mcp.arxiv  import fetch_arxiv
  from mcp.pubmed import fetch_pubmed
  from mcp.ncbi    import search_gene, get_mesh_definition
  from mcp.mygene  import fetch_gene_info
  from mcp.ensembl import fetch_ensembl
  from mcp.opentargets import fetch_ot
+ from mcp.umls    import lookup_umls
+ from mcp.openfda import fetch_drug_safety
+ from mcp.disgenet import disease_to_genes
+ from mcp.clinicaltrials import search_trials
+ from mcp.cbio    import fetch_cbio
  from mcp.openai_utils import ai_summarize, ai_qa
  from mcp.gemini       import gemini_summarize, gemini_qa

+ log  = logging.getLogger(__name__)
+ _DEF = "openai"          # default engine
+
+
+ # ─────────────────────────────────── helpers ───────────────────────────────────
+ def _llm_router(engine: str = _DEF) -> Tuple:
+     if engine.lower() == "gemini":
+         return gemini_summarize, gemini_qa, "gemini"
+     return ai_summarize, ai_qa, "openai"
+
+ async def _gather_safely(*aws, as_list: bool = True):
+     """await gather() that converts Exception → RuntimeError placeholder"""
+     out = await asyncio.gather(*aws, return_exceptions=True)
+     if as_list:
+         # filter exceptions – keep structure but drop failures
+         return [x for x in out if not isinstance(x, Exception)]
+     return out
+
+ async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
+     jobs = []
+     for k in keys:
+         jobs += [
+             search_gene(k),           # basic gene info
+             get_mesh_definition(k),   # MeSH definitions
+             fetch_gene_info(k),       # MyGene
+             fetch_ensembl(k),         # Ensembl x-refs
+             fetch_ot(k),              # Open Targets associations
+         ]
+     res = await _gather_safely(*jobs, as_list=False)
+
+     # slice & compress five-way fan-out
+     combo = lambda idx: [r for i, r in enumerate(res) if i % 5 == idx and r]
+     return {
+         "ncbi"     : combo(0),
+         "mesh"     : combo(1),
+         "mygene"   : combo(2),
+         "ensembl"  : combo(3),
+         "ot_assoc" : combo(4),
+     }


+ # ───────────────────────────────── orchestrator ────────────────────────────────
  async def orchestrate_search(query: str, *, llm: str = _DEF) -> Dict[str, Any]:
+     """Main entry – returns dict for the Streamlit UI"""
+     # 1 Literature – run in parallel
+     arxiv_task  = asyncio.create_task(fetch_arxiv(query))
+     pubmed_task = asyncio.create_task(fetch_pubmed(query))
+     papers_raw  = await _gather_safely(arxiv_task, pubmed_task)
+     papers      = list(itertools.chain.from_iterable(papers_raw))[:30]   # keep ≤ 30
+
+     # 2 Keyword extraction (very light – only from abstracts)
+     kws = {w for p in papers for w in p["summary"][:500].split() if w.isalpha()}
+     kws = list(kws)[:10]                         # coarse, fast -> 10 seeds
+
+     # 3 Bio-enrichment fan-out
+     umls_f      = [_safe_task(lookup_umls, k)       for k in kws]
+     fda_f       = [_safe_task(fetch_drug_safety, k) for k in kws]
+     gene_bundle = asyncio.create_task(_gene_enrichment(kws))
+     trials_task = asyncio.create_task(search_trials(query, max_studies=20))
+     cbio_task   = asyncio.create_task(fetch_cbio(kws[0] if kws else ""))
+
+     umls, fda, gene_dat, trials, variants = await asyncio.gather(
+         _gather_safely(*umls_f),
+         _gather_safely(*fda_f),
+         gene_bundle,
+         trials_task,
+         cbio_task,
      )

+     # 4 LLM summary
+     summarise_fn, _, engine = _llm_router(llm)
+     summary = await summarise_fn(" ".join(p["summary"] for p in papers)[:12000])

      return {
          "papers"          : papers,
          "umls"            : umls,
          "drug_safety"     : fda,
          "ai_summary"      : summary,
+         "llm_used"        : engine,
+         "genes"           : gene_dat["ncbi"] + gene_dat["ensembl"] + gene_dat["mygene"],
+         "mesh_defs"       : gene_dat["mesh"],
+         "gene_disease"    : gene_dat["ot_assoc"],
+         "clinical_trials" : trials,
+         "variants"        : variants or [],
      }

+ # ─────────────────────────────── follow-up QA ─────────────────────────────────
  async def answer_ai_question(question: str, *, context: str, llm: str = _DEF) -> Dict[str, str]:
+     """Follow-up QA using chosen LLM."""
+     _, qa_fn, _ = _llm_router(llm)
+     return {"answer": await qa_fn(f"Q: {question}\nContext: {context}\nA:")}
+
+
+ # ─────────────────────────── internal util ───────────────────────────────────
+ def _safe_task(fn, *args):
+     """Helper to wrap callable → Task returning RuntimeError on exception."""
+     async def _wrapper():
+         try:
+             return await fn(*args)
+         except Exception as exc:
+             log.warning("background task %s failed: %s", fn.__name__, exc)
+             return RuntimeError(str(exc))
+     return asyncio.create_task(_wrapper())
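
Below is a minimal usage sketch of the updated orchestrator as a synchronous caller (for example, a Streamlit callback) might drive it. It is not part of the commit: the runner script, the asyncio.run wrapper, and the example query string are illustrative assumptions; only orchestrate_search, answer_ai_question, their keyword arguments, and the returned dict keys come from the code above.

  # usage sketch – illustrative only, not part of this commit
  import asyncio

  from mcp.orchestrator import orchestrate_search, answer_ai_question


  def main() -> None:
      # Run the async orchestrator from synchronous code.
      # The query string is a hypothetical example.
      results = asyncio.run(orchestrate_search("glioblastoma EGFR", llm="openai"))

      print(results["ai_summary"])
      print(f'{len(results["papers"])} papers, {len(results["variants"])} variants')

      # Follow-up QA: reuse the engine that produced the summary
      # and pass the summary back as context.
      answer = asyncio.run(
          answer_ai_question(
              "Which genes look most relevant?",
              context=results["ai_summary"],
              llm=results["llm_used"],
          )
      )
      print(answer["answer"])


  if __name__ == "__main__":
      main()

The two asyncio.run calls keep the sketch self-contained; a long-lived UI would more likely hold a single event loop and await both coroutines inside it.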