Spaces:

mgbam
/

MCP_Res

Runtime error

App Files Files Community

mgbam committed 14 days ago

Commit

96208dc

verified ·

1 Parent(s): eb1f007

Update mcp/orchestrator.py

Browse files

Files changed (1) hide show

mcp/orchestrator.py +119 -80

mcp/orchestrator.py CHANGED Viewed

@@ -1,99 +1,138 @@
-# ──────────────────────── mcp/orchestrator.py ─────────────────────────
-"""Dual‑LLM orchestrator coordinating literature ↔ annotation ↔ trials.
-Adds gene/variant enrichment with MyGene.info → Ensembl → OpenTargets → cBio.
 """
-import asyncio
-from typing import Dict, Any, List
-from mcp.arxiv             import fetch_arxiv
-from mcp.pubmed            import fetch_pubmed
-from mcp.nlp               import extract_keywords
-from mcp.umls              import lookup_umls
-from mcp.openfda           import fetch_drug_safety
-from mcp.clinicaltrials    import search_trials
-from mcp.gene_hub          import resolve_gene           # MyGene→Ensembl→OT
-from mcp.cbio              import fetch_cbio_variants
-from mcp.openai_utils      import ai_summarize, ai_qa
-from mcp.gemini            import gemini_summarize, gemini_qa
-_DEF = "openai"
-# ------------ light LLM router ------------
-def _llm_router(llm: str):
-    if llm.lower() == "gemini":
         return gemini_summarize, gemini_qa, "gemini"
     return ai_summarize, ai_qa, "openai"
-# ---------------- gene / variant enrichment --------------------------
-async def _enrich_gene_block(keywords: List[str]) -> Dict[str, Any]:
-    out: List[Dict] = []
-    variants: Dict[str, List[Dict]] = {}
-    for kw in keywords:
-        g = await resolve_gene(kw)
-        if g:
-            out.append(g)
-            # fetch tumour variants – fire & forget (errors ignored)
-            try:
-                variants[kw] = await fetch_cbio_variants(kw)
-            except Exception:
-                variants[kw] = []
-    return {"genes": out, "variants": variants}
-# ---------------- orchestrator entry‑points --------------------------
-async def orchestrate_search(query: str, llm: str = _DEF) -> Dict[str, Any]:
-    """Run search, summarise and join annotations for the UI."""
-    # literature ------------------------------------------------------
-    arxiv_task  = asyncio.create_task(fetch_arxiv(query, max_results=20))
-    pubmed_task = asyncio.create_task(fetch_pubmed(query, max_results=20))
-    papers      = sum(await asyncio.gather(arxiv_task, pubmed_task), [])
-    # NLP keyword extraction -----------------------------------------
-    blob    = " ".join(p.get("summary", "") for p in papers)[:60_000]
-    keywords = extract_keywords(blob)[:12]
-    # enrichment (in parallel) ---------------------------------------
-    umls_f      = [lookup_umls(k)       for k in keywords]
-    fda_f       = [fetch_drug_safety(k) for k in keywords]
-    gene_block  = asyncio.create_task(_enrich_gene_block(keywords))
-    trials_task = asyncio.create_task(search_trials(query, max_studies=20))
-    umls, fda, gene_data, trials = await asyncio.gather(
-        asyncio.gather(*umls_f, return_exceptions=True),
-        asyncio.gather(*fda_f,  return_exceptions=True),
-        gene_block,
-        trials_task,
     )
-    # summarise via LLM ----------------------------------------------
-    summarise, _, engine_name = _llm_router(llm)
     try:
-        summary = await summarise(blob)
     except Exception:
-        summary = "LLM summarisation unavailable."  # graceful fallback
     return {
-        "papers": papers,
-        "umls":   umls,
-        "drug_safety": fda,
-        "genes":        gene_data["genes"],
-        "variants":     gene_data["variants"],
-        "clinical_trials": trials,
-        "ai_summary": summary,
-        "llm_used":  engine_name,
     }
-async def answer_ai_question(question: str, *, context: str, llm: str = _DEF) -> Dict[str, str]:
-    """Follow‑up Q&A via selected LLM."""
     _, qa_fn, _ = _llm_router(llm)
     try:
         answer = await qa_fn(question, context)
     except Exception:
         answer = "LLM unavailable or quota exceeded."
-    return {"answer": answer}

 """
+MedGenesis – multi-API orchestrator
+──────────────────────────────────
+• Supports OpenAI or Gemini (pass llm="openai" | "gemini")
+• Falls back between redundant data sources whenever possible
+• All network I/O is async & individually time-bounded
+"""
+from __future__ import annotations
+import asyncio, textwrap
+from typing import Any, Dict, List, Tuple
+# ── 1. Literature helpers ────────────────────────────────────────────
+from mcp.arxiv            import fetch_arxiv
+from mcp.pubmed           import fetch_pubmed
+# ── 2. Gene / disease / expression helpers ───────────────────────────
+from mcp.gene_hub         import resolve_gene          # smart dispatcher
+from mcp.mygene           import fetch_gene_info
+from mcp.ensembl          import fetch_ensembl
+from mcp.opentargets      import fetch_ot              # tractability, constraint
+from mcp.cbio             import fetch_cbio
+# ── 3. Safety, trials, concepts ──────────────────────────────────────
+from mcp.openfda          import fetch_drug_safety
+from mcp.clinicaltrials   import search_trials
+from mcp.umls             import lookup_umls
+from mcp.disgenet         import disease_to_genes
+# ── 4. Chem & drug metadata ──────────────────────────────────────────
+from mcp.drugcentral_ext  import fetch_drugcentral
+from mcp.pubchem_ext      import fetch_compound
+# ── 5. LLM utils (OpenAI & Gemini) ───────────────────────────────────
+from mcp.openai_utils     import ai_summarize, ai_qa
+from mcp.gemini           import gemini_summarize, gemini_qa
+###############################################################################
+# Internal routing helpers
+###############################################################################
+_DEFAULT_LLM = "openai"
+def _llm_router(choice: str) -> Tuple:
+    """
+    Return (summary_fn, qa_fn, tag) for the requested engine.
+    """
+    if str(choice).lower() == "gemini":
         return gemini_summarize, gemini_qa, "gemini"
     return ai_summarize, ai_qa, "openai"
+###############################################################################
+#  High-level enrichment helpers
+###############################################################################
+async def _keyword_enrichment(keywords: List[str]) -> Dict[str, Any]:
+    """
+    Fan out to the UMLS, drug-safety and gene-resolution lookups in parallel, one call per keyword.
+    """
+    umls_tasks  = [lookup_umls(k)         for k in keywords]
+    fda_tasks   = [fetch_drug_safety(k)   for k in keywords]
+    gene_tasks  = [resolve_gene(k)        for k in keywords]
+    # return_exceptions=True makes each inner gather return failures as values instead of raising
+    umls, fda, genes = await asyncio.gather(
+        asyncio.gather(*umls_tasks,  return_exceptions=True),
+        asyncio.gather(*fda_tasks,   return_exceptions=True),
+        asyncio.gather(*gene_tasks,  return_exceptions=True),
     )
+    # flatten & sanitise
+    return {
+        "umls" : [u for u in umls  if not isinstance(u, Exception)],
+        "fda"  : [d for d in fda   if not isinstance(d, Exception)],
+        "genes": [g for g in genes if not isinstance(g, Exception)],
+    }
+###############################################################################
+#  Public orchestration entry-points
+###############################################################################
+async def orchestrate_search(query: str, *, llm: str=_DEFAULT_LLM,
+                             max_papers: int = 25,
+                             max_trials: int = 20) -> Dict[str, Any]:
+    """
+    Full pipeline:
+      1. Fetch literature  (arXiv + PubMed)
+      2. Derive keywords   (simple TF filtering)
+      3. Multi-API enrich  (UMLS, safety, gene, trials, chem)
+      4. Summarise with LLM
+    """
+    # ── 1 literature (parallel) ───────────────────────────────────────
+    arxiv_task  = asyncio.create_task(fetch_arxiv(query,  max_results=max_papers//2))
+    pubmed_task = asyncio.create_task(fetch_pubmed(query, max_results=max_papers//2))
+    papers      = sum(await asyncio.gather(arxiv_task, pubmed_task, return_exceptions=False), [])
+    # ── 2 keywords (top-8 by naive word-freq) ─────────────────────────
+    joined  = " ".join(p["summary"] for p in papers)
+    tokens  = [w for w in joined.split() if len(w) > 4]
+    freq    = {}
+    for t in tokens: freq[t] = freq.get(t, 0) + 1
+    keywords = sorted(freq, key=freq.get, reverse=True)[:8]
+    # ── 3 enrichment ──────────────────────────────────────────────────
+    enrich_task  = asyncio.create_task(_keyword_enrichment(keywords))
+    trials_task  = asyncio.create_task(search_trials(query, max_studies=max_trials))
+    gene_dis_gen = asyncio.create_task(disease_to_genes(query))  # coarse disease string
+    enrich, trials, gene_dis = await asyncio.gather(enrich_task, trials_task, gene_dis_gen)
+    # ── 4 LLM summary & return ────────────────────────────────────────
+    summarise_fn, _, engine_tag = _llm_router(llm)
     try:
+        ai_summary = await summarise_fn(joined[:15000])
     except Exception:
+        ai_summary = "LLM unavailable or quota exceeded."
     return {
+        "papers"          : papers,
+        "keywords"        : keywords,
+        "umls"            : enrich["umls"],
+        "drug_safety"     : enrich["fda"],
+        "genes"           : enrich["genes"],
+        "gene_disease"    : gene_dis,
+        "clinical_trials" : trials,
+        "ai_summary"      : ai_summary,
+        "llm_used"        : engine_tag,
     }
+async def answer_ai_question(question: str, *, context: str,
+                             llm: str=_DEFAULT_LLM) -> Dict[str, str]:
+    """
+    Follow-up Q-A on demand.
+    """
     _, qa_fn, _ = _llm_router(llm)
     try:
         answer = await qa_fn(question, context)
     except Exception:
         answer = "LLM unavailable or quota exceeded."
+    return {"answer": answer}