Spaces:

mgbam
/

MCP_Res

Starting on CPU Upgrade

App Files Files Community

mgbam committed 3 days ago

Commit

0fb7617

verified ·

1 Parent(s): 70ede12

Update mcp/orchestrator.py

Browse files

Files changed (1) hide show

mcp/orchestrator.py +64 -24

mcp/orchestrator.py CHANGED Viewed

@@ -9,7 +9,7 @@ MedGenesis – dual-LLM orchestrator (v5)
 from __future__ import annotations
 import asyncio, itertools, logging
-from typing import Dict, Any, List
 from mcp.arxiv           import fetch_arxiv
 from mcp.pubmed          import fetch_pubmed
@@ -29,15 +29,19 @@ log = logging.getLogger(__name__)
 _DEFAULT_LLM = "openai"
-def _llm_router(engine: str = _DEFAULT_LLM):
     if engine.lower() == "gemini":
         return gemini_summarize, gemini_qa, "gemini"
     return ai_summarize, ai_qa, "openai"
 async def _safe_gather(*tasks, return_exceptions: bool = False):
     results = await asyncio.gather(*tasks, return_exceptions=True)
-    cleaned = []
     for r in results:
         if isinstance(r, Exception):
             log.warning("Task failed: %s", r)
@@ -49,7 +53,16 @@ async def _safe_gather(*tasks, return_exceptions: bool = False):
 async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
-    jobs = []
     for k in keys:
         jobs.extend([
             asyncio.create_task(search_gene(k)),
@@ -58,9 +71,11 @@ async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
             asyncio.create_task(fetch_ensembl(k)),
             asyncio.create_task(fetch_ot(k)),
         ])
-    res = await _safe_gather(*jobs, return_exceptions=True)
-    # split into buckets of 5
-    def bucket(i): return [x for idx, x in enumerate(res) if idx % 5 == i and not isinstance(x, Exception)]
     return {
         "ncbi": bucket(0),
         "mesh": bucket(1),
@@ -71,22 +86,35 @@ async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
 async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, Any]:
     # 1) Literature
-    pm_t = asyncio.create_task(fetch_pubmed(query, max_results=7))
-    ar_t = asyncio.create_task(fetch_arxiv(query, max_results=7))
-    papers_raw = await _safe_gather(pm_t, ar_t)
     papers = list(itertools.chain.from_iterable(papers_raw))[:30]
-    # 2) Seeds
     seeds = {
-        w for p in papers for w in p.get("summary", "")[:500].split() if w.isalpha()
     }
     seeds = list(seeds)[:10]
-    # 3) Fan-out
     umls_tasks = [asyncio.create_task(lookup_umls(k)) for k in seeds]
     fda_tasks  = [asyncio.create_task(fetch_drug_safety(k)) for k in seeds]
-    gene_t     = asyncio.create_task(_gene_enrichment(seeds))
     trials_t   = asyncio.create_task(fetch_clinical_trials(query, max_studies=10))
     cbio_t     = asyncio.create_task(
         fetch_cbio_variants(seeds[0]) if seeds else asyncio.sleep(0, result=[])
@@ -95,12 +123,12 @@ async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, A
     umls_list, fda_list, gene_data, trials, variants = await asyncio.gather(
         _safe_gather(*umls_tasks, return_exceptions=True),
         _safe_gather(*fda_tasks, return_exceptions=True),
-        gene_t,
         trials_t,
         cbio_t,
     )
-    # 4) Genes
     genes = {
         g["symbol"]
         for src in (gene_data["ncbi"], gene_data["mygene"], gene_data["ensembl"], gene_data["ot_assoc"])
@@ -108,22 +136,31 @@ async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, A
     }
     genes = list(genes)
-    # 5) Dedupe variants by coords
-    seen = set(); unique_vars = []
     for v in variants or []:
-        key = (v.get("chromosome"), v.get("startPosition"), v.get("referenceAllele"), v.get("variantAllele"))
         if key not in seen:
-            seen.add(key); unique_vars.append(v)
-    # 6) LLM summary
-    sum_fn, _, engine_used = _llm_router(llm)
     combined = " ".join(p.get("summary", "") for p in papers)
-    ai_summary = await sum_fn(combined[:12000])
     return {
         "papers": papers,
         "umls": [u for u in umls_list if not isinstance(u, Exception)],
-        "drug_safety": list(itertools.chain.from_iterable(dfa for dfa in fda_list if isinstance(dfa, list))),
         "clinical_trials": trials or [],
         "variants": unique_vars,
         "genes": gene_data["ncbi"] + gene_data["ensembl"] + gene_data["mygene"],
@@ -135,6 +172,9 @@ async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, A
 async def answer_ai_question(question: str, context: str, llm: str = _DEFAULT_LLM) -> Dict[str, str]:
     _, qa_fn, _ = _llm_router(llm)
     prompt = f"Q: {question}\nContext: {context}\nA:"
     try:

 from __future__ import annotations
 import asyncio, itertools, logging
+from typing import Dict, Any, List, Tuple
 from mcp.arxiv           import fetch_arxiv
 from mcp.pubmed          import fetch_pubmed
 _DEFAULT_LLM = "openai"
+def _llm_router(engine: str = _DEFAULT_LLM) -> Tuple:
+    """Choose summarization and QA functions based on engine name."""
     if engine.lower() == "gemini":
         return gemini_summarize, gemini_qa, "gemini"
     return ai_summarize, ai_qa, "openai"
 async def _safe_gather(*tasks, return_exceptions: bool = False):
+    """
+    Await multiple coroutines, log any exceptions, and optionally return them.
+    """
     results = await asyncio.gather(*tasks, return_exceptions=True)
+    cleaned: List[Any] = []
     for r in results:
         if isinstance(r, Exception):
             log.warning("Task failed: %s", r)
 async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
+    """
+    Fan-out gene-related endpoints for each seed keyword:
+      - NCBI gene lookup
+      - MeSH definition
+      - MyGene.info
+      - Ensembl cross-refs
+      - OpenTargets associations
+    Returns a dict of results.
+    """
+    jobs: List[asyncio.Task] = []
     for k in keys:
         jobs.extend([
             asyncio.create_task(search_gene(k)),
             asyncio.create_task(fetch_ensembl(k)),
             asyncio.create_task(fetch_ot(k)),
         ])
+    results = await _safe_gather(*jobs, return_exceptions=True)
+    def bucket(idx: int) -> List[Any]:
+        return [res for i, res in enumerate(results) if i % 5 == idx and not isinstance(res, Exception)]
     return {
         "ncbi": bucket(0),
         "mesh": bucket(1),
 async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, Any]:
+    """
+    Main entry point. Performs:
+      1. Literature fetch (PubMed + arXiv)
+      2. Keyword seed extraction
+      3. Bio-enrichment (UMLS, OpenFDA, gene services)
+      4. Clinical trials lookup
+      5. cBioPortal variants
+      6. AI LLM summary
+    Returns a unified dict for the UI.
+    """
     # 1) Literature
+    pubmed_t = asyncio.create_task(fetch_pubmed(query, max_results=7))
+    arxiv_t  = asyncio.create_task(fetch_arxiv(query, max_results=7))
+    papers_raw = await _safe_gather(pubmed_t, arxiv_t)
     papers = list(itertools.chain.from_iterable(papers_raw))[:30]
+    # 2) Seed keywords
     seeds = {
+        w.strip()
+        for p in papers
+        for w in p.get("summary", "")[:500].split()
+        if w.isalpha()
     }
     seeds = list(seeds)[:10]
+    # 3) Bio-enrichment fan-out
     umls_tasks = [asyncio.create_task(lookup_umls(k)) for k in seeds]
     fda_tasks  = [asyncio.create_task(fetch_drug_safety(k)) for k in seeds]
+    gene_task  = asyncio.create_task(_gene_enrichment(seeds))
     trials_t   = asyncio.create_task(fetch_clinical_trials(query, max_studies=10))
     cbio_t     = asyncio.create_task(
         fetch_cbio_variants(seeds[0]) if seeds else asyncio.sleep(0, result=[])
     umls_list, fda_list, gene_data, trials, variants = await asyncio.gather(
         _safe_gather(*umls_tasks, return_exceptions=True),
         _safe_gather(*fda_tasks, return_exceptions=True),
+        gene_task,
         trials_t,
         cbio_t,
     )
+    # 4) Deduplicate gene symbols from enrichment
     genes = {
         g["symbol"]
         for src in (gene_data["ncbi"], gene_data["mygene"], gene_data["ensembl"], gene_data["ot_assoc"])
     }
     genes = list(genes)
+    # 5) Deduplicate variants by genomic coordinates
+    seen: set = set()
+    unique_vars: List[dict] = []
     for v in variants or []:
+        key = (
+            v.get("chromosome"),
+            v.get("startPosition"),
+            v.get("referenceAllele"),
+            v.get("variantAllele"),
+        )
         if key not in seen:
+            seen.add(key)
+            unique_vars.append(v)
+    # 6) LLM-driven summary
+    summarize_fn, _, engine_used = _llm_router(llm)
     combined = " ".join(p.get("summary", "") for p in papers)
+    ai_summary = await summarize_fn(combined[:12000])
     return {
         "papers": papers,
         "umls": [u for u in umls_list if not isinstance(u, Exception)],
+        "drug_safety": list(
+            itertools.chain.from_iterable(dfa for dfa in fda_list if isinstance(dfa, list))
+        ),
         "clinical_trials": trials or [],
         "variants": unique_vars,
         "genes": gene_data["ncbi"] + gene_data["ensembl"] + gene_data["mygene"],
 async def answer_ai_question(question: str, context: str, llm: str = _DEFAULT_LLM) -> Dict[str, str]:
+    """
+    Follow-up QA: uses the designated QA function from the LLM router.
+    """
     _, qa_fn, _ = _llm_router(llm)
     prompt = f"Q: {question}\nContext: {context}\nA:"
     try: