mgbam committed on
Commit
b15fc81
·
verified ·
1 Parent(s): a130367

Update mcp/orchestrator.py

Browse files
Files changed (1) hide show
  1. mcp/orchestrator.py +53 -89
mcp/orchestrator.py CHANGED
@@ -1,13 +1,10 @@
1
  #!/usr/bin/env python3
2
- # mcp/orchestrator.py
3
-
4
  """
5
- MedGenesis – dual-LLM orchestrator (v4)
6
  ---------------------------------------
7
- Accepts llm="openai" | "gemini" (defaults to OpenAI)
8
- Safely runs all data-source calls in parallel
9
- Uses pytrials for ClinicalTrials.gov and pybioportal for cBioPortal
10
- • Returns one dict that the Streamlit UI can rely on
11
  """
12
 
13
  from __future__ import annotations
@@ -33,90 +30,95 @@ _DEFAULT_LLM = "openai"
33
 
34
 
35
def _llm_router(engine: str = _DEFAULT_LLM):
    """Return the (summarize_fn, qa_fn, engine_name) triple for *engine*."""
    if engine.lower() != "gemini":
        # Default path: any value other than "gemini" routes to OpenAI.
        return ai_summarize, ai_qa, "openai"
    return gemini_summarize, gemini_qa, "gemini"
40
 
41
 
42
  async def _safe_gather(*tasks, return_exceptions: bool = False):
43
- """
44
- Wrapper around asyncio.gather that logs failures
45
- and optionally returns exceptions as results.
46
- """
47
  results = await asyncio.gather(*tasks, return_exceptions=True)
48
  cleaned = []
49
- for idx, res in enumerate(results):
50
- if isinstance(res, Exception):
51
- log.warning("Task %d failed: %s", idx, res)
52
  if return_exceptions:
53
- cleaned.append(res)
54
  else:
55
- cleaned.append(res)
56
  return cleaned
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, Any]:
60
- """
61
- Main entry point for MedGenesis UI.
62
- Returns a dict with:
63
- - papers, umls, drug_safety, clinical_trials, variants
64
- - genes, mesh_defs, gene_disease
65
- - ai_summary, llm_used
66
- """
67
- # 1) Literature (PubMed + arXiv in parallel)
68
- pubmed_t = asyncio.create_task(fetch_pubmed(query, max_results=7))
69
- arxiv_t = asyncio.create_task(fetch_arxiv(query, max_results=7))
70
- papers_raw = await _safe_gather(pubmed_t, arxiv_t)
71
  papers = list(itertools.chain.from_iterable(papers_raw))[:30]
72
 
73
- # 2) Keyword seeds from abstracts (first 500 chars, split on whitespace)
74
  seeds = {
75
- w.strip()
76
- for p in papers
77
- for w in p.get("summary", "")[:500].split()
78
- if w.isalpha()
79
  }
80
  seeds = list(seeds)[:10]
81
 
82
- # 3) Fan-out all bio-enrichment tasks safely
83
  umls_tasks = [asyncio.create_task(lookup_umls(k)) for k in seeds]
84
  fda_tasks = [asyncio.create_task(fetch_drug_safety(k)) for k in seeds]
85
- gene_enrich_t = asyncio.create_task(_gene_enrichment(seeds))
86
- trials_t = asyncio.create_task(fetch_clinical_trials(query, max_studies=10))
87
- cbio_t = asyncio.create_task(
88
  fetch_cbio_variants(seeds[0]) if seeds else asyncio.sleep(0, result=[])
89
  )
90
 
91
  umls_list, fda_list, gene_data, trials, variants = await asyncio.gather(
92
  _safe_gather(*umls_tasks, return_exceptions=True),
93
  _safe_gather(*fda_tasks, return_exceptions=True),
94
- gene_enrich_t,
95
  trials_t,
96
  cbio_t,
97
  )
98
 
99
- # 4) Deduplicate and flatten genes
100
  genes = {
101
  g["symbol"]
102
- for source in (gene_data["ncbi"], gene_data["mygene"], gene_data["ensembl"], gene_data["ot_assoc"])
103
- for g in source if isinstance(g, dict) and g.get("symbol")
104
  }
105
  genes = list(genes)
106
 
107
- # 5) Dedupe variants by (chrom, pos, ref, alt) if returned as dicts
108
- seen = set()
109
- unique_vars: List[dict] = []
110
- for var in variants or []:
111
- key = (var.get("chromosome"), var.get("startPosition"), var.get("referenceAllele"), var.get("variantAllele"))
112
  if key not in seen:
113
- seen.add(key)
114
- unique_vars.append(var)
115
 
116
  # 6) LLM summary
117
- summarize_fn, _, engine_used = _llm_router(llm)
118
- long_text = " ".join(p.get("summary", "") for p in papers)
119
- ai_summary = await summarize_fn(long_text[:12000])
120
 
121
  return {
122
  "papers": papers,
@@ -132,45 +134,7 @@ async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, A
132
  }
133
 
134
 
135
async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
    """
    Fan-out gene-related tasks for each seed key:
      - NCBI gene lookup
      - MeSH definition
      - MyGene.info
      - Ensembl xrefs
      - OpenTargets associations
    Returns a dict of lists, one list per source.
    """
    fetchers = (search_gene, get_mesh_definition, fetch_gene_info, fetch_ensembl, fetch_ot)
    jobs = [asyncio.create_task(fn(key)) for key in keys for fn in fetchers]

    results = await _safe_gather(*jobs, return_exceptions=True)

    width = len(fetchers)

    def _column(col: int):
        # Each key contributed `width` consecutive tasks; pick out column
        # `col` and silently drop the entries that failed.
        return [
            r
            for i, r in enumerate(results)
            if i % width == col and not isinstance(r, Exception)
        ]

    names = ("ncbi", "mesh", "mygene", "ensembl", "ot_assoc")
    return {name: _column(i) for i, name in enumerate(names)}
168
-
169
-
170
  async def answer_ai_question(question: str, context: str, llm: str = _DEFAULT_LLM) -> Dict[str, str]:
171
- """
172
- Follow-up QA: wraps the chosen LLM’s QA function.
173
- """
174
  _, qa_fn, _ = _llm_router(llm)
175
  prompt = f"Q: {question}\nContext: {context}\nA:"
176
  try:
 
1
  #!/usr/bin/env python3
 
 
2
  """
3
+ MedGenesis – dual-LLM orchestrator (v5)
4
  ---------------------------------------
5
+ No external 'pytrials' dependency.
6
+ Uses direct HTTP for clinical trials.
7
+ Clean async fan-out, dual-LLM support.
 
8
  """
9
 
10
  from __future__ import annotations
 
30
 
31
 
32
def _llm_router(engine: str = _DEFAULT_LLM):
    """Map an engine name to its (summarize_fn, qa_fn, canonical_name) triple."""
    normalized = engine.lower()
    if normalized == "gemini":
        return gemini_summarize, gemini_qa, "gemini"
    # Anything other than "gemini" (including unknown values) uses OpenAI.
    return ai_summarize, ai_qa, "openai"
36
 
37
 
38
  async def _safe_gather(*tasks, return_exceptions: bool = False):
 
 
 
 
39
  results = await asyncio.gather(*tasks, return_exceptions=True)
40
  cleaned = []
41
+ for r in results:
42
+ if isinstance(r, Exception):
43
+ log.warning("Task failed: %s", r)
44
  if return_exceptions:
45
+ cleaned.append(r)
46
  else:
47
+ cleaned.append(r)
48
  return cleaned
49
 
50
 
51
async def _gene_enrichment(keys: List[str]) -> Dict[str, Any]:
    """Run the five gene lookups for every seed key and group results by source."""
    jobs = []
    for key in keys:
        jobs.append(asyncio.create_task(search_gene(key)))
        jobs.append(asyncio.create_task(get_mesh_definition(key)))
        jobs.append(asyncio.create_task(fetch_gene_info(key)))
        jobs.append(asyncio.create_task(fetch_ensembl(key)))
        jobs.append(asyncio.create_task(fetch_ot(key)))

    res = await _safe_gather(*jobs, return_exceptions=True)

    # Tasks were scheduled in a fixed 5-wide cycle per key, so result index
    # modulo 5 identifies the source; failed lookups are dropped.
    order = ("ncbi", "mesh", "mygene", "ensembl", "ot_assoc")
    buckets: Dict[str, Any] = {name: [] for name in order}
    for i, item in enumerate(res):
        if not isinstance(item, Exception):
            buckets[order[i % 5]].append(item)
    return buckets
71
+
72
+
73
  async def orchestrate_search(query: str, llm: str = _DEFAULT_LLM) -> Dict[str, Any]:
74
+ # 1) Literature
75
+ pm_t = asyncio.create_task(fetch_pubmed(query, max_results=7))
76
+ ar_t = asyncio.create_task(fetch_arxiv(query, max_results=7))
77
+ papers_raw = await _safe_gather(pm_t, ar_t)
 
 
 
 
 
 
 
78
  papers = list(itertools.chain.from_iterable(papers_raw))[:30]
79
 
80
+ # 2) Seeds
81
  seeds = {
82
+ w for p in papers for w in p.get("summary", "")[:500].split() if w.isalpha()
 
 
 
83
  }
84
  seeds = list(seeds)[:10]
85
 
86
+ # 3) Fan-out
87
  umls_tasks = [asyncio.create_task(lookup_umls(k)) for k in seeds]
88
  fda_tasks = [asyncio.create_task(fetch_drug_safety(k)) for k in seeds]
89
+ gene_t = asyncio.create_task(_gene_enrichment(seeds))
90
+ trials_t = asyncio.create_task(fetch_clinical_trials(query, max_studies=10))
91
+ cbio_t = asyncio.create_task(
92
  fetch_cbio_variants(seeds[0]) if seeds else asyncio.sleep(0, result=[])
93
  )
94
 
95
  umls_list, fda_list, gene_data, trials, variants = await asyncio.gather(
96
  _safe_gather(*umls_tasks, return_exceptions=True),
97
  _safe_gather(*fda_tasks, return_exceptions=True),
98
+ gene_t,
99
  trials_t,
100
  cbio_t,
101
  )
102
 
103
+ # 4) Genes
104
  genes = {
105
  g["symbol"]
106
+ for src in (gene_data["ncbi"], gene_data["mygene"], gene_data["ensembl"], gene_data["ot_assoc"])
107
+ for g in src if isinstance(g, dict) and g.get("symbol")
108
  }
109
  genes = list(genes)
110
 
111
+ # 5) Dedupe variants by coords
112
+ seen = set(); unique_vars = []
113
+ for v in variants or []:
114
+ key = (v.get("chromosome"), v.get("startPosition"), v.get("referenceAllele"), v.get("variantAllele"))
 
115
  if key not in seen:
116
+ seen.add(key); unique_vars.append(v)
 
117
 
118
  # 6) LLM summary
119
+ sum_fn, _, engine_used = _llm_router(llm)
120
+ combined = " ".join(p.get("summary", "") for p in papers)
121
+ ai_summary = await sum_fn(combined[:12000])
122
 
123
  return {
124
  "papers": papers,
 
134
  }
135
 
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  async def answer_ai_question(question: str, context: str, llm: str = _DEFAULT_LLM) -> Dict[str, str]:
 
 
 
138
  _, qa_fn, _ = _llm_router(llm)
139
  prompt = f"Q: {question}\nContext: {context}\nA:"
140
  try: