Spaces:
Sleeping
Sleeping
File size: 3,957 Bytes
e2c04b6 5d480b1 a75a9dc 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 a75a9dc 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 2efa720 5d480b1 7b0bd9a 5d480b1 e2c04b6 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a a75a9dc 7b0bd9a 5d480b1 7b0bd9a 5d480b1 7b0bd9a a75a9dc 5d480b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import os
import re
from datetime import datetime
from typing import Dict, Any, List
from .ontology import expand_terms_with_ontology
from .structures import fetch_structures_for_terms
from .narration import narrate_text
from .graphdb import write_topic_and_papers
from .providers import (
run_deepseek_summary,
run_gemini_polish,
run_openai_image,
run_hf_image,
pubmed_fallback_search
)
# Runtime configuration, read once at import time from the process
# environment. Every lookup yields None when the variable is not set.

# Credentials forwarded to expand_terms_with_ontology() by research_once().
UMLS_API_KEY = os.environ.get("UMLS_API_KEY")
BIOPORTAL_API_KEY = os.environ.get("BIOPORTAL_API_KEY")

# Credentials forwarded to pubmed_fallback_search() by research_once().
NCBI_API_KEY = os.environ.get("NCBI_API_KEY")
NCBI_EMAIL = os.environ.get("NCBI_EMAIL")

# research_once() only narrates when this key is set.
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY")

# research_once() only writes to the graph database when this URI is set.
NEO4J_URI = os.environ.get("NEO4J_URI")

# When true, research_once() wraps the query in the synthetic-biology prompt.
SYNBIO_MODE = True
def _strip_trailing_punctuation(token: str) -> str:
    """Remove sentence punctuation and unbalanced ')' from the end of *token*.

    The citation regexes are greedy and accept '.', ';', ':' and ')' as
    ordinary characters, so a DOI or URL at the end of a sentence or inside
    parentheses drags that punctuation along. A closing parenthesis is only
    removed while the token has more ')' than '(', so DOIs that legitimately
    contain balanced parentheses are kept intact.
    """
    while token:
        last = token[-1]
        if last in ".,;:":
            token = token[:-1]
        elif last == ")" and token.count(")") > token.count("("):
            token = token[:-1]
        else:
            break
    return token


def extract_citations(text: str) -> List[Dict[str, str]]:
    """Extract DOI, PMID and plain-URL citations from model output.

    Args:
        text: Free text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``{"type", "id", "url"}`` dicts, DOIs first, then PMIDs,
        then remaining URLs, each group in order of first appearance.
        ``type`` is ``"DOI"``, ``"PMID"`` or ``"URL"``; ``id`` is empty for
        plain URLs. Entries are unique by ``url``.
    """
    if not text:
        return []

    doi_pattern = r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)"
    pmid_pattern = r"PMID:\s*(\d+)"
    url_pattern = r"(https?://[^\s)]+)"

    citations: List[Dict[str, str]] = []
    seen_urls = set()

    def _add(kind: str, identifier: str, url: str) -> None:
        # De-duplicate on the resolved URL so a source cited several times
        # (or cited both as a DOI and as its doi.org link) appears once.
        if url in seen_urls:
            return
        seen_urls.add(url)
        citations.append({"type": kind, "id": identifier, "url": url})

    for match in re.finditer(doi_pattern, text, re.IGNORECASE):
        doi = _strip_trailing_punctuation(match.group(1))
        # Skip matches whose suffix was nothing but punctuation.
        if not doi.partition("/")[2]:
            continue
        _add("DOI", doi, f"https://doi.org/{doi}")

    for match in re.finditer(pmid_pattern, text, re.IGNORECASE):
        pmid = match.group(1)
        _add("PMID", pmid, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/")

    for match in re.finditer(url_pattern, text, re.IGNORECASE):
        _add("URL", "", _strip_trailing_punctuation(match.group(1)))

    return citations
def synthetic_biology_prompt_inject(query: str, expanded_terms: List[str]) -> str:
    """Wrap a query in a synthetic-biology system preamble.

    The result is the fixed expert-persona preamble, a blank line, the
    original query, and the expanded terms joined with ", " on a final line.
    """
    persona = (
        "You are an expert synthetic biologist and AI researcher. "
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, "
        "biosensors, and biosecurity. Integrate literature, molecular structures, market trends, "
        "and policy/regulatory outlook. Produce a structured, citation-rich report."
    )
    term_list = ", ".join(expanded_terms)
    return f"{persona}\n\nQuery: {query}\nExpanded terms: {term_list}"
def research_once(query: str, graph_preview: bool = True, narration: bool = True) -> Dict[str, Any]:
    """Run the synthetic biology research pipeline once for *query*.

    Steps run strictly in sequence: ontology expansion, optional prompt
    enrichment, DeepSeek summary, Gemini polish, citation extraction (with a
    PubMed fallback when none are found), structure lookup, image generation
    (OpenAI first, Hugging Face as fallback), optional graph write, and
    optional narration.

    Args:
        query: The research question.
        graph_preview: When true and ``NEO4J_URI`` is set, the topic, its
            citations and expanded terms are passed to
            ``write_topic_and_papers``.
        narration: When true and ``ELEVEN_LABS_API_KEY`` is set, the polished
            summary is passed to ``narrate_text``; otherwise ``audio_url`` is
            ``None``.

    Returns:
        A dict with the keys ``timestamp``, ``query``, ``expanded_terms``,
        ``summary``, ``citations``, ``structures``, ``visual_image_url`` and
        ``audio_url``. ``timestamp`` is a naive ISO-8601 string in UTC.
    """
    # Imported here because the module header only imports `datetime`.
    from datetime import timezone

    # 1. Expand query with ontology
    expanded_terms = expand_terms_with_ontology(query, UMLS_API_KEY, BIOPORTAL_API_KEY)

    # 2. Inject synthetic biology context
    if SYNBIO_MODE:
        enriched_query = synthetic_biology_prompt_inject(query, expanded_terms)
    else:
        enriched_query = query

    # 3. Run DeepSeek summarization
    raw_summary = run_deepseek_summary(enriched_query)

    # 4. Polish with Gemini
    polished_summary = run_gemini_polish(raw_summary)

    # 5. Extract citations; fall back to PubMed only when the text has none
    citations = extract_citations(polished_summary)
    if not citations:
        fallback_cites = pubmed_fallback_search(query, NCBI_API_KEY, NCBI_EMAIL)
        # Guard against a falsy result (e.g. None) so the pipeline does not
        # crash on extend(); the helper's return contract is not visible here.
        if fallback_cites:
            citations.extend(fallback_cites)

    # 6. Fetch molecular structures
    structures = fetch_structures_for_terms(expanded_terms)

    # 7. Generate visual diagram, falling back when OpenAI returns nothing
    visual_image_url = run_openai_image(query)
    if not visual_image_url:
        print("[Image] Falling back to Hugging Face Stable Diffusion...")
        visual_image_url = run_hf_image(f"Scientific diagram about {query}")

    # 8. Write to Neo4j
    if graph_preview and NEO4J_URI:
        write_topic_and_papers(query, citations, expanded_terms)

    # 9. Narrate executive summary
    audio_url = None
    if narration and ELEVEN_LABS_API_KEY:
        audio_url = narrate_text(polished_summary)

    # 10. Return structured output
    # datetime.utcnow() is deprecated since Python 3.12. Read an aware UTC
    # clock and drop tzinfo so the emitted string keeps its original naive
    # format (no "+00:00" suffix) for existing consumers.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "timestamp": timestamp,
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": polished_summary,
        "citations": citations,
        "structures": structures,
        "visual_image_url": visual_image_url,
        "audio_url": audio_url,
    }
|