mgbam committed on
Commit
47975fb
·
verified ·
1 Parent(s): 6b29525

Update genesis/pipeline.py

Browse files
Files changed (1) hide show
  1. genesis/pipeline.py +77 -67
genesis/pipeline.py CHANGED
@@ -1,78 +1,88 @@
1
  # genesis/pipeline.py
2
- """
3
- GENESIS-AI Research Pipeline
4
- Coordinates ontology expansion, literature retrieval, summaries, citations, structure fetching, graphDB storage, and narration.
5
- """
6
-
7
  import os
8
- from datetime import datetime
9
-
10
- from .ontology import expand_terms_with_ontology
11
- from .molecule_viewer import fetch_structure
12
- from .narration import narrate_text
13
- from .providers import (
14
- run_deepseek_summary,
15
- run_gemini_polish,
16
- run_openai_image,
17
- pubmed_fallback_search
18
  )
19
- from .graphdb import write_topic_and_papers
 
20
 
21
- # ENV
22
- ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
23
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
  NEO4J_URI = os.getenv("NEO4J_URI")
 
 
25
 
26
- SYNBIO_MODE = True
27
-
28
- def synthetic_biology_prompt_inject(query, expanded_terms):
29
- """Injects domain-specific bias toward synthetic biology research."""
30
- context = (
31
- "You are an expert in synthetic biology. Focus on CRISPR, metabolic engineering, "
32
- "living therapeutics, protein design, biosensors, and biosecurity. Include literature, "
33
- "structures, market trends, and regulatory insights with citations."
34
- )
35
- return f"{context}\n\nQuery: {query}\nExpanded terms: {', '.join(expanded_terms)}"
36
-
37
- def research_once(query, graph_preview=True, narration=True):
38
- """Runs the GENESIS-AI pipeline for a given research query."""
39
-
40
- # 1. Expand ontology
41
- expanded_terms = expand_terms_with_ontology(query)
42
-
43
- # 2. Domain injection
44
- enriched_query = synthetic_biology_prompt_inject(query, expanded_terms) if SYNBIO_MODE else query
45
-
46
- # 3. Summarize (DeepSeek)
47
- summary_raw = run_deepseek_summary(enriched_query)
48
-
49
- # 4. Polish (Gemini)
50
- summary_polished = run_gemini_polish(summary_raw)
51
-
52
- # 5. Citations
53
- citations = pubmed_fallback_search(query)
54
-
55
- # 6. Structures
56
- structures = [fetch_structure(term) for term in expanded_terms]
57
 
58
- # 7. Visual (OpenAI Image)
59
- image_url = run_openai_image(query)
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # 8. GraphDB
62
- if graph_preview and NEO4J_URI:
63
- write_topic_and_papers(query, citations, expanded_terms)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # 9. Narration
66
- audio_url = narrate_text(summary_polished) if narration and ELEVEN_LABS_API_KEY else None
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- # 10. Output
69
- return {
70
- "timestamp": datetime.utcnow().isoformat(),
71
- "query": query,
72
- "expanded_terms": expanded_terms,
73
- "summary": summary_polished,
74
- "citations": citations,
75
- "structures": structures,
76
- "image_url": image_url,
77
- "audio_url": audio_url
78
  }
 
 
 
 
1
  # genesis/pipeline.py
 
 
 
 
 
2
  import os
3
+ from genesis.providers import (
4
+ run_pubmed_literature,
5
+ run_molecule_lookup,
6
+ run_pathway_mapping,
7
+ run_funding_network,
8
+ run_image_analysis,
9
+ run_tts
 
 
 
10
  )
11
+ from genesis.utils.graph_tools import render_graph_svg
12
+ from genesis.utils.pdf_export import export_pdf_report
13
 
14
+ # API Keys (already in HF secrets)
 
 
15
  NEO4J_URI = os.getenv("NEO4J_URI")
16
+ NEO4J_USER = os.getenv("NEO4J_USER")
17
+ NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
18
 
19
+ # ---------------------------------------------------------
20
+ # 1 — Literature Mode
21
+ # ---------------------------------------------------------
22
+ def run_literature_mode(query, narration=False):
23
+ results = run_pubmed_literature(query)
24
+ output = {
25
+ "summary": results.get("summary"),
26
+ "citations": results.get("citations"),
27
+ "visuals": results.get("visuals")
28
+ }
29
+ if narration:
30
+ output["audio_url"] = run_tts(results.get("summary", ""))
31
+ return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # ---------------------------------------------------------
34
+ # 2 — Molecule Mode
35
+ # ---------------------------------------------------------
36
+ def run_molecule_mode(identifier, narration=False):
37
+ results = run_molecule_lookup(identifier)
38
+ output = {
39
+ "summary": results.get("summary"),
40
+ "structure_3d": results.get("structure_3d"),
41
+ "properties": results.get("properties")
42
+ }
43
+ if narration:
44
+ output["audio_url"] = run_tts(results.get("summary", ""))
45
+ return output
46
 
47
+ # ---------------------------------------------------------
48
+ # 3 — Pathway Mode
49
+ # ---------------------------------------------------------
50
+ def run_pathway_mode(pathway_name, narration=False):
51
+ results = run_pathway_mapping(pathway_name)
52
+ graph_svg = render_graph_svg(results.get("relationships", []))
53
+ output = {
54
+ "summary": results.get("summary"),
55
+ "graph_svg": graph_svg,
56
+ "analysis": results.get("analysis")
57
+ }
58
+ if narration:
59
+ output["audio_url"] = run_tts(results.get("summary", ""))
60
+ return output
61
 
62
+ # ---------------------------------------------------------
63
+ # 4 — Funding Mode
64
+ # ---------------------------------------------------------
65
+ def run_funding_mode(query, narration=False):
66
+ results = run_funding_network(query)
67
+ output = {
68
+ "summary": results.get("summary"),
69
+ "funding_graph": results.get("graph_svg"),
70
+ "top_investors": results.get("top_investors")
71
+ }
72
+ if narration:
73
+ output["audio_url"] = run_tts(results.get("summary", ""))
74
+ return output
75
 
76
+ # ---------------------------------------------------------
77
+ # 5 — Image Mode
78
+ # ---------------------------------------------------------
79
+ def run_image_mode(image_path, narration=False):
80
+ results = run_image_analysis(image_path)
81
+ output = {
82
+ "summary": results.get("summary"),
83
+ "annotations": results.get("annotations"),
84
+ "enhanced_image": results.get("enhanced_image")
 
85
  }
86
+ if narration:
87
+ output["audio_url"] = run_tts(results.get("summary", ""))
88
+ return output