Spaces:
Sleeping
Sleeping
Update orchestrator/provenance.py
Browse files- orchestrator/provenance.py +41 -7
orchestrator/provenance.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
-
# SQLAlchemy models for provenance
|
| 2 |
# File: orchestrator/provenance.py
|
| 3 |
-
from sqlalchemy import Column,
|
| 4 |
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
|
| 5 |
from datetime import datetime
|
| 6 |
|
|
@@ -13,7 +12,7 @@ class Paper(Base):
|
|
| 13 |
authors = Column(String)
|
| 14 |
abstract = Column(String)
|
| 15 |
fetched_at = Column(DateTime, default=datetime.utcnow)
|
| 16 |
-
runs = relationship(
|
| 17 |
|
| 18 |
class Run(Base):
|
| 19 |
__tablename__ = 'runs'
|
|
@@ -22,11 +21,46 @@ class Run(Base):
|
|
| 22 |
cell_index = Column(Integer)
|
| 23 |
output = Column(String)
|
| 24 |
executed_at = Column(DateTime, default=datetime.utcnow)
|
| 25 |
-
paper = relationship(
|
| 26 |
-
|
| 27 |
-
# Utility to initialize and get a session
|
| 28 |
|
| 29 |
def init_db(db_url: str):
    """Prepare the provenance database and return a session factory.

    Args:
        db_url: Database URL handed unchanged to ``create_engine``.

    Returns:
        The ``sessionmaker`` bound to the newly created engine.
    """
    db_engine = create_engine(db_url)
    # Create the tables declared on Base's metadata before any session is used.
    Base.metadata.create_all(db_engine)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# File: orchestrator/provenance.py
|
| 2 |
+
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine
|
| 3 |
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
|
| 4 |
from datetime import datetime
|
| 5 |
|
|
|
|
| 12 |
authors = Column(String)
|
| 13 |
abstract = Column(String)
|
| 14 |
fetched_at = Column(DateTime, default=datetime.utcnow)
|
| 15 |
+
runs = relationship('Run', back_populates='paper')
|
| 16 |
|
| 17 |
class Run(Base):
|
| 18 |
__tablename__ = 'runs'
|
|
|
|
| 21 |
cell_index = Column(Integer)
|
| 22 |
output = Column(String)
|
| 23 |
executed_at = Column(DateTime, default=datetime.utcnow)
|
| 24 |
+
paper = relationship('Paper', back_populates='runs')
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def init_db(db_url: str):
    """Set up the provenance schema and give back a session factory.

    Args:
        db_url: Database URL passed straight through to ``create_engine``.

    Returns:
        A ``sessionmaker`` bound to the engine built from ``db_url``; call
        the returned factory to obtain a session.
    """
    db_engine = create_engine(db_url)
    # Tables declared on Base's metadata are created up front so callers can
    # use the returned factory immediately.
    Base.metadata.create_all(db_engine)
    make_session = sessionmaker(bind=db_engine)
    return make_session
|
| 30 |
+
|
| 31 |
+
# File: scripts/ingest.py
|
| 32 |
+
import sys
|
| 33 |
+
import yaml
|
| 34 |
+
from orchestrator.client import MCPClient
|
| 35 |
+
|
| 36 |
+
"""
|
| 37 |
+
Usage:
|
| 38 |
+
python ingest.py "search query"
|
| 39 |
+
"""
|
| 40 |
+
if __name__ == '__main__':
    # Command-line entry point: search the configured MCP servers for papers
    # matching the query given as the first argument, then insert every hit
    # into the Chroma server.
    #
    # NOTE(review): the header comment labels this script scripts/ingest.py,
    # yet the diff adds it to orchestrator/provenance.py - confirm it lives
    # in the intended file.
    if len(sys.argv) < 2:
        print('Please provide a search query.')
        sys.exit(1)
    query = sys.argv[1]

    # Load the config through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open('config.yaml', encoding='utf-8') as config_file:
        cfg = yaml.safe_load(config_file)

    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')

    # Each search is best-effort: a failing source is reported and skipped so
    # the remaining sources can still contribute results.
    searches = (
        ('Web search error:', web, 'web_search.search', {'q': query}),
        ('PubMed error:', pubmed, 'metatool.query',
         {'source': 'PubMed', 'q': query}),
    )
    papers = []
    for error_label, client, method, params in searches:
        try:
            # ``or []`` tolerates a source that returns None for "no hits".
            papers += client.call(method, params) or []
        except Exception as e:
            print(error_label, e)

    failed = 0
    for paper in papers:
        pid = paper.get('id')
        # ``.get(key, default)`` still yields None when the key is present
        # with a null value, so fall back explicitly.
        txt = paper.get('abstract') or ''
        authors = paper.get('authors') or []
        meta = {'title': paper.get('title'), 'authors': ','.join(authors)}
        try:
            chroma.call('chroma.insert',
                        {'id': pid, 'text': txt, 'metadata': meta})
        except Exception as e:
            # Match the best-effort handling of the searches above: one bad
            # record should not discard the rest of the batch.
            failed += 1
            print(f'Chroma insert error for paper {pid}:', e)

    if failed:
        print(f'{failed} of {len(papers)} papers could not be inserted.')
    print('Done ingesting!')
|