Spaces:
Sleeping
Sleeping
| # File: orchestrator/provenance.py | |
| from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, create_engine | |
| from sqlalchemy.orm import declarative_base, relationship, sessionmaker | |
| from datetime import datetime | |
# Shared declarative base: every ORM model in this module subclasses it, and
# its metadata is what init_db() uses to create the tables.
Base = declarative_base()
class Paper(Base):
    """ORM model for a row of the ``papers`` table.

    A paper is identified by a string primary key and carries free-text
    ``title``, ``authors`` and ``abstract`` columns. The ``runs`` relationship
    lists the ``Run`` rows whose ``paper_id`` points at this paper.
    """

    __tablename__ = "papers"

    # String primary key (not autoincremented, unlike Run.id).
    id = Column(String, primary_key=True)
    title = Column(String)
    authors = Column(String)
    abstract = Column(String)

    # Defaults to the result of datetime.utcnow() when no value is supplied.
    # NOTE(review): utcnow() yields a naive datetime and is deprecated since
    # Python 3.12 -- consider a timezone-aware default in a follow-up.
    fetched_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of Run.paper.
    runs = relationship("Run", back_populates="paper")
class Run(Base):
    """ORM model for a row of the ``runs`` table.

    Each run stores a ``cell_index`` and its ``output`` and references a
    ``Paper`` through the ``paper_id`` foreign key.
    """

    __tablename__ = "runs"

    # Integer surrogate key, explicitly flagged as autoincrementing.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Foreign key to papers.id.
    paper_id = Column(String, ForeignKey("papers.id"))
    cell_index = Column(Integer)
    output = Column(String)

    # Defaults to the result of datetime.utcnow() when no value is supplied.
    # NOTE(review): utcnow() yields a naive datetime and is deprecated since
    # Python 3.12 -- consider a timezone-aware default in a follow-up.
    executed_at = Column(DateTime, default=datetime.utcnow)

    # Counterpart of Paper.runs.
    paper = relationship("Paper", back_populates="runs")
def init_db(db_url: str):
    """Create the tables declared on ``Base`` and return a session factory.

    Args:
        db_url: Database URL handed unchanged to ``create_engine``.

    Returns:
        A ``sessionmaker`` bound to the engine built from ``db_url``; call it
        to obtain a new session.
    """
    db_engine = create_engine(db_url)
    # Emit CREATE TABLE for every model registered on Base's metadata.
    Base.metadata.create_all(bind=db_engine)
    session_factory = sessionmaker(bind=db_engine)
    return session_factory
| # File: scripts/ingest.py | |
| import sys | |
| import yaml | |
| from orchestrator.client import MCPClient | |
| """ | |
| Usage: | |
| python ingest.py "search query" | |
| """ | |
if __name__ == '__main__':
    # Command-line entry point: run the query against the web-search and
    # PubMed MCP servers, then insert every returned paper into Chroma.
    if len(sys.argv) < 2:
        print('Please provide a search query.')
        sys.exit(1)
    query = sys.argv[1]

    # Load the config inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open('config.yaml') as config_file:
        cfg = yaml.safe_load(config_file)

    servers = cfg['mcp_servers']
    web = MCPClient(servers['web_search'])
    pubmed = MCPClient(servers['pubmed'])
    chroma = MCPClient(servers['chroma'])

    print(f'Ingesting papers for query: {query}')
    papers = []

    # Each source is best-effort: a failure is reported and the other source
    # is still queried. `or []` guards against a source returning None.
    try:
        papers += web.call('web_search.search', {'q': query}) or []
    except Exception as e:
        print('Web search error:', e)
    try:
        papers += pubmed.call('metatool.query', {'source': 'PubMed', 'q': query}) or []
    except Exception as e:
        print('PubMed error:', e)

    for paper in papers:
        # `authors` may be absent, explicitly None, a list of names, or a
        # ready-made string. Joining a plain string would interleave commas
        # between its characters, so strings are passed through untouched.
        authors = paper.get('authors') or []
        if not isinstance(authors, str):
            authors = ','.join(authors)

        meta = {'title': paper.get('title'), 'authors': authors}
        chroma.call(
            'chroma.insert',
            {
                'id': paper.get('id'),
                # A present-but-null abstract is sent as an empty string.
                'text': paper.get('abstract') or '',
                'metadata': meta,
            },
        )

    print('Done ingesting!')