samyakshrestha committed
Commit d8e0712 · 0 parents (root commit)

Deploy multi-agent radiology assistant

.DS_Store ADDED
Binary file (6.15 kB)
 
.gitattributes ADDED
@@ -0,0 +1,4 @@
+ data/* filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Multiagent Xray Assistant
+ emoji: 📊
+ colorFrom: red
+ colorTo: red
+ sdk: gradio
+ sdk_version: 5.38.2
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ short_description: 'Generates a structured X-ray report using a multi-agent system'
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,54 @@
+ import gradio as gr
+ from src.pipeline import generate_report
+
+ def process_upload(image):
+     """Process an uploaded X-ray image and return a radiology report"""
+     if image is None:
+         return None, "Please upload a chest X-ray image."
+
+     try:
+         report = generate_report(image)
+         return image, report  # Return both the image and the report
+     except Exception as e:
+         return image, f"Error processing image: {str(e)}"
+
+ # Build the Gradio interface with a custom layout
+ with gr.Blocks(title="Multi-Agent Radiology Assistant") as demo:
+     gr.Markdown("# Multi-Agent Radiology Assistant")
+     gr.Markdown("Upload a chest X-ray image to receive an AI-generated radiology report produced by a multi-agent analysis pipeline.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             input_image = gr.Image(
+                 type="filepath",
+                 label="Upload Chest X-ray",
+                 height=400
+             )
+             process_btn = gr.Button("Generate Report", variant="primary")
+
+         with gr.Column(scale=1):
+             output_image = gr.Image(
+                 label="Analyzed Image",
+                 height=400,
+                 interactive=False
+             )
+
+     with gr.Row():
+         output_report = gr.Markdown(
+             label="📄 Radiology Report",
+             height=300
+         )
+
+     # Connect the button to the processing function
+     process_btn.click(
+         fn=process_upload,
+         inputs=[input_image],
+         outputs=[output_image, output_report]
+     )
+
+     # Examples section
+     gr.Markdown("### Example X-rays")
+     gr.Markdown("*Upload your own chest X-ray image above to get started*")
+
+ if __name__ == "__main__":
+     demo.launch()
data/iu_impr.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30174b890f1bedcae6925550a207e4ae6649ca353ba882b9413a8971f43ea9d2
+ size 2446560
data/iu_vecs.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43c8baa976a1587a72d070feb2c17ddee2c4fcc5a10ba8b80efa2d801ec7b5e5
+ size 15216768
data/raw_abstracts.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c2784fce84026ceaa523430cf70b6e3207e5f647713f356bc60fb40ce407887
+ size 317236278
data/text_faiss.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b33f4e9de88138cd9af54f2031c46a1c1ea9ddce6fbcf65ee879adeb7b636df3
+ size 614400045
requirements.txt ADDED
@@ -0,0 +1,13 @@
+
+ crewai==0.4
+ gradio>=4.27
+ faiss-cpu
+ groq
+ google-generativeai
+ transformers
+ torch
+ numpy
+ pillow
+ open-clip-torch
+ pydantic
+ tqdm
src/.DS_Store ADDED
Binary file (8.2 kB)
 
src/__init__.py ADDED
File without changes
src/config/agents.yaml ADDED
@@ -0,0 +1,24 @@
+ vision_agent:
+   role: "Radiology Captioning Agent"
+   goal: "Use the vision_caption_tool and return its exact output. DO NOT add, interpret, or speculate beyond what the vision_caption_tool outputs"
+   backstory: "A world-class expert radiologist AI specialized in chest X-ray interpretation"
+
+ pubmed_agent:
+   role: "Biomedical Literature Retriever specializing in retrieving the most relevant PubMed abstracts for a given chest X-ray impression"
+   goal: "Perform a SINGLE search for the top 3 most relevant PubMed abstracts based on a given medical caption using the pubmed_tool"
+   backstory: "You are an AI literature assistant. Your sole purpose is to take a text caption, use your tool exactly once to find relevant citations, and present the formatted output directly as your final answer"
+
+ iu_agent:
+   role: "Retrieval Agent specializing in retrieving the most semantically similar IU chest X-ray impression"
+   goal: "Find and return the most semantically similar impression from the IU-Xray dataset based on a given chest X-ray image"
+   backstory: "You're a biomedical assistant trained on the IU-Xray dataset. Your job is to retrieve the closest stylistic and semantic match to the current case, helping the system maintain professional and realistic radiology language. You do not generate new content; you only retrieve"
+
+ draft_agent:
+   role: "Senior thoracic radiologist with expertise in drafting X-ray reports by synthesizing information from various sources"
+   goal: "Write a clinically accurate draft radiology report for a given chest X-ray, using the AI-generated visual caption, the most similar IU X-ray impression, and the top PubMed evidence"
+   backstory: "You have 20+ years of experience drafting high-quality chest X-ray reports. Your strength lies in fusing image-grounded observations with medically accurate language and literature-backed phrasing. You prioritize clarity, medical accuracy, and correct radiological terminology over verbosity. You hedge uncertain findings and avoid speculative or unsupported claims"
+
+ critic_agent:
+   role: "Senior thoracic radiologist specializing in auditing chest X-ray reports"
+   goal: "Ensure that the radiology report is concise, medically accurate, and free from hallucinations or unsupported conclusions"
+   backstory: "You have 20+ years of experience reviewing and auditing chest X-ray reports. Your goal is to ensure clinical realism and to eliminate hallucinations and unsupported language, resulting in a concise and professional clinical report"
src/config/tasks.yaml ADDED
@@ -0,0 +1,24 @@
+ caption_task:
+   description: "Analyze the chest X-ray at '{image_path}' and return the exact output from the vision_caption_tool"
+   expected_output: "The VERBATIM output of the vision_caption_tool"
+   agent: vision_agent
+
+ pubmed_task:
+   description: "From the vision agent's report, extract ONLY the key POSITIVE finding(s) from the IMPRESSION section (ignore negations or normal findings). Use THOSE finding(s) as the input query to the pubmed_tool. Return the top 2 most relevant PubMed citations with pmid, similarity score, title, and abstract, along with the finding(s) that you used as input for the pubmed_tool"
+   expected_output: "List of 2 relevant PubMed citations with pmid, similarity score, title, and abstract, along with the finding(s) used as input"
+   agent: pubmed_agent
+
+ iu_task:
+   description: "Use the iu_tool to find the most similar impression in the IU-Xray dataset based on the X-ray image at '{image_path}'. Return the image UUID and the impression text of the closest match"
+   expected_output: "UUID and impression of the most semantically similar IU-Xray case"
+   agent: iu_agent
+
+ draft_task:
+   description: "Your task is to write a two-part radiology report based on a chest X-ray image. Use the AI-generated visual caption [①] as your PRIMARY source of findings. You may also incorporate as SECONDARY sources: language or structure from the closest IU-Xray impression [②], and medically relevant phrasing or terminology from PubMed abstracts [③]. However: NEVER contradict the visual caption. NEVER introduce information not clearly supported by one of the three sources. DO NOT speculate or offer clinical context not evident in the caption. If findings are ambiguous or limited, hedge appropriately. Tag each sentence using [①], [②], or [③] to indicate which source supports it. Your report must have two sections: FINDINGS: Objective description of radiographic features [from ①] - use [②] or [③] ONLY for language improvements or secondary detail. IMPRESSION: Concise summary of likely clinical implications based on the findings - you may reuse validated phrases from [②] or [③] ONLY if necessary, but DO NOT overreach"
+   expected_output: "A two-part radiology report with FINDINGS and IMPRESSION sections"
+   agent: draft_agent
+
+ critic_task:
+   description: "Your task is to review and refine the draft radiology report (only the FINDINGS and IMPRESSION sections) for clinical accuracy, realism, and formatting. You MUST: Validate every sentence against its source tag: [①] = Caption, [②] = IU report, [③] = PubMed abstract. Remove any statement not clearly supported by the visual caption. Remove unsupported speculation, exaggerations, or redundant hedging. Maintain standard MIMIC-CXR formatting: terse, focused, professional. You MUST NOT: Invent new findings or reword unsupported conclusions. Add clinical context not present in the visual or evidence inputs. You MAY: Improve phrasing for clarity or brevity. Remove footnote tags in your final output. Output ONLY the final cleaned report. Keep section headers intact"
+   expected_output: "A cleaned two-part radiology report with FINDINGS and IMPRESSION sections. Do NOT include source tags. Do NOT include any commentary or explanation; only the final report"
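
Note: the `{image_path}` placeholders in these task descriptions are not resolved by the YAML itself; crewai interpolates them from the `inputs` dict passed to `Crew.kickoff` at runtime. A minimal sketch of that flow (the image path below is a hypothetical example):

```python
from src.crew import RadiologyCrew

# crewai substitutes "{image_path}" in caption_task and iu_task
# with this value before the agents run.
crew = RadiologyCrew().crew()
result = crew.kickoff(inputs={"image_path": "/tmp/example_cxr.png"})
print(result)
```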
src/crew.py ADDED
@@ -0,0 +1,99 @@
+ from crewai import Agent, Crew, Process, Task, LLM
+ from crewai.project import CrewBase, agent, crew, task
+ from .tools_loader import get_tools
+
+ @CrewBase
+ class RadiologyCrew:
+     """Multi-agent radiology report generation crew"""
+
+     def __init__(self):
+         # Load tools once
+         self.tools = get_tools()
+
+         # Initialize one LLM per role, with per-role temperatures
+         self.vision_llm = LLM(model="groq/meta-llama/llama-4-scout-17b-16e-instruct", temperature=0.2)
+         self.text_llm = LLM(model="groq/llama-3.3-70b-versatile", temperature=0.1)
+         self.draft_llm = LLM(model="groq/deepseek-r1-distill-llama-70b", temperature=0.3)
+         self.critic_llm = LLM(model="groq/meta-llama/llama-4-scout-17b-16e-instruct", temperature=0.2)
+
+     @agent
+     def vision_agent(self) -> Agent:
+         return Agent(
+             config=self.agents_config['vision_agent'],
+             tools=[self.tools["vision_tool"]],
+             llm=self.vision_llm,
+             allow_delegation=False,
+             verbose=False
+         )
+
+     @agent
+     def pubmed_agent(self) -> Agent:
+         return Agent(
+             config=self.agents_config['pubmed_agent'],
+             tools=[self.tools["pubmed_tool"]],
+             llm=self.text_llm,
+             verbose=False
+         )
+
+     @agent
+     def iu_agent(self) -> Agent:
+         return Agent(
+             config=self.agents_config['iu_agent'],
+             tools=[self.tools["iu_tool"]],
+             llm=self.text_llm,
+             verbose=False
+         )
+
+     @agent
+     def draft_agent(self) -> Agent:
+         return Agent(
+             config=self.agents_config['draft_agent'],
+             llm=self.draft_llm,
+             verbose=False
+         )
+
+     @agent
+     def critic_agent(self) -> Agent:
+         return Agent(
+             config=self.agents_config['critic_agent'],
+             llm=self.critic_llm,
+             verbose=True
+         )
+
+     @task
+     def caption_task(self) -> Task:
+         return Task(config=self.tasks_config['caption_task'])
+
+     @task
+     def pubmed_task(self) -> Task:
+         return Task(
+             config=self.tasks_config['pubmed_task'],
+             context=[self.caption_task()]
+         )
+
+     @task
+     def iu_task(self) -> Task:
+         return Task(config=self.tasks_config['iu_task'])
+
+     @task
+     def draft_task(self) -> Task:
+         return Task(
+             config=self.tasks_config['draft_task'],
+             context=[self.caption_task(), self.iu_task(), self.pubmed_task()]
+         )
+
+     @task
+     def critic_task(self) -> Task:
+         return Task(
+             config=self.tasks_config['critic_task'],
+             context=[self.caption_task(), self.iu_task(), self.draft_task()]
+         )
+
+     @crew
+     def crew(self) -> Crew:
+         return Crew(
+             agents=self.agents,
+             tasks=self.tasks,
+             process=Process.sequential,
+             verbose=False
+         )
src/pipeline.py ADDED
@@ -0,0 +1,7 @@
+ from .crew import RadiologyCrew
+
+ def generate_report(image_path: str) -> str:
+     """Generate radiology report for uploaded image"""
+     crew = RadiologyCrew().crew()
+     result = crew.kickoff(inputs={"image_path": image_path})
+     return str(result).strip()
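
`generate_report` assumes the Space secrets are already present in the environment and that the LFS data files have been pulled. A rough local smoke test under those assumptions (the sample filename is hypothetical):

```python
import os
from src.pipeline import generate_report

# Both keys are expected as environment variables (Space secrets);
# nothing in this repo supplies them.
for key in ("GROQ_API_KEY", "GEMINI_API_KEY"):
    if not os.getenv(key):
        raise RuntimeError(f"{key} is not set")

print(generate_report("samples/chest_xray.jpg"))  # hypothetical sample image
```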
src/tools/__init__.py ADDED
File without changes
src/tools/iu_retrieval_tool.py ADDED
@@ -0,0 +1,87 @@
+ from crewai.tools import BaseTool
+ from pydantic import BaseModel, Field
+ from typing import Optional, Dict
+ import torch
+ import numpy as np
+ import json
+ import os
+ from PIL import Image
+ from pathlib import Path
+ from open_clip import create_model_from_pretrained
+ import torch.nn.functional as F
+
+ # ----- Input schema for the tool -----
+ class IUImageInput(BaseModel):
+     # Defines the expected input: absolute path to the query image
+     image_path: str = Field(..., description="Absolute path to the query image")
+
+ # ----- Tool class -----
+ class IUImpressionSearchTool(BaseTool):
+     # Tool metadata
+     name: str = "iu_impression_search_tool"
+     description: str = (
+         "Retrieves the most similar IU X-ray image impression based on visual similarity "
+         "using BiomedCLIP embeddings."
+     )
+     args_schema: type = IUImageInput  # Specifies the input schema
+     metadata: dict = {}  # Optional metadata for config overrides
+
+     def _run(self, image_path: str) -> str:
+         # Dynamic path resolution - same pattern as pubmed_tool
+         BASE_DIR = Path(__file__).parent.parent.parent  # Up to the project root
+         default_vecs_path = str(BASE_DIR / "data" / "iu_vecs.npy")  # Default path for IU vectors
+         default_impr_path = str(BASE_DIR / "data" / "iu_impr.jsonl")  # Default path for IU impressions
+
+         # Resolve config paths with dynamic defaults (can be overridden via metadata)
+         vecs_path = self.metadata.get("VEC_PATH", default_vecs_path)
+         impr_path = self.metadata.get("IMPR_PATH", default_impr_path)
+         model_id = self.metadata.get("MODEL_ID", "hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224")
+         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
+
+         # Check that the data files exist before proceeding
+         if not os.path.exists(vecs_path):
+             return f"Error: IU vectors file not found at {vecs_path}"
+         if not os.path.exists(impr_path):
+             return f"Error: IU impressions file not found at {impr_path}"
+
+         # Load the BiomedCLIP model and preprocessor
+         try:
+             model, preprocess = create_model_from_pretrained(model_id)
+             model = model.to(device).eval()  # Move model to device and set to eval mode
+         except Exception as e:
+             return f"Error loading BiomedCLIP model: {e}"
+
+         # Embed the input image
+         try:
+             image = Image.open(image_path).convert("RGB")  # Open and convert the image to RGB
+             tensor_img = preprocess(image).unsqueeze(0).to(device)  # Preprocess and add a batch dimension
+             with torch.no_grad():
+                 query_vec = model.encode_image(tensor_img)  # Get the image embedding
+                 query_vec = F.normalize(query_vec, dim=-1).cpu().numpy()  # Normalize and move to CPU numpy
+         except Exception as e:
+             return f"Error processing input image: {e}"
+
+         # Load the stored IU embeddings
+         try:
+             iu_vecs = np.load(vecs_path)  # Load precomputed IU image vectors
+             iu_vecs = iu_vecs / np.linalg.norm(iu_vecs, axis=1, keepdims=True)  # Normalize vectors
+         except Exception as e:
+             return f"Error loading IU vectors: {e}"
+
+         # Compute cosine similarity between the query and all IU vectors
+         similarities = np.dot(iu_vecs, query_vec.squeeze())  # Dot product equals cosine similarity for unit vectors
+         best_idx = int(np.argmax(similarities))  # Index of the most similar IU image
+
+         # Load the matched impression
+         try:
+             with open(impr_path, "r", encoding="utf-8") as f:
+                 records = [json.loads(line) for line in f]  # Load all impression records
+             best_match = records[best_idx]  # Get the best match by index
+         except Exception as e:
+             return f"Error loading impressions metadata: {e}"
+
+         # Return the formatted result with UUID and impression text
+         return (
+             f"Closest Match UUID: {best_match['uuid']}\n\n"
+             f"Impression:\n{best_match['impression'].strip()}"
+         )
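
For a quick sanity check outside the crew, the retrieval logic can be invoked via `_run` directly; this bypasses crewai's schema validation and agent plumbing, and the image path below is a hypothetical example:

```python
from src.tools.iu_retrieval_tool import IUImpressionSearchTool

tool = IUImpressionSearchTool()
# Expects data/iu_vecs.npy and data/iu_impr.jsonl to have been
# pulled via git-lfs; returns an error string otherwise.
print(tool._run(image_path="/tmp/example_cxr.png"))
```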
src/tools/pubmed_tool.py ADDED
@@ -0,0 +1,90 @@
+ from crewai.tools import BaseTool
+ from pydantic import BaseModel, Field
+ from typing import List, Dict, Optional, Any
+ import os
+ import json
+ import torch
+ import faiss
+ import numpy as np
+ from pathlib import Path
+ from .specter2_embedder import embed_texts_specter2  # Import from the same folder
+
+ # Input schema for the tool; expects a caption string
+ class PubmedQueryInput(BaseModel):
+     caption: str
+
+ # Main tool class for PubMed retrieval
+ class PubmedRetrievalTool(BaseTool):
+     # Tool name and description
+     name: str = "pubmed_retrieval_tool"
+     description: str = (
+         "Retrieves the most relevant PubMed articles for a given radiology caption."
+     )
+     args_schema: type = PubmedQueryInput
+     metadata: dict = {}
+
+     def __init__(self, **data):
+         # Initialize the base tool with the provided data
+         super().__init__(**data)
+
+     def _run(self, caption: str = None, **kwargs) -> str:
+         """
+         Retrieves relevant PubMed articles based on a radiology caption.
+         """
+         # Handle the edge case where the caption arrives via kwargs
+         if not caption and 'caption' in kwargs:
+             caption = kwargs['caption']
+
+         # Validate input: ensure a caption is provided and not empty
+         if not caption or not str(caption).strip():
+             return "Error: No caption provided. Unable to search PubMed."
+
+         caption = str(caption).strip()
+
+         # Configuration: resolve the data directory dynamically
+         BASE_DIR = Path(__file__).parent.parent.parent  # Up to the project root
+         default_data_dir = str(BASE_DIR / "data")
+         # Use the metadata config if available, otherwise the default
+         data_dir = self.metadata.get("DATA_DIR", default_data_dir)
+         top_k = self.metadata.get("TOP_K", 3)
+
+         try:
+             # Paths to the FAISS index and metadata
+             index_path = os.path.join(data_dir, "text_faiss.bin")
+             metadata_path = os.path.join(data_dir, "raw_abstracts.jsonl")
+
+             # Check that the files exist
+             if not os.path.exists(index_path):
+                 return f"Error: FAISS index not found at {index_path}"
+             if not os.path.exists(metadata_path):
+                 return f"Error: Metadata file not found at {metadata_path}"
+
+             # Read the FAISS index from disk
+             index = faiss.read_index(index_path)
+             # Load the metadata (PubMed abstracts) from the JSONL file
+             with open(metadata_path, "r", encoding="utf-8") as f:
+                 metadata = [json.loads(line) for line in f]
+
+             # Embed the input caption using the SPECTER2 model
+             query_vec = embed_texts_specter2([caption]).astype("float32")
+             # Search for the top_k most similar articles in the FAISS index
+             scores, indices = index.search(query_vec, top_k)
+
+             # Format the results for output
+             formatted = []
+             for i, (score, idx) in enumerate(zip(scores[0], indices[0]), 1):
+                 entry = metadata[idx]
+                 formatted.append(
+                     f"Citation {i}:\n"
+                     f"PMID: {entry.get('pmid', 'Unknown')}\n"
+                     f"Similarity Score: {score:.3f}\n"
+                     f"Title: {entry.get('title', 'Untitled').strip()}\n"
+                     f"Abstract: {entry.get('abstract', 'No abstract available.').strip()}\n"
+                 )
+
+             # Return the formatted citations separated by ---
+             return "\n---\n".join(formatted)
+
+         except Exception as e:
+             # Handle any errors during retrieval
+             return f"Error during PubMed search: {str(e)}"
src/tools/specter2_embedder.py ADDED
@@ -0,0 +1,25 @@
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+ import numpy as np
+ from tqdm import tqdm
+
+ # Load the tokenizer and model once at import time
+ tokenizer = AutoTokenizer.from_pretrained("allenai/specter2_base")  # Initialize the tokenizer
+ model = AutoModel.from_pretrained("allenai/specter2_base")  # Initialize the model
+ model.eval()  # Set the model to evaluation mode
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
+ model.to(device)  # Move the model to the device
+
+ def embed_texts_specter2(texts: list[str], batch_size=16) -> np.ndarray:
+     embeddings = []  # List to store batch embeddings
+
+     for i in tqdm(range(0, len(texts), batch_size), desc="Embedding with SPECTER2"):
+         batch_texts = texts[i:i+batch_size]  # Get the current batch of texts
+         inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)  # Tokenize and move to the device
+         with torch.no_grad():  # Disable gradient calculation
+             outputs = model(**inputs)  # Forward pass
+             cls_embeddings = outputs.last_hidden_state[:, 0, :]  # CLS token embedding
+             cls_embeddings = torch.nn.functional.normalize(cls_embeddings, p=2, dim=1)  # L2-normalize the embeddings
+         embeddings.append(cls_embeddings.cpu().numpy())  # Move to CPU and convert to numpy
+
+     return np.vstack(embeddings)  # Stack all batches into a single array
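
`embed_texts_specter2` returns one L2-normalized row per input string, so cosine similarity reduces to a plain dot product, which is exactly how the FAISS index in `pubmed_tool.py` is queried. A small usage sketch:

```python
from src.tools.specter2_embedder import embed_texts_specter2

vecs = embed_texts_specter2([
    "left lower lobe consolidation",
    "no acute cardiopulmonary abnormality",
])
print(vecs.shape)                # (2, 768) for the SPECTER2 base model
print(float(vecs[0] @ vecs[1]))  # cosine similarity, since rows are unit norm
```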
src/tools/vision_caption_tool.py ADDED
@@ -0,0 +1,82 @@
+ from crewai.tools import BaseTool, tool
+ from pydantic import BaseModel, Field
+ from typing import Optional
+ from google import genai
+ from google.genai.types import Part, GenerateContentConfig
+ import os
+
+ # Schema for the tool's arguments: an image path and an optional prompt
+ class VisionCaptionToolSchema(BaseModel):
+     image_path: str
+     prompt: Optional[str] = None
+
+ # Main tool class for generating radiology reports from chest X-ray images
+ class VisionCaptionTool(BaseTool):
+     name: str = "vision_caption_tool"
+     description: str = (
+         "Generates a structured radiology report from a chest X-ray image using Gemini 2.5 Flash."
+     )
+     args_schema: type = VisionCaptionToolSchema
+     metadata: dict = {}
+
+     # Core method that runs the tool logic
+     def _run(self, image_path: str, prompt: Optional[str] = None) -> str:
+         # Retrieve the Gemini API key from metadata
+         api_key = self.metadata.get("GEMINI_API_KEY")
+         if not api_key:
+             raise ValueError("GEMINI_API_KEY not found in metadata.")
+         client = genai.Client(api_key=api_key)
+
+         # System prompt defining the expert persona and approach for the model
+         system_prompt = (
+             "You are a board-certified thoracic radiologist with over 20 years of experience in interpreting chest X-rays. "
+             "You are known for your meticulous attention to detail, clinical restraint, and deep respect for image-grounded reasoning. "
+             "You prioritize accuracy over speculation and communicate with diagnostic clarity, always aligning your impressions with what is visibly demonstrable in the radiograph."
+         )
+
+         # Use the default prompt if none is provided by the caller
+         if prompt is None:
+             prompt = (
+                 "Examine the chest X-ray step by step, following a structured A–G radiological workflow (Airway & Mediastinum, Bones & soft tissues, Cardiac silhouette, Diaphragm, Lung fields, Pleura, any Devices/foreign objects, and \"Global\" sanity checks). "
+                 "For each region, mentally assess both normal and abnormal findings before synthesizing them into a cohesive narrative report.\n"
+                 "All directional terms (left/right) must refer strictly to the PATIENT'S perspective.\n"
+                 "When evaluating cardiac size on AP films, assume that mild to moderate enlargement may be projectional unless the heart silhouette is clearly disproportionate or supported by additional findings (e.g., pulmonary congestion). "
+                 "If abnormal patterns are seen (e.g., opacities, effusion, pneumothorax, atelectasis, consolidation), propose the most likely clinical significance in a professional and cautious tone, consistent with expert radiology language. "
+                 "Weigh the clinical importance of each finding, and simulate a brief SECOND-PASS REVIEW of the image to verify that no significant abnormalities were overlooked.\n"
+                 "Your final output must be formatted with two sections only:\n"
+                 "FINDINGS: Structured prose following (but not explicitly labeling) the A–G sweep. Mention technical limitations only if they meaningfully impact interpretation. Always explicitly comment on signs of chronic lung disease (emphysema, fibrosis, interstitial changes) as either present or absent.\n"
+                 "IMPRESSION: A concise, prioritized interpretation that integrates key findings into a diagnostic hypothesis. Rank findings by CLINICAL SIGNIFICANCE, listing the most urgent or actionable abnormalities first, followed by the remaining key diagnoses/differentials, each with a probability qualifier (\"probable\", \"possible\", etc.). Use confident, direct language, but avoid speculation beyond what is visibly supported."
+             )
+
+         # Determine the MIME type of the image from its file extension
+         ext = os.path.splitext(image_path)[-1].lower()
+         if ext == ".png":
+             mime_type = "image/png"
+         elif ext in [".jpg", ".jpeg"]:
+             mime_type = "image/jpeg"
+         else:
+             raise ValueError(f"Unsupported image type: {ext}")
+
+         # Read the image file as bytes
+         with open(image_path, "rb") as f:
+             image_bytes = f.read()
+
+         # Prepare the input parts for the Gemini model: image and prompt
+         parts = [
+             Part.from_bytes(data=image_bytes, mime_type=mime_type),
+             prompt
+         ]
+
+         # Generate content with Gemini 2.5 Flash using the specified parameters
+         response = client.models.generate_content(
+             model="gemini-2.5-flash",
+             contents=parts,
+             config=GenerateContentConfig(
+                 system_instruction=system_prompt,
+                 temperature=0.2,
+                 max_output_tokens=2048
+             )
+         )
+
+         # Return the generated report text, stripped of surrounding whitespace
+         return response.text.strip()
src/tools_loader.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ from pathlib import Path
+ from .tools.vision_caption_tool import VisionCaptionTool  # Vision caption tool
+ from .tools.pubmed_tool import PubmedRetrievalTool  # PubMed retrieval tool
+ from .tools.iu_retrieval_tool import IUImpressionSearchTool  # IU impression search tool
+
+ def get_tools():
+     """Create and return all configured tools"""
+
+     # Get paths and API keys
+     data_dir = Path(__file__).parent.parent / "data"  # Path to the data directory
+     groq_key = os.getenv("GROQ_API_KEY")  # Groq API key (read here for visibility; the Groq-backed LLMs pick it up from the environment)
+     gemini_key = os.getenv("GEMINI_API_KEY")  # Gemini API key from the environment
+
+     # Create the tools
+     vision_tool = VisionCaptionTool(metadata={"GEMINI_API_KEY": gemini_key})  # Vision caption tool with the Gemini API key
+     pubmed_tool = PubmedRetrievalTool(metadata={"DATA_DIR": str(data_dir), "TOP_K": 3})  # PubMed tool with data directory and top-k parameter
+     iu_tool = IUImpressionSearchTool(metadata={
+         "VEC_PATH": str(data_dir / "iu_vecs.npy"),  # Path to the IU vectors file
+         "IMPR_PATH": str(data_dir / "iu_impr.jsonl"),  # Path to the IU impressions file
+         "MODEL_ID": "hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"  # Retrieval model identifier
+     })
+
+     # Return all tools in a dictionary
+     return {
+         "vision_tool": vision_tool,
+         "pubmed_tool": pubmed_tool,
+         "iu_tool": iu_tool
+     }
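
Because every tool resolves its paths, model IDs, and limits from `metadata`, a deployment can repoint them without code changes. A sketch of such an override, using a hypothetical mounted directory:

```python
from src.tools.pubmed_tool import PubmedRetrievalTool

# Hypothetical override: read the FAISS index and abstracts from a
# mounted volume and return 5 citations instead of the default 3.
pubmed_tool = PubmedRetrievalTool(metadata={"DATA_DIR": "/mnt/corpus", "TOP_K": 5})
```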