|
""" |
|
Script to download medical images from The Cancer Imaging Archive (TCIA) |
|
""" |
|
import os |
|
import argparse |
|
from tqdm import tqdm |
|
from tcia_rest_client import TCIA_REST_Client |
|
|
|
def download_tcia_collection(collection_name, output_dir, api_key=None):
    """
    Fetch every DICOM series of one TCIA collection onto local disk.

    The collection is walked patient -> study -> series. Each series is
    handed to the TCIA client for download into a sub-directory of
    ``output_dir`` named after the patient ID.

    Args:
        collection_name (str): Name of the TCIA collection to query.
        output_dir (str): Root directory for the download; created if it
            does not already exist.
        api_key (str, optional): Forwarded unchanged to the TCIA client
            constructor. Defaults to None.
    """
    print(f"Downloading {collection_name} collection...")

    tcia = TCIA_REST_Client(api_key=api_key)
    os.makedirs(output_dir, exist_ok=True)

    def series_uids_for(pid):
        # Lazy walk over one patient's studies: the series of a study are
        # only requested once the previous study's series were consumed,
        # so the request order matches a plain nested loop.
        for study_record in tcia.get_patient_study(PatientID=pid):
            series_records = tcia.get_series(
                StudyInstanceUID=study_record['StudyInstanceUID']
            )
            for series_record in series_records:
                yield series_record['SeriesInstanceUID']

    patient_records = tcia.get_patient(collection=collection_name)

    for patient_record in tqdm(patient_records, desc="Downloading patients"):
        pid = patient_record['PatientID']

        # All series of a patient share one directory named after the ID.
        target_dir = os.path.join(output_dir, pid)
        os.makedirs(target_dir, exist_ok=True)

        for uid in series_uids_for(pid):
            tcia.get_series_DICOM(SeriesInstanceUID=uid, downloadPath=target_dir)

    print(f"Download complete. Files saved to {output_dir}")
|
|
|
if __name__ == "__main__": |
|
parser = argparse.ArgumentParser(description='Download TCIA collections') |
|
parser.add_argument('--collection', type=str, required=True, help='TCIA collection name') |
|
parser.add_argument('--output', type=str, default='./tcia_data', help='Output directory') |
|
parser.add_argument('--api_key', type=str, help='TCIA API key (if needed)') |
|
|
|
args = parser.parse_args() |
|
|
|
download_tcia_collection(args.collection, args.output, args.api_key) |