# TCIA-Detection-model / data_download.py
# CCockrum's picture
# Update data_download.py
# 65a61c3 verified
"""
Script to download medical images from The Cancer Imaging Archive (TCIA)
"""
import os
import argparse
from tqdm import tqdm
from tcia_rest_client import TCIA_REST_Client
def download_tcia_collection(collection_name, output_dir, api_key=None):
    """Fetch every series of one TCIA collection into per-patient folders.

    The collection is walked patient -> study -> series through a
    ``TCIA_REST_Client``. Each patient gets a sub-directory of
    ``output_dir`` named after its ``PatientID``; every series found for
    that patient, whichever study it belongs to, is requested with that
    same directory as its ``downloadPath``.

    Args:
        collection_name (str): Name of the TCIA collection to download.
        output_dir (str): Root directory for the downloaded files; it is
            created if it does not already exist.
        api_key (str, optional): TCIA API key for private collections,
            forwarded unchanged to the client constructor.
    """
    print(f"Downloading {collection_name} collection...")

    # The client is built before any directory is touched, so a failing
    # constructor leaves the filesystem untouched.
    tcia = TCIA_REST_Client(api_key=api_key)
    os.makedirs(output_dir, exist_ok=True)

    patient_records = tcia.get_patient(collection=collection_name)
    for record in tqdm(patient_records, desc="Downloading patients"):
        pid = record['PatientID']

        # One folder per patient, created before the studies are queried.
        target_dir = os.path.join(output_dir, pid)
        os.makedirs(target_dir, exist_ok=True)

        # Lazy study -> series walk: each series listing is requested only
        # once the previous study's series have all been downloaded.
        series_uids = (
            entry['SeriesInstanceUID']
            for study in tcia.get_patient_study(PatientID=pid)
            for entry in tcia.get_series(
                StudyInstanceUID=study['StudyInstanceUID']
            )
        )
        for uid in series_uids:
            tcia.get_series_DICOM(
                SeriesInstanceUID=uid,
                downloadPath=target_dir,
            )

    print(f"Download complete. Files saved to {output_dir}")
if __name__ == "__main__":
    # Command-line entry point: --collection is mandatory, --output falls
    # back to ./tcia_data, and --api_key is optional (None when omitted).
    cli = argparse.ArgumentParser(description='Download TCIA collections')
    cli.add_argument(
        '--collection',
        type=str,
        required=True,
        help='TCIA collection name',
    )
    cli.add_argument(
        '--output',
        type=str,
        default='./tcia_data',
        help='Output directory',
    )
    cli.add_argument(
        '--api_key',
        type=str,
        help='TCIA API key (if needed)',
    )
    options = cli.parse_args()

    download_tcia_collection(
        options.collection,
        options.output,
        options.api_key,
    )