Update data_download.py
Browse files- data_download.py +64 -0
data_download.py
CHANGED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Script to download medical images from The Cancer Imaging Archive (TCIA)
|
3 |
+
"""
|
4 |
+
import os
|
5 |
+
import argparse
|
6 |
+
from tqdm import tqdm
|
7 |
+
from tcia_rest_client import TCIA_REST_Client
|
8 |
+
|
9 |
+
def download_tcia_collection(collection_name, output_dir, api_key=None):
    """
    Download every series of one TCIA collection, grouped by patient.

    The collection is walked patient -> study -> series. Each series is
    requested from the client with the patient's folder as its download
    path, so all series of a patient land in ``<output_dir>/<PatientID>``.

    Args:
        collection_name (str): Name of the TCIA collection
        output_dir (str): Root directory for the download; created if missing
        api_key (str, optional): Passed unchanged to ``TCIA_REST_Client``
            (intended for private collections)
    """
    print(f"Downloading {collection_name} collection...")

    # One client instance is reused for every request below.
    tcia = TCIA_REST_Client(api_key=api_key)

    os.makedirs(output_dir, exist_ok=True)

    patient_records = tcia.get_patient(collection=collection_name)
    progress = tqdm(patient_records, desc="Downloading patients")

    for record in progress:
        pid = record['PatientID']

        # Every series of this patient shares a single folder.
        target_dir = os.path.join(output_dir, pid)
        os.makedirs(target_dir, exist_ok=True)

        for study_record in tcia.get_patient_study(PatientID=pid):
            series_records = tcia.get_series(
                StudyInstanceUID=study_record['StudyInstanceUID']
            )

            for series_record in series_records:
                tcia.get_series_DICOM(
                    SeriesInstanceUID=series_record['SeriesInstanceUID'],
                    downloadPath=target_dir,
                )

    print(f"Download complete. Files saved to {output_dir}")
|
55 |
+
|
56 |
+
if __name__ == "__main__":
    # Command-line entry point: read the options, then run the download.
    cli = argparse.ArgumentParser(description='Download TCIA collections')
    cli.add_argument(
        '--collection',
        type=str,
        required=True,
        help='TCIA collection name',
    )
    cli.add_argument(
        '--output',
        type=str,
        default='./tcia_data',
        help='Output directory',
    )
    cli.add_argument(
        '--api_key',
        type=str,
        help='TCIA API key (if needed)',
    )

    options = cli.parse_args()

    download_tcia_collection(
        collection_name=options.collection,
        output_dir=options.output,
        api_key=options.api_key,
    )
|