CCockrum committed on
Commit
65a61c3
·
verified ·
1 Parent(s): 5eba304

Update data_download.py

Browse files
Files changed (1) hide show
  1. data_download.py +64 -0
data_download.py CHANGED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Script to download medical images from The Cancer Imaging Archive (TCIA)
3
+ """
4
+ import os
5
+ import argparse
6
+ from tqdm import tqdm
7
+ from tcia_rest_client import TCIA_REST_Client
8
+
9
def download_tcia_collection(collection_name, output_dir, api_key=None):
    """
    Fetch every DICOM series of one TCIA collection into a local folder tree.

    The collection is walked patient -> study -> series. Each patient gets a
    sub-folder of ``output_dir`` named after its ``PatientID``, and every
    series found for that patient is downloaded into that sub-folder.

    Args:
        collection_name (str): Collection name handed to the client's
            patient query.
        output_dir (str): Root folder for the download; created if missing.
        api_key (str, optional): Forwarded unchanged to ``TCIA_REST_Client``.
    """
    print(f"Downloading {collection_name} collection...")

    # Build the client first, then make sure the root folder exists.
    tcia = TCIA_REST_Client(api_key=api_key)
    os.makedirs(output_dir, exist_ok=True)

    patient_records = tcia.get_patient(collection=collection_name)
    for record in tqdm(patient_records, desc="Downloading patients"):
        pid = record['PatientID']

        # One folder per patient, shared by all of that patient's series.
        target_dir = os.path.join(output_dir, pid)
        os.makedirs(target_dir, exist_ok=True)

        for study_record in tcia.get_patient_study(PatientID=pid):
            study_series = tcia.get_series(
                StudyInstanceUID=study_record['StudyInstanceUID']
            )
            for series_record in study_series:
                tcia.get_series_DICOM(
                    SeriesInstanceUID=series_record['SeriesInstanceUID'],
                    downloadPath=target_dir
                )

    print(f"Download complete. Files saved to {output_dir}")
55
+
56
if __name__ == "__main__":
    # Command-line entry point: read the collection name, output directory
    # and optional API key, then hand them to download_tcia_collection.
    cli = argparse.ArgumentParser(description='Download TCIA collections')

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        ('--collection', {'type': str, 'required': True, 'help': 'TCIA collection name'}),
        ('--output', {'type': str, 'default': './tcia_data', 'help': 'Output directory'}),
        ('--api_key', {'type': str, 'help': 'TCIA API key (if needed)'}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    options = cli.parse_args()

    download_tcia_collection(options.collection, options.output, options.api_key)