File size: 3,473 Bytes
fd4ffa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os

import requests
from tqdm import tqdm


def download_files(url_dict, directory):
    """Download each dataset in ``url_dict`` into a sub-folder of ``directory``.

    Parameters
    ----------
    url_dict : dict[str, str]
        Maps an archive name (e.g. ``"deepbacs.zip"``) to a direct download
        URL — except for the ``"livecell"`` key, whose value is a shell
        command prefix that is completed with the target directory and
        executed (see the security note below).
    directory : str
        Root directory; per-dataset sub-directories are created beneath it.
        Created if it does not exist.
    """
    os.makedirs(directory, exist_ok=True)

    # Map the known archive keys to their dataset sub-directory. Unrecognized
    # keys fall back to the root directory itself — the previous version left
    # ``base_dir`` undefined (or stale from an earlier iteration) for such
    # keys, so new entries either errored out or landed in the wrong folder.
    subdir_by_key = {
        "nips_train.zip": "nips_dataset",
        "nips_test.zip": "nips_dataset",
        "deepbacs.zip": "deepbacs_dataset",
        "livecell": "livecell_dataset",
    }

    for key, url in url_dict.items():
        base_dir = os.path.join(directory, subdir_by_key[key]) if key in subdir_by_key else directory
        os.makedirs(base_dir, exist_ok=True)

        if key == "livecell":
            # NOTE(security): this "URL" is actually a shell command prefix
            # (a wget invocation) completed with the destination path and run
            # through os.system. Only trusted, hard-coded values must ever be
            # used for this entry.
            print(f"Downloading from {key}: {url}")
            os.system(url + base_dir)
            continue

        try:
            print(f"Downloading from {key}: {url}")
            # Stream the response so large archives are never held fully in
            # memory; the timeout prevents a hung server from blocking forever.
            response = requests.get(url, stream=True, allow_redirects=True, timeout=60)
            # Fail loudly on HTTP errors instead of saving an error page as
            # the archive.
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))

            # Use the key as the local filename.
            filename = os.path.basename(key)
            file_path = os.path.join(base_dir, filename)

            # Write the body to disk in chunks, with a byte-accurate progress bar.
            with open(file_path, "wb") as file, tqdm(
                desc=filename, total=total_size, unit="iB", unit_scale=True, unit_divisor=1024
            ) as bar:
                for data in response.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)

            print(f"Saved to {file_path}")
        except Exception as e:
            # Best-effort semantics: report the failure and move on to the
            # next dataset rather than aborting the whole run.
            print(f"Failed to download from {key} ({url}). Reason: {str(e)}")


def main():
    """Parse the target directory from the CLI and download all datasets.

    Command line:
        --dir  Directory to download files into (default is a placeholder
               path the user is expected to replace).
    """
    # The previous description ("Process some integers.") was a copy-paste
    # artifact from the argparse documentation.
    parser = argparse.ArgumentParser(description="Download cell segmentation benchmark datasets.")
    parser.add_argument("--dir", type=str, help="Directory to download files to", default="/set/the/path")

    args = parser.parse_args()
    directory = os.path.normpath(args.dir)

    # Archive name -> download URL. The "livecell" entry is a shell command
    # prefix consumed specially by download_files.
    url_dict = {
        "deepbacs.zip": "https://zenodo.org/records/5551009/files/DeepBacs_Data_Segmentation_StarDist_MIXED_dataset.zip?download=1",
        "nips_test.zip": "https://zenodo.org/records/10719375/files/Testing.zip?download=1",
        "nips_train.zip": "https://zenodo.org/records/10719375/files/Training-labeled.zip?download=1",
        "livecell": "wget --recursive --no-parent --cut-dirs=0 --timestamping -i urls.txt --directory-prefix=",
        # Add URLs with keys here
    }
    download_files(url_dict, directory)


if __name__ == "__main__":
    main()