File size: 2,481 Bytes
5e1b2e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import argparse
import os
from urllib.parse import unquote, urlparse

import requests
import yaml
from huggingface_hub import snapshot_download

def load_config(config_path):
    """Parse the YAML file at ``config_path`` and return its contents.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The object produced by ``yaml.safe_load`` for the document
        (``None`` when the file is empty).
    """
    # Explicit UTF-8 so parsing does not depend on the platform's locale
    # encoding (which is not UTF-8 on every system).
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

def _filename_from_url(url):
    """Return the last path segment of ``url``, ignoring query and fragment."""
    # Unquote before taking the basename so an encoded "/" (%2F) cannot
    # smuggle a directory component into the result.
    return os.path.basename(unquote(urlparse(url).path))


def _download_file(model_id, local_dir, url, timeout):
    """Stream ``url`` into ``local_dir`` and return the final file path."""
    name = _filename_from_url(url)
    if name in ("", ".", ".."):
        raise ValueError(f"Cannot derive a file name from URL for {model_id}: {url}")

    filename = os.path.join(local_dir, name)
    # Write to a sibling ".part" file first so an interrupted transfer never
    # leaves a truncated checkpoint under the final name.
    partial = filename + ".part"

    # The context manager releases the connection on every exit path,
    # including the non-200 error below.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise ValueError(f"Failed to download {model_id} from {url}: HTTP {response.status_code}")
        try:
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            os.replace(partial, filename)
        finally:
            # After a successful os.replace the partial file is gone; this
            # only fires when the transfer or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
    return filename


def download_model(model_id, local_dir, platform, url=None, timeout=60):
    """Download one model checkpoint into ``local_dir``.

    Args:
        model_id: HuggingFace repo id, or the label used in messages for a
            GitHub download.
        local_dir: Destination directory; created if it does not exist.
        platform: Either ``"HuggingFace"`` or ``"GitHub"``.
        url: Direct file URL. Required when ``platform`` is ``"GitHub"``.
        timeout: Timeout in seconds handed to ``requests.get`` for GitHub
            downloads, so a stalled server cannot hang the script forever.

    Raises:
        ValueError: If the platform is unsupported, a GitHub entry has no
            URL, no file name can be derived from the URL, or the server
            answers with a status other than 200.
    """
    # Ensure the local directory exists
    os.makedirs(local_dir, exist_ok=True)

    if platform == "HuggingFace":
        print(f"Downloading model {model_id} from HuggingFace to {local_dir}")
        snapshot_download(
            repo_id=model_id,
            local_dir=local_dir,
            # NOTE(review): newer huggingface_hub releases appear to deprecate
            # this flag; confirm against the pinned version before removing.
            local_dir_use_symlinks=False,
            allow_patterns=["*.pth", "*.bin", "*.json"],  # Common model file extensions
            ignore_patterns=["*.md", "*.txt"],  # Ignore non-model files
        )
        print(f"Successfully downloaded {model_id} to {local_dir}")
    elif platform == "GitHub":
        if not url:
            raise ValueError(f"No URL provided for GitHub model: {model_id}")
        print(f"Downloading model {model_id} from GitHub URL {url} to {local_dir}")
        filename = _download_file(model_id, local_dir, url, timeout)
        print(f"Successfully downloaded {model_id} to {filename}")
    else:
        raise ValueError(f"Unsupported platform: {platform}")

def main(argv=None):
    """Read the YAML config and download every model it lists.

    Args:
        argv: Command-line arguments to parse; ``None`` lets argparse read
            them from ``sys.argv``.

    Raises:
        ValueError: If the config is not a list of mappings, or an entry
            lacks one of the required keys.
    """
    parser = argparse.ArgumentParser(description="Download model checkpoints from HuggingFace or GitHub.")
    parser.add_argument('--config', type=str, default="configs/model_ckpts.yaml",
                        help="Path to the YAML configuration file")
    args = parser.parse_args(argv)

    # Load the YAML configuration
    config = load_config(args.config)

    # An empty YAML document loads as None; report it instead of failing
    # with "'NoneType' object is not iterable".
    if config is None:
        print(f"No models listed in {args.config}; nothing to download.")
        return
    if not isinstance(config, list):
        raise ValueError(f"Expected a list of model entries in {args.config}, got {type(config).__name__}")

    required_keys = ('model_id', 'local_dir', 'platform')

    # Iterate through models in the config
    for index, model_config in enumerate(config):
        if not isinstance(model_config, dict):
            raise ValueError(f"Config entry #{index} in {args.config} is not a mapping")
        missing = [key for key in required_keys if key not in model_config]
        if missing:
            raise ValueError(f"Config entry #{index} in {args.config} is missing required key(s): {', '.join(missing)}")

        download_model(
            model_config['model_id'],
            model_config['local_dir'],
            model_config['platform'],
            model_config.get('url'),  # Get URL if it exists, None otherwise
        )


if __name__ == "__main__":
    main()