import os

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Authenticate with the Hugging Face Hub if a token is available
# (required for gated models such as the Gemma family).
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    login(token=hf_token)

cache_dir = '/app/hf_cache'
model_name = 'google/gemma-3-1b-it'

# Construct the expected cache path. Recent transformers versions store
# downloads in the hub-style layout <cache_dir>/models--<org>--<name>,
# not under a 'transformers' subdirectory.
expected_cache_path = os.path.join(cache_dir, 'models--' + model_name.replace('/', '--'))

# Download the model and tokenizer only if they are not already cached.
if not os.path.exists(expected_cache_path):
    print(f'Downloading model and tokenizer: {model_name}...')
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    # Use the EOS token for padding so batched inference works out of the box.
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)
    # Keep the model config consistent with the tokenizer's padding choice.
    model.config.pad_token_id = tokenizer.eos_token_id
else:
    print(f'Model and tokenizer ({model_name}) already cached. Skipping download.')
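
# Usage sketch (an assumption, not part of the original script): once the
# cache has been populated by the code above, the model can be loaded from
# the same cache_dir and smoke-tested with a short generation. The prompt
# and token budget below are illustrative placeholders.
if __name__ == '__main__':
    import torch

    # Loads resolve entirely from /app/hf_cache after the first download.
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)

    inputs = tokenizer('Hello, world!', return_tensors='pt')
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))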