# interview-ai-detector / download-huggingface-model.py
# panduwana's picture
# un-gcp-ize
# 933b7b6
from huggingface_hub import login, model_info, whoami
from transformers import AutoModelForCausalLM, AutoTokenizer
from dotenv import load_dotenv
from datetime import datetime
import json, argparse, sys, os
def is_interactive():
    """Tell whether a user can be prompted for input.

    Returns:
        True when standard input is attached to a terminal. Otherwise the
        result of checking whether ``sys.ps1`` exists, which the interpreter
        defines only in interactive mode.
    """
    stdin_is_terminal = sys.stdin.isatty()
    return True if stdin_is_terminal else hasattr(sys, 'ps1')
def try_interactive_login():
    """Run the interactive Hugging Face login prompt.

    Calls ``login()`` without arguments. If the user interrupts the prompt
    with Ctrl-C, a ``^C`` marker is printed and the process terminates.

    Raises:
        SystemExit: With status 130 (the conventional "terminated by
            SIGINT" code) when the prompt is interrupted, so that wrapping
            shell scripts do not mistake an aborted login for success.
    """
    try:
        login()
    except KeyboardInterrupt:
        print("^C")
        # sys.exit() rather than the site-provided exit(): the latter is an
        # interactive-shell convenience and is not guaranteed to exist
        # (e.g. under ``python -S`` or in frozen applications).
        sys.exit(130)
def dumps(x):
    """Render *x* as pretty-printed JSON, degrading gracefully on odd values.

    Values that ``json`` can already encode are passed through unchanged.
    Otherwise:

    * ``datetime`` objects become ISO-8601 strings;
    * dicts, lists, tuples and sets are converted element by element;
    * objects that expose a ``__dict__`` are rendered as a mapping of their
      attributes;
    * anything else, and any reference cycle, is replaced by the
      placeholder string ``"..."``.

    The input is never modified: converted containers are always new
    objects, so an object's attributes keep their original values.

    Args:
        x: Any Python object.

    Returns:
        A JSON document indented by four spaces.
    """
    # ids of the containers/objects currently being expanded; used to cut
    # reference cycles instead of recursing without bound.
    in_progress = set()

    def json_key(key):
        # json only accepts str, int, float, bool and None as object keys.
        if key is None or isinstance(key, (str, int, float)):
            return key
        return str(key)

    def jsonable(obj):
        try:
            json.dumps(obj)
        except (TypeError, ValueError):
            # TypeError: unsupported type; ValueError: circular reference.
            pass
        else:
            return obj

        if isinstance(obj, datetime):
            return obj.isoformat()

        marker = id(obj)
        if marker in in_progress:
            return "..."
        in_progress.add(marker)
        try:
            if isinstance(obj, dict):
                return {json_key(k): jsonable(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple, set, frozenset)):
                return [jsonable(item) for item in obj]
            try:
                attributes = vars(obj)
            except TypeError:
                # No __dict__ to inspect (e.g. builtins, slotted objects).
                return "..."
            # Build a new dict: vars() returns the live __dict__, and
            # writing into it would overwrite the object's attributes.
            return {json_key(k): jsonable(v) for k, v in attributes.items()}
        finally:
            in_progress.discard(marker)

    return json.dumps(jsonable(x), indent=4, separators=(',', ': '))
# Command line: a single positional argument, the Hugging Face model id.
parser = argparse.ArgumentParser()
parser.add_argument('model_id', type=str)
args = parser.parse_args()
model_id = args.model_id

# The model is stored under ./.hf_home/<model_id>, relative to the current
# working directory. An existing target is never overwritten.
path = os.path.join(os.getcwd(), ".hf_home", model_id)
print("Downloading to", path)
if os.path.exists(path):
    print(f"{path} already exists, aborting. (To redownload, rm it first).")
    # sys.exit() instead of the site helper exit(), which is intended for
    # the interactive shell and may be missing (python -S, frozen builds).
    sys.exit()

# Look the model up first so that a bad id fails before any login prompt.
try:
    model_info(model_id)
except Exception as e:
    print(e)
    sys.exit(1)

# Authentication: reuse an existing session if whoami() succeeds; otherwise
# try HUGGINGFACE_TOKEN (from the environment, then from a .env file), and
# finally an interactive prompt when a user is present.
try:
    user_info = whoami()
except Exception:
    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    if not huggingface_token:
        load_dotenv()
        huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    if huggingface_token:
        print("Logging in with env HUGGINGFACE_TOKEN")
        try:
            login(huggingface_token)
        except Exception as e:
            print(e)
            # Only fall back to the prompt when somebody can answer it;
            # without a terminal the prompt cannot be completed.
            if not is_interactive():
                print("Login with HUGGINGFACE_TOKEN failed and no interactive session is available")
                sys.exit(1)
            try_interactive_login()
    elif is_interactive():
        try_interactive_login()
    else:
        print("Missing env: HUGGINGFACE_TOKEN")
        sys.exit(1)
    user_info = whoami()
print("Authenticated as:", dumps(user_info))

# Fetch the tokenizer and model, then write both into the target directory.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
except OSError as e:
    print(e)
    sys.exit(1)
tokenizer.save_pretrained(path)
model.save_pretrained(path)