music / update_music.py
anycallgmail's picture
Create update_music.py
95f6583 verified
#!/usr/bin/env python3
from huggingface_hub import snapshot_download, HfApi
import os
import sys
import time
import json
import hashlib
from datetime import datetime
def get_dataset_info(repo_id, token):
    """Look up the dataset's current revision metadata on the Hub.

    Args:
        repo_id: Identifier of the dataset repository to inspect.
        token: Access token handed to ``HfApi``.

    Returns:
        A dict with the keys ``"sha"`` and ``"last_modified"`` (an ISO-8601
        string, or ``None`` when the Hub reports no modification time).
        ``None`` is returned when the lookup fails for any reason; the error
        is printed rather than raised so a polling caller can keep running.
    """
    try:
        repo = HfApi(token=token).repo_info(repo_id=repo_id, repo_type="dataset")
        sha = repo.sha
        modified = repo.last_modified
        timestamp = modified.isoformat() if modified else None
        return {"sha": sha, "last_modified": timestamp}
    except Exception as err:
        # Best effort: report the problem and let the caller decide what to do.
        print(f"获取数据集信息出错: {err!s}")
        return None
def save_dataset_info(info, music_dir):
    """Write dataset metadata as JSON to ``.dataset_info.json`` in *music_dir*.

    Args:
        info: JSON-serialisable metadata to persist.
        music_dir: Directory that receives the state file.

    Returns:
        None. Failures are printed and swallowed, never raised.
    """
    state_path = os.path.join(music_dir, ".dataset_info.json")
    try:
        with open(state_path, "w") as handle:
            json.dump(info, handle)
    except Exception as err:
        # Best effort: a failed save only means the next check re-downloads.
        print(f"保存数据集信息出错: {err!s}")
def load_dataset_info(music_dir):
    """Load previously saved dataset metadata from *music_dir*.

    Reads ``.dataset_info.json`` inside *music_dir*.

    Args:
        music_dir: Directory that holds the state file.

    Returns:
        The stored metadata dict, or ``None`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
        Problems other than a missing file are printed, never raised.
    """
    info_file = os.path.join(music_dir, ".dataset_info.json")
    try:
        # Open directly instead of checking existence first, so a file that
        # disappears between the check and the open cannot cause an error.
        with open(info_file, "r", encoding="utf-8") as f:
            info = json.load(f)
    except FileNotFoundError:
        # No state saved yet: not an error, just nothing to compare against.
        return None
    except Exception as e:
        print(f"加载数据集信息出错: {str(e)}")
        return None

    # Callers use dict methods on the result; reject any other JSON value
    # (list, string, number) instead of letting it fail later.
    if not isinstance(info, dict):
        print(f"加载数据集信息出错: 文件内容不是 JSON 对象 ({info_file})")
        return None
    return info
def update_music(dataset_name, token, music_dir, force=False):
    """Download the music dataset into *music_dir* when it has changed.

    The remote commit sha is compared with the sha recorded by the previous
    successful update; the download is skipped when they match unless
    *force* is set.

    Args:
        dataset_name: Hub dataset repository id.
        token: Access token used for both the metadata lookup and download.
        music_dir: Local directory that receives the dataset files and the
            saved state file.
        force: When true, download even if the recorded sha matches.

    Returns:
        True when a download completed, False when the update was skipped
        (remote info unavailable or nothing changed) or the download failed.
    """
    print(f"[{datetime.now()}] 检查音乐数据集更新...")

    # Fetch remote dataset info; without it there is nothing to compare.
    remote_info = get_dataset_info(dataset_name, token)
    if not remote_info:
        print("无法获取远程数据集信息,跳过更新")
        return False

    # Compare against the state recorded by the last successful update.
    local_info = load_dataset_info(music_dir)
    if not force and local_info and local_info.get("sha") == remote_info.get("sha"):
        print("音乐数据集没有变化,无需更新")
        return False

    print("检测到音乐数据集有更新,开始下载...")
    try:
        snapshot_download(
            repo_id=dataset_name,
            repo_type="dataset",
            # Pin the download to the commit that was just inspected, so the
            # sha recorded below describes the files actually on disk even if
            # the repository receives a new push in the meantime. A missing
            # sha (None) falls back to the default revision.
            revision=remote_info.get("sha"),
            local_dir=music_dir,
            token=token,
        )
        # Record the new state only after the download has succeeded.
        save_dataset_info(remote_info, music_dir)
        print(f"[{datetime.now()}] 音乐数据集更新成功!")
        return True
    except Exception as e:
        # Best effort: report and let the caller retry on the next cycle.
        print(f"更新音乐数据集出错: {str(e)}")
        return False
if __name__ == "__main__":
    # Command-line arguments: dataset_name token music_dir [interval] [force]
    if len(sys.argv) < 4:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"用法: {sys.argv[0]} dataset_name token music_dir [interval] [force]")
    dataset_name = sys.argv[1]
    token = sys.argv[2]
    music_dir = sys.argv[3]

    # Optional arguments.
    try:
        interval = int(sys.argv[4]) if len(sys.argv) > 4 else 3600  # default: 1 hour
    except ValueError:
        sys.exit(f"interval 必须是整数(秒): {sys.argv[4]!r}")
    if interval < 0:
        # time.sleep() rejects negative values; fail before downloading.
        sys.exit(f"interval 不能为负数: {interval}")
    force = sys.argv[5].lower() == "true" if len(sys.argv) > 5 else False

    # Always force an update on the first run.
    update_music(dataset_name, token, music_dir, force=True)

    # Then poll for updates at the configured interval.
    while True:
        time.sleep(interval)
        update_music(dataset_name, token, music_dir, force=force)