anycallgmail committed on
Commit
95f6583
·
verified ·
1 Parent(s): 609a9be

Create update_music.py

Browse files
Files changed (1) hide show
  1. update_music.py +99 -0
update_music.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from huggingface_hub import snapshot_download, HfApi
3
+ import os
4
+ import sys
5
+ import time
6
+ import json
7
+ import hashlib
8
+ from datetime import datetime
9
+
10
def get_dataset_info(repo_id, token):
    """Fetch the remote dataset's revision metadata for change detection.

    Args:
        repo_id: Identifier of the dataset repository to query.
        token: Access token handed to ``HfApi``.

    Returns:
        A dict with the keys ``"sha"`` and ``"last_modified"`` (an ISO-8601
        string, or ``None`` when the repository reports no modification
        time), or ``None`` if anything goes wrong while querying. Failures
        are printed rather than raised.
    """
    try:
        repo = HfApi(token=token).repo_info(repo_id=repo_id, repo_type="dataset")
        modified = repo.last_modified
        stamp = modified.isoformat() if modified else None
        return {"sha": repo.sha, "last_modified": stamp}
    except Exception as err:
        # Best-effort lookup: report the problem and let the caller skip this round.
        print(f"获取数据集信息出错: {err}")
        return None
22
+
23
def save_dataset_info(info, music_dir):
    """Persist dataset metadata to ``<music_dir>/.dataset_info.json``.

    The write is best-effort: any failure is printed and swallowed so that a
    problem with the marker file never aborts the caller.

    Args:
        info: JSON-serializable metadata to store.
        music_dir: Directory that receives the marker file. It is created
            if it does not exist yet.

    Returns:
        None.
    """
    info_file = os.path.join(music_dir, ".dataset_info.json")
    try:
        # Serialize before opening the file: opening with "w" truncates it, so
        # a serialization error must not be able to wipe the existing marker.
        payload = json.dumps(info)
        # An empty music_dir means "current directory"; makedirs("") would raise.
        if music_dir:
            os.makedirs(music_dir, exist_ok=True)
        with open(info_file, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        print(f"保存数据集信息出错: {str(e)}")
31
+
32
def load_dataset_info(music_dir):
    """Read the metadata previously stored in ``<music_dir>/.dataset_info.json``.

    Args:
        music_dir: Directory expected to contain the marker file.

    Returns:
        The decoded JSON content, or ``None`` when the marker file is absent
        or cannot be read or parsed. Read and parse failures are printed
        rather than raised.
    """
    marker_path = os.path.join(music_dir, ".dataset_info.json")
    stored = None
    if os.path.exists(marker_path):
        try:
            with open(marker_path, "r") as handle:
                stored = json.load(handle)
        except Exception as err:
            # An unreadable or corrupt marker is reported and treated as missing.
            print(f"加载数据集信息出错: {err}")
            stored = None
    return stored
44
+
45
def update_music(dataset_name, token, music_dir, force=False):
    """Download the music dataset into ``music_dir`` when its revision changed.

    The remote commit sha is compared with the sha recorded in the local
    marker file; the download is skipped when they match, unless ``force``
    is set.

    Args:
        dataset_name: Identifier of the dataset repository to mirror.
        token: Access token used for both the metadata query and the download.
        music_dir: Local directory that receives the dataset files and the
            marker file.
        force: When true, download even if the recorded sha matches.

    Returns:
        ``True`` if a download completed, ``False`` if it was skipped (remote
        metadata unavailable, or nothing changed) or if the download failed.
    """
    print(f"[{datetime.now()}] 检查音乐数据集更新...")

    # Fetch the remote revision metadata
    remote_info = get_dataset_info(dataset_name, token)
    if not remote_info:
        print("无法获取远程数据集信息,跳过更新")
        return False

    # Load what the previous successful run recorded
    local_info = load_dataset_info(music_dir)
    if not isinstance(local_info, dict):
        # A marker holding valid JSON that is not an object would break the
        # .get() lookup below; treat it like a missing marker instead.
        local_info = None

    remote_sha = remote_info.get("sha")

    # Decide whether a download is needed
    if not force and local_info and local_info.get("sha") == remote_sha:
        print("音乐数据集没有变化,无需更新")
        return False

    print("检测到音乐数据集有更新,开始下载...")

    try:
        # Pin the download to the commit that was just inspected, so the sha
        # recorded afterwards describes exactly the files on disk even if the
        # repository receives a new push in the meantime. A missing sha
        # (None) falls back to the default revision.
        snapshot_download(
            repo_id=dataset_name,
            repo_type="dataset",
            revision=remote_sha,
            local_dir=music_dir,
            token=token,
        )
    except Exception as e:
        print(f"更新音乐数据集出错: {str(e)}")
        return False

    # Record the revision that is now on disk
    save_dataset_info(remote_info, music_dir)

    print(f"[{datetime.now()}] 音乐数据集更新成功!")
    return True
82
+
83
if __name__ == "__main__":
    # Command-line arguments: dataset_name token music_dir [interval] [force]
    usage = f"用法: {sys.argv[0]} dataset_name token music_dir [interval] [force]"

    # Fail with a readable message instead of an IndexError traceback
    if len(sys.argv) < 4:
        sys.exit(usage)

    dataset_name, token, music_dir = sys.argv[1:4]

    # Optional arguments
    try:
        interval = int(sys.argv[4]) if len(sys.argv) > 4 else 3600  # default: one hour
    except ValueError:
        sys.exit(usage)
    if interval < 0:
        # time.sleep() rejects negative values; refuse them before doing any work
        sys.exit(usage)
    force = sys.argv[5].lower() == "true" if len(sys.argv) > 5 else False

    # Always force an update on the first run
    update_music(dataset_name, token, music_dir, force=True)

    # Then poll for updates periodically
    while True:
        time.sleep(interval)
        update_music(dataset_name, token, music_dir, force=force)