Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,235 +1,151 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
import shutil
|
7 |
-
import time
|
8 |
|
9 |
import gradio as gr
|
10 |
from huggingface_hub import HfApi
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
return files
|
47 |
-
|
48 |
-
|
49 |
-
def pull_from_hf(
|
50 |
-
hf_token: str,
|
51 |
-
hf_repo_id: str,
|
52 |
-
filename: str,
|
53 |
-
):
|
54 |
-
if not hf_repo_id:
|
55 |
-
raise gr.Error("Please enter the repo_id of huggingface")
|
56 |
-
if not filename:
|
57 |
-
raise gr.Error("Please enter the filename")
|
58 |
-
|
59 |
-
if "," in filename:
|
60 |
-
filename_list = filename.split(",")
|
61 |
-
for _filename in filename_list:
|
62 |
-
save_path = os.path.join(LOCAL_DIR, _filename)
|
63 |
-
if os.path.exists(save_path):
|
64 |
-
message = "the file already exists!"
|
65 |
-
return message
|
66 |
-
|
67 |
-
# download
|
68 |
-
hf_api = HfApi(token=hf_token)
|
69 |
-
hf_api.hf_hub_download(
|
70 |
-
repo_id=hf_repo_id,
|
71 |
repo_type="dataset",
|
72 |
-
filename=
|
73 |
-
cache_dir=
|
74 |
-
local_dir=
|
75 |
local_dir_use_symlinks=False
|
76 |
)
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
return
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
)
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
os.system(f"git lfs track *")
|
156 |
-
print(file_path)
|
157 |
-
current_working_directory = os.getcwd()
|
158 |
-
print(current_working_directory)
|
159 |
-
os.system('cd {ori_dir}')
|
160 |
-
|
161 |
-
os.system(f"git lfs track *")
|
162 |
-
os.system("git add .")
|
163 |
-
os.system(f"git commit -m 'upload {num} files'")
|
164 |
-
os.system(f"git push")
|
165 |
-
os.chdir(ori_dir)
|
166 |
-
else:
|
167 |
-
os.system(f"git lfs track '{filename}'")
|
168 |
-
os.system("git add .")
|
169 |
-
os.system(f"git commit -m 'upload {filename}'")
|
170 |
-
os.system(f"git push")
|
171 |
-
os.chdir(ori_dir)
|
172 |
-
# remove clone dir
|
173 |
-
if os.path.exists(clone_dir):
|
174 |
-
shutil.rmtree(clone_dir)
|
175 |
-
message = f'Pushed to https://www.modelscope.cn/datasets/{ms_repo_id} successfully!'
|
176 |
-
print(message)
|
177 |
-
return message
|
178 |
-
|
179 |
-
|
180 |
-
def handle(
|
181 |
-
hf_token: str,
|
182 |
-
ms_token: str,
|
183 |
-
repo_type: str,
|
184 |
-
hf_repo: str,
|
185 |
-
ms_repo: str,
|
186 |
-
):
|
187 |
-
clone_dir = ms_repo.split("/")[-1]
|
188 |
-
hf_file_list = hf_list_repo_files(hf_token, hf_repo, repo_type)
|
189 |
-
print(f"all file in hf: {hf_file_list}")
|
190 |
-
|
191 |
-
for filename in hf_file_list:
|
192 |
-
print(f"begin to process file: {filename}")
|
193 |
-
clone_from_ms(ms_token, ms_repo, clone_dir)
|
194 |
-
time.sleep(1)
|
195 |
-
pull_from_hf(hf_token, hf_repo, filename)
|
196 |
-
time.sleep(1)
|
197 |
-
move_file_from_local_to_clone_dir(filename, clone_dir)
|
198 |
-
time.sleep(1)
|
199 |
-
push_to_ms(USERNAME, EMAIL, ms_repo, clone_dir, filename)
|
200 |
-
print(f"process file finish: {filename}")
|
201 |
-
time.sleep(10)
|
202 |
-
|
203 |
-
|
204 |
-
with gr.Blocks() as demo:
|
205 |
-
gr.Markdown(
|
206 |
-
'''
|
207 |
-
This space uploads model from Huggingface to ModelScope.
|
208 |
-
**Please make sure that you're the owner of the repo or have permission from the owner to do so!**
|
209 |
-
# How to use this Space?
|
210 |
-
- Duplicate this Space and providing MS token (optional) and your read/write HF token (mandatory)
|
211 |
-
- Create your target model repo on HF. This step needs to be done manually. The Space doesn't do create an empty repo for you.
|
212 |
-
- In your own private Space, fill in information below.
|
213 |
-
- Click submit then watch for output in container log for progress.
|
214 |
-
- Create README.md file (since the metadata is not compatible with HF)
|
215 |
-
'''
|
216 |
-
)
|
217 |
-
hf_token = gr.Textbox(label="HuggingFace Token")
|
218 |
-
ms_token = gr.Textbox(label="ModelScope Git Token")
|
219 |
-
repo_type = gr.Textbox(label="Repo Type", value="dataset")
|
220 |
-
hf_repo = gr.Textbox(label="HuggingFace Repo")
|
221 |
-
ms_repo = gr.Textbox(label="ModelScope Repo")
|
222 |
-
|
223 |
-
with gr.Row():
|
224 |
-
button = gr.Button("Submit", variant="primary")
|
225 |
-
clear = gr.Button("Clear")
|
226 |
|
227 |
-
|
228 |
-
handle,
|
229 |
-
[hf_token, ms_token, repo_type, hf_repo, ms_repo],
|
230 |
-
outputs=None
|
231 |
-
)
|
232 |
|
233 |
if __name__ == "__main__":
|
|
|
234 |
demo.queue(max_size=1)
|
235 |
demo.launch(share=False, max_threads=1)
|
|
|
1 |
+
import logging
|
2 |
+
from pathlib import Path
|
3 |
+
from typing import List, Optional
|
|
|
|
|
|
|
|
|
4 |
|
5 |
import gradio as gr
|
6 |
from huggingface_hub import HfApi
|
7 |
+
from modelscope.hub.api import HubApi
|
8 |
+
|
9 |
+
# Configure root logging once: INFO and above, timestamped, written to the
# default stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by the code in this file.
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
+
class HFToMSConverter:
    """Copy files from a HuggingFace repo to a ModelScope repo.

    Each file is downloaded into ``local_dir``, moved into a staging
    directory and then uploaded from there with ``HubApi.upload_folder``.

    Recognised ``config`` keys:
        hf_token: token handed to ``HfApi``.
        ms_token: token handed to ``HubApi.login``.
        username: owner prefixed to a ModelScope repo id that has no owner.
        cache_dir: download cache directory (default ``"hf2ms_cache"``).
        local_dir: download target directory (default ``"hf2ms_local"``).
        repo_type: HuggingFace repo type used for downloads
            (default ``"dataset"``).
    """

    def __init__(self, config: dict):
        """Create both API clients, log in to ModelScope and make the work dirs.

        Raises:
            KeyError: if ``hf_token`` or ``ms_token`` is missing from config.
        """
        self.config = config
        self.cache_dir = config.get('cache_dir', "hf2ms_cache")
        self.local_dir = config.get('local_dir', "hf2ms_local")
        # Downloads must use the same repo type the file list came from.
        self.repo_type = config.get('repo_type') or "dataset"
        self.hf_api = HfApi(token=config['hf_token'])
        self.ms_api = HubApi()
        self.ms_api.login(config['ms_token'])

        # parents=True so nested directories supplied via config also work.
        for dir_path in (self.local_dir, self.cache_dir):
            Path(dir_path).mkdir(parents=True, exist_ok=True)

    def get_hf_files(self, repo_id: str, repo_type: str = "dataset") -> List[str]:
        """Return the list of file paths in a HuggingFace repo."""
        return self.hf_api.list_repo_files(repo_id=repo_id, repo_type=repo_type)

    def download_file(self, repo_id: str, filename: str,
                      repo_type: Optional[str] = None) -> Optional[str]:
        """Download one file from HuggingFace into ``local_dir``.

        Args:
            repo_id: HuggingFace repo id.
            filename: path of the file inside the repo.
            repo_type: HuggingFace repo type; falls back to the configured
                one when omitted.

        Returns:
            The local path as a string, or ``None`` when the file already
            exists locally or the download raised.
        """
        save_path = Path(self.local_dir) / filename
        if save_path.exists():
            logger.warning(f"文件已存在: {filename}")
            return None

        try:
            self.hf_api.hf_hub_download(
                repo_id=repo_id,
                repo_type=repo_type or self.repo_type,
                filename=filename,
                cache_dir=self.cache_dir,
                local_dir=self.local_dir,
                local_dir_use_symlinks=False
            )
        except Exception as e:
            logger.error(f"下载失败 {filename}: {e}")
            return None

        logger.info(f"成功下载文件: {filename}")
        return str(save_path)

    def _qualified_ms_repo_id(self, ms_repo_id: str) -> str:
        """Return ``owner/name``; prefix the configured username only if no owner is given."""
        if "/" in ms_repo_id:
            return ms_repo_id
        return f"{self.config['username']}/{ms_repo_id}"

    def handle_file_operation(self, operation_type: str, *args) -> bool:
        """Run a "move" or "push" operation and turn any exception into ``False``.

        Args:
            operation_type: ``"move"`` (args: ``src``, ``dst`` as ``Path``) or
                ``"push"`` (args: ``ms_repo_id``, ``clone_dir``, ``filename``).

        Returns:
            ``True`` on success, ``False`` if the operation raised or the
            operation type is unknown.
        """
        try:
            if operation_type == "move":
                src, dst = args
                dst.parent.mkdir(parents=True, exist_ok=True)
                src.rename(dst)
                logger.info(f"移动文件成功: {src.name}")
            elif operation_type == "push":
                ms_repo_id, clone_dir, filename = args
                self.ms_api.upload_folder(
                    repo_id=self._qualified_ms_repo_id(ms_repo_id),
                    folder_path=clone_dir,
                    commit_message='upload dataset folder',
                    repo_type='dataset'
                )
                logger.info(f"推送成功: {filename}")
            else:
                # An unknown operation must not be reported as a success.
                raise ValueError(f"unknown operation type: {operation_type!r}")
            return True
        except Exception as e:
            logger.error(f"{operation_type}操作失败: {e}")
            return False

    def move_file(self, filename: str, clone_dir: str) -> bool:
        """Move a downloaded file from ``local_dir`` into ``clone_dir``."""
        return self.handle_file_operation(
            "move",
            Path(self.local_dir) / filename,
            Path(clone_dir) / filename
        )

    def push_to_ms(self, ms_repo_id: str, clone_dir: str, filename: str) -> bool:
        """Upload the staging directory ``clone_dir`` to ModelScope."""
        return self.handle_file_operation("push", ms_repo_id, clone_dir, filename)

    def process_file(self, hf_repo: str, ms_repo: str, filename: str,
                     repo_type: Optional[str] = None) -> bool:
        """Download, stage and upload a single file.

        The steps run strictly in order and stop at the first failure, so a
        failed download never triggers a move or an upload.

        Args:
            hf_repo: source HuggingFace repo id.
            ms_repo: target ModelScope repo; its last ``/`` component names
                the local staging directory.
            filename: path of the file inside the source repo.
            repo_type: HuggingFace repo type; falls back to the configured
                one when omitted.

        Returns:
            ``True`` only if all three steps succeeded.
        """
        clone_dir = ms_repo.split("/")[-1]

        try:
            if not self.download_file(hf_repo, filename, repo_type):
                return False
            if not self.move_file(filename, clone_dir):
                return False
            return bool(self.push_to_ms(ms_repo, clone_dir, filename))
        except Exception as e:
            logger.error(f"处理文件失败 {filename}: {e}")
            return False
|
103 |
+
|
104 |
+
def create_ui(username: str = "thomas",
              email: str = "yx20001210@163.com") -> gr.Blocks:
    """Build the Gradio interface for the HuggingFace -> ModelScope migration.

    Args:
        username: ModelScope account name placed in the converter config.
        email: e-mail address placed in the converter config.

    Returns:
        The assembled ``gr.Blocks`` app (not yet queued or launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # HuggingFace to ModelScope 数据迁移工具
            请确保您拥有相应仓库的权限。
            """
        )

        with gr.Row():
            # Tokens are secrets: mask them instead of echoing clear text.
            hf_token = gr.Textbox(label="HuggingFace Token", type="password")
            ms_token = gr.Textbox(label="ModelScope Token", type="password")

        with gr.Row():
            repo_type = gr.Textbox(label="仓库类型", value="dataset")
            hf_repo = gr.Textbox(label="HuggingFace仓库")
            ms_repo = gr.Textbox(label="ModelScope仓库")

        with gr.Row():
            submit = gr.Button("开始迁移", variant="primary")
            clear = gr.Button("清除")

        form_fields = [hf_token, ms_token, repo_type, hf_repo, ms_repo]

        def handle_submit(hf_token, ms_token, repo_type, hf_repo, ms_repo):
            """Validate the form, then migrate every file of the source repo.

            Raises:
                gr.Error: if a required field is empty or any file failed.
            """
            required = {
                "HuggingFace Token": (hf_token or "").strip(),
                "ModelScope Token": (ms_token or "").strip(),
                "HuggingFace仓库": (hf_repo or "").strip(),
                "ModelScope仓库": (ms_repo or "").strip(),
            }
            missing = [label for label, value in required.items() if not value]
            if missing:
                raise gr.Error(f"请填写: {', '.join(missing)}")

            hf_repo = required["HuggingFace仓库"]
            ms_repo = required["ModelScope仓库"]
            repo_type = (repo_type or "").strip() or "dataset"

            config = {
                'hf_token': required["HuggingFace Token"],
                'ms_token': required["ModelScope Token"],
                'username': username,
                'email': email,
                # Passed along with the credentials; consumers that do not
                # read this key simply ignore it.
                'repo_type': repo_type,
            }

            converter = HFToMSConverter(config)
            files = converter.get_hf_files(hf_repo, repo_type)

            # Keep going after a failure so one bad file does not block the
            # rest, but remember what failed so the user is told about it.
            failed = [
                filename for filename in files
                if not converter.process_file(hf_repo, ms_repo, filename)
            ]
            if failed:
                shown = ', '.join(failed[:10])
                raise gr.Error(
                    f"{len(failed)}/{len(files)} 个文件迁移失败: {shown}"
                )
            logger.info(f"迁移完成: 共 {len(files)} 个文件")

        submit.click(
            handle_submit,
            inputs=form_fields,
        )

        # Reset every field to its initial value.
        clear.click(
            lambda: ("", "", "dataset", "", ""),
            inputs=None,
            outputs=form_fields,
        )

    return demo
|
|
|
|
|
|
|
|
|
147 |
|
148 |
if __name__ == "__main__":
    # Build the app, then serve it with a one-slot queue and a single worker
    # thread so only one migration runs at a time; no public share link.
    demo = create_ui()
    demo.queue(max_size=1).launch(share=False, max_threads=1)
|