nbugs committed on
Commit
febf61a
·
verified ·
1 Parent(s): 94bb158

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +14 -6
sync_data.sh CHANGED
@@ -1,7 +1,7 @@
1
  #!/bin/bash
2
 
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
- echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
5
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
6
  exit 0
7
  fi
@@ -9,36 +9,43 @@ fi
9
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
10
 
11
  restore_latest() {
12
- echo "Restoring latest backup..."
13
  python3 -c "
14
  from huggingface_hub import HfApi
15
  import os
 
16
  api = HfApi()
17
- files = api.list_repo_files(repo_id='${DATASET_ID}', repo_type='dataset')
18
  backup_files = sorted([f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')])
 
19
  if backup_files:
20
  latest = backup_files[-1]
 
21
  api.hf_hub_download(repo_id='${DATASET_ID}', filename=latest, repo_type='dataset', local_dir='/tmp')
22
  os.system(f'tar -xzf /tmp/{latest} -C /app/app')
23
  os.remove(f'/tmp/{latest}')
24
- print(f'Restored from {latest}')
25
  else:
26
- print('No backup found')
27
  "
28
  }
29
 
30
  sync_data() {
 
 
 
31
  while true; do
32
  echo "Starting backup at $(date)"
33
  cd /app
34
  timestamp=$(date +%Y%m%d_%H%M%S)
35
  backup_file="backup_${timestamp}.tar.gz"
 
36
  if [ -d "app" ] && [ "$(ls -A app 2>/dev/null)" ]; then
37
  tar -czf "/tmp/${backup_file}" app/
38
  python3 -c "
39
  from huggingface_hub import HfApi
40
  api = HfApi()
41
- api.upload_file(path_or_fileobj='/tmp/${backup_file}', path_in_repo='${backup_file}', repo_id='${DATASET_ID}', repo_type='dataset')
42
  print('Backup uploaded successfully')
43
  backup_files = sorted([f for f in api.list_repo_files('${DATASET_ID}', repo_type='dataset') if f.startswith('backup_')])
44
  for old_backup in backup_files[:-50]:
@@ -49,6 +56,7 @@ for old_backup in backup_files[:-50]:
49
  else
50
  echo "No data to backup"
51
  fi
 
52
  sleep ${SYNC_INTERVAL:-7200}
53
  done
54
  }
 
1
  #!/bin/bash
2
 
3
  if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
4
+ echo "Starting without backup - missing HF_TOKEN or DATASET_ID"
5
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
6
  exit 0
7
  fi
 
9
  export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
10
 
11
  restore_latest() {
12
+ echo "Checking for existing backups..."
13
  python3 -c "
14
  from huggingface_hub import HfApi
15
  import os
16
+
17
  api = HfApi()
18
+ files = api.list_repo_files('${DATASET_ID}', repo_type='dataset')
19
  backup_files = sorted([f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')])
20
+
21
  if backup_files:
22
  latest = backup_files[-1]
23
+ print(f'Found backup: {latest}, downloading...')
24
  api.hf_hub_download(repo_id='${DATASET_ID}', filename=latest, repo_type='dataset', local_dir='/tmp')
25
  os.system(f'tar -xzf /tmp/{latest} -C /app/app')
26
  os.remove(f'/tmp/{latest}')
27
+ print(f'Successfully restored from {latest}')
28
  else:
29
+ print('No backup found, skipping restore.')
30
  "
31
  }
32
 
33
  sync_data() {
34
+ echo "Waiting ${SYNC_INTERVAL:-7200} seconds before first backup..."
35
+ sleep ${SYNC_INTERVAL:-7200}
36
+
37
  while true; do
38
  echo "Starting backup at $(date)"
39
  cd /app
40
  timestamp=$(date +%Y%m%d_%H%M%S)
41
  backup_file="backup_${timestamp}.tar.gz"
42
+
43
  if [ -d "app" ] && [ "$(ls -A app 2>/dev/null)" ]; then
44
  tar -czf "/tmp/${backup_file}" app/
45
  python3 -c "
46
  from huggingface_hub import HfApi
47
  api = HfApi()
48
+ api.upload_file('/tmp/${backup_file}', '${backup_file}', '${DATASET_ID}', repo_type='dataset')
49
  print('Backup uploaded successfully')
50
  backup_files = sorted([f for f in api.list_repo_files('${DATASET_ID}', repo_type='dataset') if f.startswith('backup_')])
51
  for old_backup in backup_files[:-50]:
 
56
  else
57
  echo "No data to backup"
58
  fi
59
+ echo "Next backup in ${SYNC_INTERVAL:-7200} seconds..."
60
  sleep ${SYNC_INTERVAL:-7200}
61
  done
62
  }