ysharma HF Staff committed on
Commit
073785a
·
verified ·
1 Parent(s): af0c30f

Create certificate_upload_module.py

Browse files
Files changed (1) hide show
  1. certificate_upload_module.py +224 -0
certificate_upload_module.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import time
3
+ import logging
4
+ from PIL import Image as PILImage
5
+ from datasets import Dataset, Image
6
+ from huggingface_hub import HfApi, login
7
+ import pandas as pd
8
+ import os
9
+ from datetime import datetime
10
+
11
# Module-wide logging: configure the root logger at INFO level and create
# the logger used by every function in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hub repo id of the dataset that stores the certificates.
# Replace with your actual certificate dataset name.
CERTIFICATE_DATASET_NAME = "ysharma/gradio-agents-mcp-hackathon-certificates"
17
+
18
def _is_empty_dataset_error(error):
    """Return True when the error message indicates the dataset has no data yet."""
    # "no data" also matches the more specific "corresponds to no data" text.
    # NOTE(review): this substring match is broad. A false positive makes the
    # caller treat the dataset as empty and push only the new certificate, so
    # consider matching a specific exception type instead - confirm against
    # the errors `load_dataset` actually raises for an empty repo.
    return "no data" in str(error).lower()


def _load_existing_certificates(max_retries, retry_delay):
    """Load the "train" split of the certificate dataset, retrying on failure.

    Args:
        max_retries: Maximum number of load attempts.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        tuple: (load_successful: bool, existing_dataset). ``existing_dataset``
        is None when the dataset was detected as empty or could not be loaded.
    """
    for attempt in range(max_retries):
        logger.info(f"Loading attempt {attempt + 1}/{max_retries}")

        try:
            from datasets import load_dataset
            existing_dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
            logger.info(f"Successfully loaded {len(existing_dataset)} existing certificates")
            return True, existing_dataset

        except Exception as load_error:
            logger.warning(f"Attempt {attempt + 1} failed: {str(load_error)[:100]}")

            # An empty dataset is not a failure: the first entry will create it.
            if _is_empty_dataset_error(load_error):
                logger.info("Dataset appears to be empty - will create first entry")
                return True, None

            # No point sleeping after the final attempt.
            if attempt < max_retries - 1:
                logger.info(f"Waiting {retry_delay} seconds before retry...")
                time.sleep(retry_delay)

    return False, None


def safe_add_certificate_to_dataset(certificate_image, hf_username, max_retries=5, retry_delay=3):
    """
    Safely add a new certificate to the image dataset.

    Loads the existing dataset (with retries), rejects duplicate usernames,
    appends the new (image, label) row, pushes a timestamped backup repo when
    prior data existed, and finally pushes the combined dataset to
    CERTIFICATE_DATASET_NAME. Handles both empty and existing datasets.

    Args:
        certificate_image: PIL Image object or path to an existing image file
        hf_username: HF username string (surrounding whitespace is stripped)
        max_retries: Maximum number of attempts to load the existing dataset
        retry_delay: Delay between load retries in seconds

    Returns:
        tuple: (success: bool, message: str). Errors are reported through the
        returned message; exceptions raised inside are caught and logged.
    """
    try:
        logger.info("Starting new certificate upload process")

        # Validate inputs
        if not hf_username or not hf_username.strip():
            return False, "❌ Error: HF username is required"

        if certificate_image is None:
            return False, "❌ Error: Certificate image is required"

        # Normalize username
        hf_username = hf_username.strip()
        logger.info(f"Processing certificate for user: {hf_username}")

        load_successful, existing_dataset = _load_existing_certificates(max_retries, retry_delay)

        # Could not load, and the failure was not an "empty dataset" error
        if not load_successful:
            logger.error("🚨 CRITICAL ERROR: Could not access dataset after multiple attempts.")
            return False, (
                "❌ Certificate upload temporarily unavailable due to technical issues. "
                "Please try again in a few minutes. If the problem persists, contact support."
            )

        # Check for duplicates (only if we have existing data)
        if existing_dataset is not None and hf_username in existing_dataset['label']:
            logger.warning("Duplicate certificate attempt detected")
            return False, f"❌ Error: A certificate for username '{hf_username}' already exists."

        # The temp directory must stay alive until the pushes finish, because
        # the dataset row references the image by file path.
        with tempfile.TemporaryDirectory() as temp_dir:
            if isinstance(certificate_image, PILImage.Image):
                # Keep user input from injecting path separators into the
                # temp file name; ordinary usernames are left unchanged.
                safe_name = "".join(
                    ch if ch.isalnum() or ch in "-_." else "_" for ch in hf_username
                )
                temp_image_path = os.path.join(
                    temp_dir, f"certificate_{safe_name}_{int(time.time())}.png"
                )
                certificate_image.save(temp_image_path, "PNG")
            elif isinstance(certificate_image, str) and os.path.exists(certificate_image):
                # Caller supplied a path to an existing file; use it directly.
                temp_image_path = certificate_image
            else:
                return False, "❌ Error: Invalid image format provided"

            # Create new dataset entry
            new_data = {
                "image": [temp_image_path],
                "label": [hf_username],
            }
            new_dataset = Dataset.from_dict(new_data).cast_column("image", Image())
            logger.info("Created new certificate dataset entry")

            had_existing_data = existing_dataset is not None

            # Combine with existing dataset or use new dataset if empty
            if had_existing_data:
                try:
                    # concatenate_datasets is a module-level function of
                    # `datasets`, not a method on Dataset.
                    from datasets import concatenate_datasets
                    combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
                    logger.info(
                        f"Combined dataset now has {len(combined_dataset)} certificates "
                        f"(was {len(existing_dataset)})"
                    )
                except Exception as concat_error:
                    logger.error(f"Failed to combine datasets: {concat_error}")
                    return False, f"❌ Error combining datasets: {str(concat_error)}"
            else:
                # First certificate in empty dataset
                combined_dataset = new_dataset
                logger.info("Creating first certificate in empty dataset")

            backup_timestamp = int(time.time())

            try:
                # Create backup first (only if we had existing data).
                # NOTE(review): the backup contains the combined data, not the
                # pre-upload snapshot - confirm this is intended.
                if had_existing_data:
                    backup_name = f"{CERTIFICATE_DATASET_NAME}-auto-backup-{backup_timestamp}"
                    logger.info(f"Creating backup: {backup_name}")
                    combined_dataset.push_to_hub(backup_name, private=True)

                logger.info("Pushing to main certificate dataset...")
                combined_dataset.push_to_hub(CERTIFICATE_DATASET_NAME, private=True)

                logger.info("✅ Successfully saved new certificate")
                logger.info(f"Total certificates in dataset: {len(combined_dataset)}")

                # Quick verification: best effort only, a failure here does
                # not change the reported result.
                time.sleep(2)
                try:
                    HfApi().list_repo_files(CERTIFICATE_DATASET_NAME, repo_type="dataset")
                    logger.info("✅ Upload verification: Files updated successfully")
                except Exception:
                    logger.warning("⚠️ Could not verify upload (this may be normal)")

                return True, f"✅ Certificate successfully uploaded for {hf_username}!"

            except Exception as upload_error:
                error_msg = str(upload_error).lower()
                if any(indicator in error_msg for indicator in ['rate limit', '429', 'too many requests']):
                    logger.warning("🚨 Rate limit hit - certificate upload system temporarily busy")
                    return False, "⏳ Certificate upload temporarily unavailable due to high server load. Please try again in 10-15 minutes."
                logger.error(f"Upload failed: {upload_error}")
                return False, f"❌ Certificate upload failed: {str(upload_error)}"

    except Exception as e:
        # logger.exception records the traceback through the logging system.
        logger.exception(f"❌ Unexpected error in certificate upload: {e}")
        return False, f"❌ Certificate upload failed: {str(e)}"
172
+
173
def upload_user_certificate(certificate_image, hf_username):
    """
    Upload a user's certificate - simplified entry point for Space A.

    Rejects a missing image or a blank username up front, then delegates to
    safe_add_certificate_to_dataset with its default retry settings.

    Args:
        certificate_image: PIL Image object of the generated certificate
        hf_username: User's Hugging Face username

    Returns:
        tuple: (success: bool, message: str)
    """
    # Guard clause: any falsy image value counts as "not provided".
    if not certificate_image:
        return False, "❌ No certificate image provided"

    # Guard clause: None, empty and whitespace-only usernames are rejected.
    username_is_blank = not (hf_username and hf_username.strip())
    if username_is_blank:
        return False, "❌ HF username is required"

    # Hand over to the safe upload routine and relay its result unchanged.
    outcome, detail = safe_add_certificate_to_dataset(certificate_image, hf_username)
    return outcome, detail
196
+
197
def check_certificate_dataset_health():
    """Check if the certificate dataset is accessible and healthy.

    Tries to load the "train" split of CERTIFICATE_DATASET_NAME. An error
    whose message contains "no data" is treated as an existing-but-empty
    dataset, which is still considered healthy.

    Returns:
        bool: True if the dataset loaded or appears to be empty; False for
        any other failure (the error is logged, never raised).
    """
    try:
        from datasets import load_dataset

        dataset = load_dataset(CERTIFICATE_DATASET_NAME, split="train")
        logger.info(f"✅ Certificate dataset health check passed - found {len(dataset)} certificates")
        return True
    except Exception as e:
        # "no data" also covers the more specific "corresponds to no data"
        # message, so a single substring test is sufficient.
        if "no data" in str(e).lower():
            logger.info("✅ Certificate dataset exists but is empty - ready for first upload")
            return True
        logger.error(f"❌ Certificate dataset health check failed: {e}")
        return False
216
+
217
# Health check on import.
# NOTE: this runs when the module is imported and calls
# check_certificate_dataset_health(), which tries to load the dataset.
logger.info("🚀 Certificate Upload Module Initialized")
logger.info(f"📊 Target Dataset: {CERTIFICATE_DATASET_NAME}")

if check_certificate_dataset_health():
    logger.info("✅ Certificate dataset is healthy and ready")
else:
    # A failed check is only reported; the module still finishes importing.
    logger.warning("⚠️ Certificate dataset health warnings detected")