Spaces:
Sleeping
Sleeping
update
Browse files
app.py
CHANGED
@@ -3,11 +3,14 @@ from pydrive2.drive import GoogleDrive
|
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
from datasets import load_dataset, Dataset, concatenate_datasets
|
|
|
|
|
6 |
import pandas as pd
|
7 |
from PIL import Image
|
8 |
from tqdm import tqdm
|
9 |
import logging
|
10 |
import yaml
|
|
|
11 |
|
12 |
# Set up logging
|
13 |
logging.basicConfig(
|
@@ -53,6 +56,77 @@ def validate_input(folder_id, naming_convention):
|
|
53 |
return False, "Naming convention should only contain letters, numbers, and underscores"
|
54 |
return True, ""
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# DatasetManager Class
|
57 |
class DatasetManager:
|
58 |
def __init__(self, local_images_dir="downloaded_cards"):
|
@@ -60,6 +134,11 @@ class DatasetManager:
|
|
60 |
self.drive = None
|
61 |
self.dataset_name = "GotThatData/sports-cards"
|
62 |
os.makedirs(local_images_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def authenticate_drive(self):
|
65 |
"""Authenticate with Google Drive."""
|
@@ -259,4 +338,27 @@ demo = gr.Interface(
|
|
259 |
)
|
260 |
|
261 |
if __name__ == "__main__":
|
262 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
import gradio as gr
|
5 |
from datasets import load_dataset, Dataset, concatenate_datasets
|
6 |
+
from huggingface_hub import create_repo, upload_file, login
|
7 |
+
from pathlib import Path
|
8 |
import pandas as pd
|
9 |
from PIL import Image
|
10 |
from tqdm import tqdm
|
11 |
import logging
|
12 |
import yaml
|
13 |
+
import json
|
14 |
|
15 |
# Set up logging
|
16 |
logging.basicConfig(
|
|
|
56 |
return False, "Naming convention should only contain letters, numbers, and underscores"
|
57 |
return True, ""
|
58 |
|
59 |
+
def initialize_dataset(repo_id="GotThatData/sports-cards"):
    """Upload the dataset card and metadata file to the Hub dataset repo.

    Both files (``README.md`` and ``dataset-info.json``) are built in memory
    and passed to ``upload_file`` as bytes, so nothing is written to the
    process working directory. Existing copies in the repository are
    overwritten on every call.

    Args:
        repo_id: Hub dataset repository to upload to. Defaults to the
            repository this application has always targeted.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``False`` and
        ``message`` carries the error text if anything raised.
    """
    try:
        # Assembled line by line so the rendered card does not depend on
        # how this function happens to be indented.
        readme_content = "\n".join([
            "# Sports Cards Dataset",
            "",
            "This dataset contains sports card images with structured metadata. Each image is named using a consistent convention and includes relevant information about the card.",
            "",
            "## Dataset Structure",
            "",
            "```",
            "sports_card_{number}.jpg - Card images",
            "```",
            "",
            "## Features",
            "- file_path: Path to the image file",
            "- original_name: Original filename of the card",
            "- new_name: Standardized filename",
            "- image: Image data",
            "",
            "## Usage",
            "```python",
            "from datasets import load_dataset",
            f'dataset = load_dataset("{repo_id}")',
            "```",
            "",
            "## License",
            "This dataset is licensed under MIT.",
            "",
            "## Creator",
            "Created by GotThatData",
        ]) + "\n"

        dataset_info = {
            "description": "A collection of sports card images with metadata",
            "citation": "",
            "homepage": f"https://huggingface.co/datasets/{repo_id}",
            "license": "mit",
            "features": {
                "file_path": {"dtype": "string", "_type": "Value"},
                "original_name": {"dtype": "string", "_type": "Value"},
                "new_name": {"dtype": "string", "_type": "Value"},
                # NOTE(review): declared as a string Value although the
                # README describes this column as image data — confirm the
                # intended schema.
                "image": {"dtype": "string", "_type": "Value"},
            },
            "splits": ["train"],
        }

        # Encode once and upload from memory: writing these to disk first
        # would clobber any README.md already present in the working
        # directory (e.g. the hosting app's own README).
        payloads = (
            ("README.md", readme_content.encode("utf-8")),
            (
                "dataset-info.json",
                json.dumps(dataset_info, indent=2).encode("utf-8"),
            ),
        )
        for path_in_repo, content in payloads:
            upload_file(
                path_or_fileobj=content,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type="dataset",
            )

        return True, "Dataset structure initialized successfully"
    except Exception as e:
        # Best-effort boundary: callers expect a status tuple, not a raise.
        return False, f"Failed to initialize dataset: {str(e)}"
|
129 |
+
|
130 |
# DatasetManager Class
|
131 |
class DatasetManager:
|
132 |
def __init__(self, local_images_dir="downloaded_cards"):
|
|
|
134 |
self.drive = None
|
135 |
self.dataset_name = "GotThatData/sports-cards"
|
136 |
os.makedirs(local_images_dir, exist_ok=True)
|
137 |
+
|
138 |
+
# Initialize dataset structure
|
139 |
+
success, message = initialize_dataset()
|
140 |
+
if not success:
|
141 |
+
logger.warning(f"Dataset initialization warning: {message}")
|
142 |
|
143 |
def authenticate_drive(self):
|
144 |
"""Authenticate with Google Drive."""
|
|
|
338 |
)
|
339 |
|
340 |
# Script entry point: start the Gradio app when this file is run directly.
if __name__ == "__main__":
    # NOTE(review): `headers` does not appear among the documented keyword
    # arguments of Gradio's `launch()` — confirm it is accepted by the
    # installed Gradio version, otherwise this call raises TypeError.
    demo.launch(
        # Listen on all interfaces at port 7860.
        server_name="0.0.0.0",
        server_port=7860,
        # Add security headers
        headers=[
            # Each listed browser feature is given an empty allowlist.
            ("Permissions-Policy",
             "accelerometer=(), "
             "ambient-light-sensor=(), "
             "battery=(), "
             "camera=(), "
             "document-domain=(), "
             "layout-animations=(), "
             "legacy-image-formats=(), "
             "microphone=(), "
             "oversized-images=(), "
             "sync-xhr=(), "
             "vr=(), "
             "wake-lock=()"
            ),
            # NOTE(review): these cross-origin policies may prevent the app
            # from being embedded in, or loading resources from, another
            # origin — verify against how the app is hosted.
            ("Cross-Origin-Embedder-Policy", "require-corp"),
            ("Cross-Origin-Opener-Policy", "same-origin"),
            ("Cross-Origin-Resource-Policy", "same-origin")
        ]
    )
|