GotThatData committed on
Commit
6a538c3
·
verified ·
1 Parent(s): dd6a41a
Files changed (1) hide show
  1. app.py +103 -1
app.py CHANGED
@@ -3,11 +3,14 @@ from pydrive2.drive import GoogleDrive
3
  import os
4
  import gradio as gr
5
  from datasets import load_dataset, Dataset, concatenate_datasets
 
 
6
  import pandas as pd
7
  from PIL import Image
8
  from tqdm import tqdm
9
  import logging
10
  import yaml
 
11
 
12
  # Set up logging
13
  logging.basicConfig(
@@ -53,6 +56,77 @@ def validate_input(folder_id, naming_convention):
53
  return False, "Naming convention should only contain letters, numbers, and underscores"
54
  return True, ""
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # DatasetManager Class
57
  class DatasetManager:
58
  def __init__(self, local_images_dir="downloaded_cards"):
@@ -60,6 +134,11 @@ class DatasetManager:
60
  self.drive = None
61
  self.dataset_name = "GotThatData/sports-cards"
62
  os.makedirs(local_images_dir, exist_ok=True)
 
 
 
 
 
63
 
64
  def authenticate_drive(self):
65
  """Authenticate with Google Drive."""
@@ -259,4 +338,27 @@ demo = gr.Interface(
259
  )
260
 
261
  if __name__ == "__main__":
262
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import os
4
  import gradio as gr
5
  from datasets import load_dataset, Dataset, concatenate_datasets
6
+ from huggingface_hub import create_repo, upload_file, login
7
+ from pathlib import Path
8
  import pandas as pd
9
  from PIL import Image
10
  from tqdm import tqdm
11
  import logging
12
  import yaml
13
+ import json
14
 
15
  # Set up logging
16
  logging.basicConfig(
 
56
  return False, "Naming convention should only contain letters, numbers, and underscores"
57
  return True, ""
58
 
59
def initialize_dataset():
    """Push the dataset card and metadata file to the Hugging Face Hub.

    Builds the README.md dataset card and a ``dataset-info.json`` metadata
    document for the ``GotThatData/sports-cards`` dataset repository and
    uploads both with ``upload_file``. Both files are unconditionally
    (re)uploaded on every call; there is no existence check.

    The content is uploaded from in-memory bytes rather than from files
    written to the current working directory, so calling this function does
    not overwrite a local ``README.md`` (for example the application's own)
    and leaves no stray files behind.

    Returns:
        tuple[bool, str]: ``(True, message)`` when both uploads succeed,
        otherwise ``(False, message)`` where the message carries the error
        text. The function never raises; callers inspect the flag.
    """
    repo_id = "GotThatData/sports-cards"

    readme_content = """# Sports Cards Dataset

This dataset contains sports card images with structured metadata. Each image is named using a consistent convention and includes relevant information about the card.

## Dataset Structure

```
sports_card_{number}.jpg - Card images
```

## Features
- file_path: Path to the image file
- original_name: Original filename of the card
- new_name: Standardized filename
- image: Image data

## Usage
```python
from datasets import load_dataset
dataset = load_dataset("GotThatData/sports-cards")
```

## License
This dataset is licensed under MIT.

## Creator
Created by GotThatData
"""

    # NOTE(review): "image" is declared as a string Value here although the
    # README describes it as image data -- confirm which one is intended.
    dataset_info = {
        "description": "A collection of sports card images with metadata",
        "citation": "",
        "homepage": "https://huggingface.co/datasets/GotThatData/sports-cards",
        "license": "mit",
        "features": {
            "file_path": {"dtype": "string", "_type": "Value"},
            "original_name": {"dtype": "string", "_type": "Value"},
            "new_name": {"dtype": "string", "_type": "Value"},
            "image": {"dtype": "string", "_type": "Value"},
        },
        "splits": ["train"],
    }

    try:
        # Encode explicitly as UTF-8 so the uploaded bytes do not depend on
        # the platform's default text encoding.
        payloads = {
            "README.md": readme_content.encode("utf-8"),
            "dataset-info.json": json.dumps(dataset_info, indent=2).encode("utf-8"),
        }

        # README first, then the metadata file (same order as before).
        for path_in_repo, payload in payloads.items():
            upload_file(
                path_or_fileobj=payload,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type="dataset",
            )

        return True, "Dataset structure initialized successfully"
    except Exception as e:
        # Deliberate best-effort boundary: the caller only logs a warning on
        # failure, so report the error through the return value.
        return False, f"Failed to initialize dataset: {str(e)}"
129
+
130
  # DatasetManager Class
131
  class DatasetManager:
132
  def __init__(self, local_images_dir="downloaded_cards"):
 
134
  self.drive = None
135
  self.dataset_name = "GotThatData/sports-cards"
136
  os.makedirs(local_images_dir, exist_ok=True)
137
+
138
+ # Initialize dataset structure
139
+ success, message = initialize_dataset()
140
+ if not success:
141
+ logger.warning(f"Dataset initialization warning: {message}")
142
 
143
  def authenticate_drive(self):
144
  """Authenticate with Google Drive."""
 
338
  )
339
 
340
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 so the app is reachable from
    # outside the container/host it runs in.
    #
    # NOTE: ``launch()`` does not accept a ``headers`` keyword; passing one
    # raises ``TypeError`` before the server starts. Response headers such as
    # Permissions-Policy or the Cross-Origin-* policies have to be added by
    # an HTTP middleware on the underlying app or by a reverse proxy in
    # front of it, not through ``launch()``.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )