"""Gradio front end for the "Sports Cards Dataset Processor".

The app takes a Google Drive folder ID and a naming convention, then runs a
three-step pipeline through ``DatasetManager``: authenticate with Google
Drive, download and rename the files, and update the Hugging Face dataset.
"""

import logging
import os

import gradio as gr
import pandas as pd
import yaml
from datasets import load_dataset, Dataset, concatenate_datasets
from PIL import Image
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from tqdm import tqdm

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load settings. Fail fast at import time with an actionable message when the
# configuration file is absent.
if not os.path.exists("settings.yaml"):
    raise FileNotFoundError(
        "settings.yaml file is missing. Please add it with 'client_secrets_file'."
    )

# Explicit encoding so the file is decoded the same way on every platform.
with open('settings.yaml', 'r', encoding="utf-8") as file:
    settings = yaml.safe_load(file)

# NOTE(review): the utility functions and the DatasetManager class belong here.
# The source only contains the placeholder below, so DatasetManager (used by
# process_pipeline) must be restored from the previous revision of this file.
# [... keep all the utility functions and DatasetManager class the same ...]


def _strip_text(value):
    """Return *value* without surrounding whitespace when it is a string."""
    return value.strip() if isinstance(value, str) else value


def process_pipeline(folder_id, naming_convention):
    """Main pipeline for processing images and updating dataset.

    Args:
        folder_id: Google Drive folder ID entered by the user.
        naming_convention: Naming convention passed to the rename step.

    Returns:
        A ``(status_message, renamed_files)`` tuple. ``renamed_files`` is an
        empty list when validation, authentication, or the download step
        fails; otherwise it is whatever ``download_and_rename_files`` returned.
    """
    # Pasted IDs often carry stray whitespace; strip it before validating so
    # a blank-looking value is rejected here instead of failing later.
    folder_id = _strip_text(folder_id)
    naming_convention = _strip_text(naming_convention)

    # Validate input
    if not folder_id or not naming_convention:
        return "Please provide both folder ID and naming convention", []

    manager = DatasetManager()

    # Step 1: Authenticate Google Drive
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message, []

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(
        folder_id, naming_convention
    )
    if not success:
        return message, []

    # Step 3: Update Hugging Face dataset
    # The first element is treated as a success flag, mirroring the other
    # DatasetManager calls above. The message is returned to the caller either
    # way; a reported failure is additionally written to the log.
    hf_success, hf_message = manager.update_huggingface_dataset(renamed_files)
    if not hf_success:
        logger.warning("Hugging Face dataset update failed: %s", hf_message)

    return f"{message}\n{hf_message}", renamed_files


def process_ui(folder_id, naming_convention):
    """UI handler for the process pipeline.

    Runs ``process_pipeline`` and converts its list of file records into rows
    of ``[original_name, new_name, file_path]`` for the results table.

    Returns:
        A ``(status, table_data)`` tuple; ``table_data`` is an empty list when
        no files were renamed.
    """
    status, renamed_files = process_pipeline(folder_id, naming_convention)
    table_data = [
        [file['original_name'], file['new_name'], file['file_path']]
        for file in renamed_files or []
    ]
    return status, table_data


# Simplified Gradio interface
demo = gr.Interface(
    fn=process_ui,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from the URL"
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., sports_card",
            value="sports_card"
        )
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Dataframe(
            headers=["Original Name", "New Name", "File Path"]
        )
    ],
    title="Sports Cards Dataset Processor",
    # NOTE(review): the description text is reproduced exactly as it appears
    # in the source, where it sits on a single line. It was presumably a
    # multi-line numbered list originally -- confirm and restore the line
    # breaks if so.
    description=(
        " Instructions: 1. Enter the Google Drive folder ID (found in the"
        " folder's URL) 2. Specify a naming convention for the files (e.g.,"
        " 'sports_card') 3. Click submit to start processing Note: Only image"
        " files will be processed. Invalid images will be skipped. "
    )
)

if __name__ == "__main__":
    demo.launch()