import os
import shutil
from typing import List, Optional

from huggingface_hub import list_spaces, snapshot_download


def download_filtered_space_files(
    space_id: str,
    local_dir: str = "repo_files",
    file_extensions: Optional[List[str]] = None,
) -> None:
    """
    Download only files with the specified extensions from a Hugging Face Space.

    WARNING: if ``local_dir`` already exists it is deleted (recursively)
    before the download starts, so never point it at a directory that holds
    anything you want to keep.

    Args:
        space_id (str): The ID of the Hugging Face Space
            (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
        file_extensions (list): Extensions to include (e.g., ['.py', '.md']).
            Each entry is turned into the glob pattern ``*<entry>``. A single
            string is treated as a one-element list. This argument is
            required despite its ``None`` default.

    Raises:
        ValueError: If ``file_extensions`` is None or empty.
    """
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    # A bare string would otherwise be iterated character by character,
    # producing patterns such as "*." and "*p" instead of "*.py".
    if isinstance(file_extensions, str):
        file_extensions = [file_extensions]

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Clear out local_dir if it already exists so stale files from a previous
    # run are not counted or mixed with the new download.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)

    # Convert file extensions to allow_patterns format
    # (e.g., ['.py', '.md'] -> ['*.py', '*.md']).
    allow_patterns = [f"*{ext}" for ext in file_extensions]

    # Download directly to local_dir; filtering happens during the download.
    snapshot_download(
        repo_id=space_id,
        repo_type="space",
        local_dir=local_dir,
        allow_patterns=allow_patterns,
    )

    # Count downloaded files for feedback. Only names ending in a requested
    # extension are counted, so any other file found under local_dir is ignored.
    suffixes = tuple(file_extensions)
    copied_files = sum(
        1
        for _, _, files in os.walk(local_dir)
        for file in files
        if file.endswith(suffixes)
    )

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")


# Example usage
# download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt'])  # Downloads only .py, .md, and .txt files


def search_top_spaces(query: str, limit: int = 5) -> List[str]:
    """
    Search and return top Hugging Face Space repo IDs based on a keyword.

    Results are requested from the Hub sorted by likes in descending order.

    Args:
        query (str): The keyword to search for (e.g., "image", "chatbot").
        limit (int): Maximum number of results to return. A value of zero or
            less returns an empty list without contacting the Hub.

    Returns:
        List of repo IDs.
    """
    # ``None`` is tolerated for backward compatibility and means "no cap".
    if limit is not None and limit <= 0:
        return []

    # Hand the cap to list_spaces so it stops fetching once enough results
    # have been yielded, instead of listing every match and slicing afterwards.
    spaces = list_spaces(search=query, sort="likes", direction=-1, limit=limit)
    return [space.id for space in spaces]


# Example usage
# top_image_spaces = search_top_spaces("tic tac toe", limit=10)
# print("Top games-related Spaces:")
# for space_id in top_image_spaces:
#     print("-", space_id)