Spaces:
Sleeping
Sleeping
| from huggingface_hub import snapshot_download | |
| import os | |
| import shutil | |
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Download only the files with the given extensions from a Hugging Face Space.

    The filtering is done by ``snapshot_download`` itself (via ``allow_patterns``),
    so files that do not match are never fetched. After the download, the matching
    files found under ``local_dir`` are listed and counted on stdout.

    Args:
        space_id (str): The ID of the Hugging Face Space
            (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
            WARNING: if this path already exists it is removed recursively
            before downloading.
        file_extensions (list): File-name suffixes to include (e.g., ['.py', '.md']).
            Required. A single string (e.g., '.py') is treated as a one-element list.

    Raises:
        ValueError: If ``file_extensions`` is None or empty.
    """
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    # A bare string would otherwise be iterated character by character and
    # yield patterns such as "*." and "*p" instead of "*.py".
    if isinstance(file_extensions, str):
        file_extensions = [file_extensions]
    # str.endswith accepts a tuple, so one call below checks every suffix.
    suffixes = tuple(file_extensions)

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(suffixes)}")

    # Clear out local_dir if it already exists so stale files from a previous
    # run are not counted or left behind.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)

    # Convert file extensions to allow_patterns format
    # (e.g., ['.py', '.md'] -> ['*.py', '*.md'])
    allow_patterns = [f"*{ext}" for ext in suffixes]

    # Download directly to local_dir with filtering during download
    snapshot_download(
        repo_id=space_id,
        repo_type="space",
        local_dir=local_dir,
        allow_patterns=allow_patterns,
    )

    # Count downloaded files for feedback
    copied_files = 0
    for root, _, files in os.walk(local_dir):
        for file in files:
            if file.endswith(suffixes):
                rel_path = os.path.relpath(os.path.join(root, file), local_dir)
                print(f"DEBUG: Downloaded file: {rel_path}")
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
| # Example usage | |
| # download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files | |
| from huggingface_hub import list_spaces | |
def search_top_spaces(query: str, limit: int = 5):
    """
    Search and return top Hugging Face Space repo IDs based on a keyword.

    Results are requested sorted by likes (``sort="likes", direction=-1``).

    Args:
        query (str): The keyword to search for (e.g., "image", "chatbot").
        limit (int): Maximum number of results to return. ``None`` returns
            every match; zero or a negative value returns an empty list.

    Returns:
        List of repo IDs (at most ``limit`` entries).
    """
    # A non-positive limit can never yield results; return early instead of
    # querying the Hub (a negative slice bound used to return almost everything).
    if limit is not None and limit <= 0:
        return []

    # Pass the limit to the Hub so fetching stops after `limit` entries rather
    # than paging through the entire result set and slicing afterwards.
    spaces = list_spaces(search=query, sort="likes", direction=-1, limit=limit)
    return [space.id for space in spaces]
| # Example usage | |
| # top_image_spaces = search_top_spaces("tic tac toe", limit=10) | |
| # print("Top games-related Spaces:") | |
| # for space_id in top_image_spaces: | |
| # print("-", space_id) | |