Spaces:
Sleeping
Sleeping
from huggingface_hub import snapshot_download | |
import os | |
import shutil | |
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Download only the files with the given extensions from a Hugging Face Space.

    Any existing ``local_dir`` is deleted before the download starts, so stale
    files from a previous run never mix with the new ones.

    Args:
        space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
            WARNING: it is removed with ``shutil.rmtree`` if it already exists.
        file_extensions (list): File extensions to include (e.g., ['.py', '.md']).
            Required. A single string such as '.py' is treated as a one-item list.

    Raises:
        ValueError: If ``file_extensions`` is None or empty.
    """
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    # A bare string would otherwise be iterated character by character,
    # producing patterns such as "*." and "*p" instead of "*.py".
    if isinstance(file_extensions, str):
        file_extensions = [file_extensions]

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Clear out local_dir if it already exists
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)

    # Convert file extensions to allow_patterns format (e.g., ['.py', '.md'] -> ['*.py', '*.md'])
    allow_patterns = [f"*{ext}" for ext in file_extensions]

    # Download directly to local_dir with filtering during download
    snapshot_download(
        repo_id=space_id,
        repo_type="space",
        local_dir=local_dir,
        allow_patterns=allow_patterns,
    )

    # Count downloaded files for feedback. The suffix check is kept so that any
    # non-matching files found under local_dir are not counted.
    suffixes = tuple(file_extensions)  # str.endswith accepts a tuple: one call per file
    copied_files = 0
    for root, _, files in os.walk(local_dir):
        for file in files:
            if file.endswith(suffixes):
                rel_path = os.path.relpath(os.path.join(root, file), local_dir)
                print(f"DEBUG: Downloaded file: {rel_path}")
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
# Example usage | |
# download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files | |
from huggingface_hub import list_spaces | |
def search_top_spaces(query: str, limit: int = 5):
    """
    Search and return top Hugging Face Space repo IDs based on a keyword.

    Results are requested sorted by likes in descending order, and at most
    ``limit`` of them are fetched and returned.

    Args:
        query (str): The keyword to search for (e.g., "image", "chatbot").
        limit (int): Maximum number of results to return. Zero or a negative
            value returns an empty list without querying the Hub; ``None``
            returns every match.
    Returns:
        List of repo IDs.
    """
    from itertools import islice

    # A non-positive maximum cannot yield any results; skip the network call.
    if limit is not None and limit <= 0:
        return []

    # Pass the limit through so the Hub is not asked for every matching Space,
    # and consume the iterator lazily instead of materialising it first.
    spaces = list_spaces(search=query, sort="likes", direction=-1, limit=limit)

    # islice is a defensive cap in case more than `limit` items are yielded.
    return [space.id for space in islice(spaces, limit)]
# Example usage | |
# top_image_spaces = search_top_spaces("tic tac toe", limit=10) | |
# print("Top games-related Spaces:") | |
# for space_id in top_image_spaces: | |
# print("-", space_id) | |