|
import os
import shutil
from typing import Optional

from huggingface_hub import snapshot_download
|
|
|
def download_filtered_space_files(
    space_id: str,
    local_dir: str = "repo_files",
    file_extensions: Optional[list] = None,
) -> None:
    """Download only files with the given extensions from a Hugging Face Space.

    If ``local_dir`` already exists it is removed recursively before the
    download starts, so afterwards it holds only the result of this call.

    Args:
        space_id: The ID of the Hugging Face Space
            (e.g., "naman1102/Final_Assignment_Template").
        local_dir: Local directory to store the downloaded files. Any
            existing content at this path is deleted first.
        file_extensions: File extensions to include (e.g., ['.py', '.md']).
            A single extension may also be passed as a plain string.

    Raises:
        ValueError: If ``file_extensions`` is None or empty. Filtering is
            mandatory; there is no "download everything" mode.
    """
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    # A bare string would otherwise be iterated character by character and
    # turn ".py" into the patterns "*.", "*p" and "*y".
    if isinstance(file_extensions, str):
        file_extensions = [file_extensions]
    suffixes = tuple(file_extensions)

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(suffixes)}")

    # Start from a clean directory so files from an earlier run are not
    # mistaken for part of this download.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)

    snapshot_download(
        repo_id=space_id,
        repo_type="space",
        local_dir=local_dir,
        allow_patterns=[f"*{ext}" for ext in suffixes],
    )

    # Count the matching files that are actually on disk after the download;
    # str.endswith accepts a tuple, so one call checks every extension.
    downloaded = sum(
        name.endswith(suffixes)
        for _, _, names in os.walk(local_dir)
        for name in names
    )

    print(f"Downloaded {downloaded} filtered file(s) to: {local_dir}")
|
|
|
|
|
|
|
|
|
from huggingface_hub import list_spaces |
|
|
|
def search_top_spaces(query: str, limit: int = 5) -> list:
    """Search Hugging Face Spaces by keyword and return the top repo IDs.

    Results are requested sorted by likes with ``direction=-1``.

    Args:
        query: The keyword to search for (e.g., "image", "chatbot").
        limit: Maximum number of results to return. A value of zero or
            less yields an empty list.

    Returns:
        List of Space repo IDs, at most ``limit`` entries long.
    """
    # A non-positive limit cannot select anything; returning early avoids a
    # needless network call and the surprising "all but the last N" result
    # that a negative slice bound would give.
    if limit <= 0:
        return []

    # Hand the limit to list_spaces instead of collecting every matching
    # Space into a list and slicing it afterwards.
    spaces = list_spaces(search=query, sort="likes", direction=-1, limit=limit)
    return [space.id for space in spaces]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|