import logging
import os
import re
import shutil
import tempfile
from pathlib import Path

import git
from huggingface_hub import (
    create_repo,
    upload_folder,
    list_repo_files,
    # delete_file,  # Not used
    Repository,
    whoami,
)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Markdown line prefixes recognised by parse_markdown.
_SPACE_MARKER = "# Space:"
_STRUCTURE_MARKER = "## File Structure"
_FILE_MARKER = "### File:"
_SECTION_MARKERS = (_FILE_MARKER, _STRUCTURE_MARKER, _SPACE_MARKER)


def _append_file(files, path, content_lines):
    """Append a ``{"path", "content"}`` entry to *files* if both are non-empty."""
    if path and content_lines:
        files.append({"path": path, "content": "\n".join(content_lines)})


def _resolve_inside(base_dir, relative_path):
    """Resolve *relative_path* under *base_dir*, refusing paths that escape it.

    Returns the resolved ``Path`` on success, or ``None`` when the path is
    absolute, climbs out of *base_dir* via ``..``, names *base_dir* itself,
    or touches a ``.git`` directory (writing there could alter git
    configuration that is used when the clone is pushed).
    """
    base = Path(base_dir).resolve()
    # Joining with an absolute path discards `base`, which the check below catches.
    candidate = (base / relative_path).resolve()
    if base not in candidate.parents:
        return None
    if any(part.lower() == ".git" for part in candidate.relative_to(base).parts):
        return None
    return candidate


# Function to parse markdown input
def parse_markdown(markdown_input):
    """Parse markdown input to extract space details and file structure.

    Recognised markers (matched against the whitespace-stripped line):

    * ``# Space: [owner/]name`` sets ``owner_md`` / ``repo_name_md``.
    * ``## File Structure`` is a section header; it only ends the current file.
    * ``### File: path`` starts collecting content for ``path``.

    Everything between a ``### File:`` marker and the next marker is stored
    as that file's content. Markers that appear inside a fenced code block
    are treated as ordinary content, not as section boundaries.

    Returns:
        dict with keys ``repo_name_md`` (str), ``owner_md`` (str) and
        ``files`` (list of ``{"path": str, "content": str}``). Files with an
        empty path or no content lines are omitted.
    """
    space_info = {"repo_name_md": "", "owner_md": "", "files": []}
    if not markdown_input:
        return space_info

    current_file = None
    file_content = []
    in_file_content = False  # Inside the body of a "### File:" section
    in_code_block = False  # Inside a ``` fenced block of the current file

    for raw_line in markdown_input.strip().split("\n"):
        stripped = raw_line.strip()

        if in_file_content:
            if stripped.startswith("```"):
                # NOTE(review): fence lines are kept as part of the stored
                # content, so written files include the ``` lines. Confirm
                # this is intended.
                in_code_block = not in_code_block
                file_content.append(raw_line)
                continue
            if in_code_block:
                # Fenced content is opaque: a marker-looking line here must
                # not end the file.
                file_content.append(raw_line)
                continue
            if not stripped.startswith(_SECTION_MARKERS):
                # Plain text between/after code blocks still belongs to the file.
                file_content.append(raw_line)
                continue
            # A marker outside any fence: fall through so the structural
            # handling below saves the current file and starts the next section.

        if stripped.startswith(_SPACE_MARKER):
            _append_file(space_info["files"], current_file, file_content)
            full_space_name_md = stripped[len(_SPACE_MARKER):].strip()
            if "/" in full_space_name_md:
                owner, name = full_space_name_md.split("/", 1)
                space_info["owner_md"] = owner
                space_info["repo_name_md"] = name
            else:
                space_info["repo_name_md"] = full_space_name_md
            current_file = None
            file_content = []
            in_file_content = False
            in_code_block = False
        elif stripped.startswith(_STRUCTURE_MARKER):
            # Just a section header.
            _append_file(space_info["files"], current_file, file_content)
            current_file = None
            file_content = []
            in_file_content = False
            in_code_block = False
        elif stripped.startswith(_FILE_MARKER):
            _append_file(space_info["files"], current_file, file_content)
            current_file = stripped[len(_FILE_MARKER):].strip()
            file_content = []
            in_file_content = True
            in_code_block = False
        # Note: 📄 and 📁 tree entries are ignored; only "### File:" sections
        # define files. Empty files listed only in the tree are not created.

    # Append the last file's content if any
    _append_file(space_info["files"], current_file, file_content)

    # Filter out empty path entries
    space_info["files"] = [f for f in space_info["files"] if f.get("path")]
    return space_info


def _determine_repo_id(api_token, space_name_ui, owner_ui):
    """Determine the final owner and construct the repo_id.

    Args:
        api_token: Hugging Face token; only used (via ``whoami``) when
            *owner_ui* is empty.
        space_name_ui: Bare space name, not ``owner/name``.
        owner_ui: Value of the UI's owner field; may be empty.

    Returns:
        ``(repo_id, None)`` on success or ``(None, error_message)`` on failure.
    """
    if not space_name_ui:
        return None, "Error: Space Name cannot be empty."
    if "/" in space_name_ui:  # User should not put slash in space name field
        return None, "Error: Space Name should not contain '/'. Please use the Owner field for the namespace."

    final_owner = owner_ui
    error_message = None

    if not final_owner:
        if not api_token:
            return None, "Error: API token is required to automatically determine owner when Owner field is empty."
        try:
            user_info = whoami(token=api_token)
            if user_info and 'name' in user_info:
                final_owner = user_info['name']
                logger.info(f"Determined owner: {final_owner} from API token.")
            else:
                logger.error(f"whoami(token=...) returned: {user_info} - 'name' field missing or user_info is None.")
                error_message = "Error: Could not retrieve username from API token. Ensure token is valid and has 'Read profile' permissions. Or, specify Owner manually."
        except Exception as e:
            logger.error(f"Error calling whoami for owner: {str(e)}")
            error_message = f"Error retrieving username from API token: {str(e)}. Please specify Owner manually."

    if error_message:
        return None, error_message
    if not final_owner:
        return None, "Error: Owner could not be determined. Please provide an owner or ensure your API token is valid."

    return f"{final_owner}/{space_name_ui}", None


# Function to create and populate a Space
def create_space(api_token, space_name_ui, owner_ui, sdk_ui, markdown_input):
    """Create a Hugging Face Space and populate it with files from markdown input.

    The files parsed from *markdown_input* are written to a temporary
    directory, the Space repo is created (an existing repo is reused), and
    the directory is uploaded in a single commit.

    Returns:
        A user-facing status string; errors are reported in the string
        rather than raised.
    """
    repo_id = None
    try:
        if not api_token:
            return "Error: Please provide a valid Hugging Face API token."

        repo_id, err = _determine_repo_id(api_token, space_name_ui, owner_ui)
        if err:
            return err

        space_info = parse_markdown(markdown_input)
        if not space_info["files"]:
            return "Error: No files found in the markdown input. Ensure '### File: path/to/file.ext' markers are used correctly with content."

        # Create temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            repo_local_path = Path(temp_dir) / "repo_content_for_upload"
            repo_local_path.mkdir(exist_ok=True)

            # Write files to temporary directory
            for file_info in space_info["files"]:
                relative_path = file_info.get("path")
                if not relative_path:
                    logger.warning(f"Skipping file with no path: {file_info}")
                    continue
                # Paths come from user-supplied markdown: never write outside
                # the upload directory.
                file_path_abs = _resolve_inside(repo_local_path, relative_path)
                if file_path_abs is None:
                    return f"Error: Invalid file path '{relative_path}'. Paths must be relative and stay inside the Space repository."
                file_path_abs.parent.mkdir(parents=True, exist_ok=True)
                with open(file_path_abs, "w", encoding="utf-8") as f:
                    f.write(file_info["content"])
                logger.info(f"Wrote file: {file_path_abs}")

            # Create repository on Hugging Face. exist_ok=True makes an
            # already-existing Space a no-op instead of an error, so no
            # matching on the error text is needed.
            try:
                create_repo(
                    repo_id=repo_id,
                    token=api_token,
                    repo_type="space",
                    space_sdk=sdk_ui,
                    private=False,
                    exist_ok=True,
                )
                logger.info(f"Space repo {repo_id} is ready (created or already existing).")
            except Exception as e:
                return f"Error creating Space '{repo_id}': {str(e)}"

            # Push to Hugging Face Space
            upload_folder(
                repo_id=repo_id,
                folder_path=str(repo_local_path),  # upload_folder expects string path
                path_in_repo=".",
                token=api_token,
                repo_type="space",
                commit_message=f"Initial Space setup of {repo_id} via Builder",
                # allow_patterns / ignore_patterns can be passed here to
                # restrict which files are uploaded.
            )
            logger.info(f"Uploaded files to Space: {repo_id}")

        return f"Successfully created/updated Space: [{repo_id}](https://huggingface.co/spaces/{repo_id})"
    except Exception as e:
        # Log full traceback
        logger.exception(f"Error in create_space for {repo_id or 'unknown repo'}:")
        return f"Error during Space creation/update: {str(e)}"


# Function to view Space files
def view_space_files(api_token, space_name_ui, owner_ui):
    """List files in a Hugging Face Space.

    Returns:
        A markdown bullet list of the files in the Space, a "no files"
        message, or an error string.
    """
    repo_id_for_error = f"{owner_ui}/{space_name_ui}" if owner_ui else space_name_ui
    try:
        if not api_token:
            return "Error: Please provide a valid Hugging Face API token."

        repo_id, err = _determine_repo_id(api_token, space_name_ui, owner_ui)
        if err:
            return err

        files = list_repo_files(repo_id=repo_id, token=api_token, repo_type="space")
        if not files:
            return f"No files found in the Space `{repo_id}`."
        return f"Files in `{repo_id}`:\n\n" + "\n".join([f"- `{f}`" for f in files])
    except Exception as e:
        logger.exception(f"Error in view_space_files for {repo_id_for_error}:")
        return f"Error listing files for `{repo_id_for_error}`: {str(e)}"


# Function to update a Space file
def update_space_file(api_token, space_name_ui, owner_ui, file_path_in_repo, file_content, commit_message_ui):
    """Update a file in a Hugging Face Space with a commit.

    The Space is cloned into a temporary directory, the file is (over)written
    with *file_content* (``None`` is treated as an empty file), and the change
    is pushed with *commit_message_ui* (a default message is used when empty).

    Returns:
        A user-facing status string; errors are reported in the string
        rather than raised.
    """
    repo_id_for_error = f"{owner_ui}/{space_name_ui}" if owner_ui else space_name_ui
    try:
        if not api_token:
            return "Error: Please provide a valid Hugging Face API token."

        repo_id, err = _determine_repo_id(api_token, space_name_ui, owner_ui)
        if err:
            return err

        if not file_path_in_repo:
            return "Error: File Path cannot be empty."
        if not commit_message_ui:
            commit_message_ui = f"Update {file_path_in_repo} via Space Builder"

        with tempfile.TemporaryDirectory() as temp_dir:
            repo_local_clone_path = Path(temp_dir) / "cloned_space_repo"

            # Validate before cloning so a bad path costs no network round trip.
            full_local_file_path = _resolve_inside(repo_local_clone_path, file_path_in_repo)
            if full_local_file_path is None:
                return "Error: File Path must be a relative path inside the Space repository."

            # NOTE(review): `use_auth_token` may be deprecated in newer
            # huggingface_hub releases in favour of `token` — confirm against
            # the pinned version.
            cloned_repo = Repository(
                local_dir=str(repo_local_clone_path),  # Repository expects string path
                clone_from=f"https://huggingface.co/spaces/{repo_id}",
                repo_type="space",
                use_auth_token=api_token,
                git_user="Space Builder Bot",
                git_email="space-builder@huggingface.co",
            )
            logger.info(f"Cloned Space {repo_id} to {repo_local_clone_path}")

            # Repository.local_dir is a plain string, so the target path is
            # built from the Path computed above rather than `local_dir / ...`.
            full_local_file_path.parent.mkdir(parents=True, exist_ok=True)
            with open(full_local_file_path, "w", encoding="utf-8") as f:
                f.write(file_content if file_content is not None else "")
            logger.info(f"Wrote updated file {file_path_in_repo} locally.")

            cloned_repo.push_to_hub(commit_message=commit_message_ui)
            logger.info(f"Pushed changes for {file_path_in_repo} to {repo_id}")

        return f"Successfully updated `{file_path_in_repo}` in Space [{repo_id}](https://huggingface.co/spaces/{repo_id})"
    except Exception as e:
        logger.exception(f"Error in update_space_file for {repo_id_for_error}:")
        return f"Error updating file for `{repo_id_for_error}`: {str(e)}"