Spaces · Running
Abdullah Meda committed · Commit 7dc78b3 · Parent(s): 4c57891
initial commit
Browse files:
- .gitignore (+174, -0)
- app.py (+36, -0)
- make_docs.py (+131, -0)
- repo2txt.py (+177, -0)
- repos_config.json (+142, -0)
.gitignore
ADDED
@@ -0,0 +1,174 @@
# Initially taken from Github's Python gitignore file

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# tests and logs
tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# vscode
.vs
.vscode

# Pycharm
.idea

# TF code
tensorflow_code

# Models
proc_data

# examples
runs
/runs_old
/wandb
/examples/runs
/examples/**/*.args
/examples/rag/sweep

# data
/data
serialization_dir

# emacs
*.*~
debug.env

# vim
.*.swp

#ctags
tags

# pre-commit
.pre-commit*

# .lock
*.lock

# DS_Store (MacOS)
.DS_Store

# ruff
.ruff_cache

# local
*.ipynb
docs/
repos/
app.py
ADDED
@@ -0,0 +1,36 @@
import gradio as gr
import os
import json
import subprocess
import tempfile
import shutil
from pathlib import Path

def list_huggingface_resources_names() -> list[str]:
    """List all the names of the libraries, services, and other resources available within the HuggingFace ecosystem.

    Returns:
        A list of libraries, services, and other resources available within the HuggingFace ecosystem
    """
    with open('repos_config.json', 'r') as f:
        repos = json.load(f)

    return [repo['title'] for repo in repos]


list_resources_demo = gr.Interface(
    fn=list_huggingface_resources_names,
    inputs=[],
    outputs="json",
    title="HuggingFace Ecosystem Explorer",
    description="Explore the names of the libraries, services, and other resources available within the HuggingFace ecosystem"
)

# Create tabbed interface
demo = gr.TabbedInterface(
    [list_resources_demo],
    ["List Resources"],
    title="HuggingFace Ecosystem Documentation Explorer",
)

demo.launch(mcp_server=True)
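Note: launching with mcp_server=True makes Gradio expose the interface's function as an MCP tool alongside the web UI (this requires a recent Gradio release with MCP support). As a quick sanity check, the tool's logic can be exercised without starting the server; a minimal sketch, assuming repos_config.json sits in the working directory:

import json

# Same logic as list_huggingface_resources_names(): read the config
# file and collect the display titles.
with open('repos_config.json', 'r') as f:
    repos = json.load(f)

print([repo['title'] for repo in repos][:3])
# e.g. ['HuggingFace Hub', 'HuggingFace Hub Python Library', 'Dataset Viewer']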
make_docs.py
ADDED
@@ -0,0 +1,131 @@
"""
Script to clone Hugging Face documentation repositories and organize them
based on their toctree structure with proper naming.
"""

import json
import os
import re
import shutil
import subprocess
import sys
import argparse
from tqdm import tqdm
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import yaml


def parse_toctree_yaml(file_path: str) -> Optional[Dict]:
    """Parse a YAML-based toctree file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except Exception as e:
        print(f"Error parsing YAML toctree {file_path}: {e}")
        return None


def run_command(cmd: List[str], cwd: Optional[str] = None) -> bool:
    """Run a shell command and return success status."""
    try:
        result = subprocess.run(cmd, cwd=cwd, check=True, capture_output=True, text=True)
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error running command {' '.join(cmd)}: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False

def clone_repo(repo_url: str, dir_to_clone: str, target_dir: str) -> bool:
    """Clone a repository to the target directory."""
    if os.path.exists(Path(target_dir) / Path(dir_to_clone)):
        print(f"Directory {target_dir} already exists, skipping clone")
        return True

    # Clone without checking out any files
    out_clone = run_command(["git", "clone", "--no-checkout", repo_url, target_dir])
    if not out_clone: return False

    # Initialize sparse checkout without cone mode
    sparse_init = run_command(["git", "sparse-checkout", "init", "--no-cone"], cwd=target_dir)
    if not sparse_init: return False

    # Set sparse checkout patterns to only include the specified directory. Pattern explanation:
    #   '/*'                  - include all files at root level
    #   '!/*'                 - exclude all files at root level (overrides previous)
    #   f'/{dir_to_clone}/'   - include the specific directory
    #   f'/{dir_to_clone}/**' - include everything under that directory
    sparse_patterns = ['/*', '!/*', f'/{dir_to_clone}/', f'/{dir_to_clone}/**']
    sparse_set = run_command(["git", "sparse-checkout", "set", "--no-cone"] + sparse_patterns, cwd=target_dir)
    if not sparse_set: return False

    # Check out the files based on sparse checkout configuration
    checkout = run_command(["git", "checkout", "main"], cwd=target_dir)
    if not checkout:
        # Try 'master' if 'main' fails
        checkout = run_command(["git", "checkout", "master"], cwd=target_dir)
        if not checkout:
            print(f"Failed to checkout main or master branch in {target_dir}")
            return False

    return True


def save_section_to_disk(section: Dict, file_path: Path, raw_docs_path: Path):

    title = section["title"]

    if "sections" in section:
        file_path = file_path / title
        os.makedirs(file_path, exist_ok=True)
        for subsection in section["sections"]:
            save_section_to_disk(subsection, file_path, raw_docs_path)

    else:
        try:
            local_path = raw_docs_path / f"{section['local']}.md"

            if not local_path.exists():
                local_path = raw_docs_path / f"{section['local']}.mdx"
                assert local_path.exists(), f"File {local_path} does not exist"

            shutil.copy(local_path, file_path / f"{title}{local_path.suffix}")

        except Exception as e:
            # TODO: Handle symlinks, missing files, and other edge cases
            pass


def make_docs(repos: Dict, args: Dict):

    for repo in tqdm(repos, desc="Consolidating 🤗 Documentation"):
        save_repo_docs_path = Path(f"{args.repos_dir}/{repo['repo_url'].split('/')[-1]}")
        clone_repo(repo["repo_url"], repo["subfolder"], str(save_repo_docs_path))

        repo_docs_path = save_repo_docs_path / repo["subfolder"]
        toctree = parse_toctree_yaml(repo_docs_path / "_toctree.yml")

        # print(toctree)

        save_doc_path = Path(f"{args.docs_dir}/{repo['title']}")
        os.makedirs(save_doc_path, exist_ok=True)

        for block in toctree:
            save_section_to_disk(block, save_doc_path, repo_docs_path)

        shutil.rmtree(save_repo_docs_path)

    shutil.rmtree(args.repos_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--docs_dir", type=str, default="docs")
    parser.add_argument("--repos_dir", type=str, default="repos")
    args = parser.parse_args()

    with open("repos_config.json", "r") as f:
        repos = json.load(f)

    make_docs(repos, args)
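For reference, clone_repo amounts to running git clone --no-checkout, git sparse-checkout init --no-cone, git sparse-checkout set --no-cone with the four patterns above, and finally git checkout main (falling back to master), so only the requested docs subfolder is ever materialized on disk. A minimal sketch of using it standalone, with arguments taken from the Transformers entry of repos_config.json (assumes git is on PATH; the import is safe because the script guards its entry point):

# Hypothetical standalone use of clone_repo from make_docs.py.
from make_docs import clone_repo

ok = clone_repo(
    repo_url="https://github.com/huggingface/transformers",
    dir_to_clone="docs/source/en",
    target_dir="repos/transformers",
)
# On success, repos/transformers/docs/source/en contains the markdown
# sources plus the _toctree.yml that make_docs() parses next.
print("cloned" if ok else "clone failed")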
repo2txt.py
ADDED
@@ -0,0 +1,177 @@
"""
A fork of github.com/donoceidon/repo2txt/blob/main/src/repo2txt/repo2txt.py

This version only includes the functionality to document the structure of a repository containing .md and .mdx files.
"""

import os
import argparse

def parse_args():
    """
    Parse command-line arguments for the script.

    Returns:
        argparse.Namespace: An object containing the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser(
        description='Document the structure of a repository containing .md and .mdx files.',
        epilog='Example usage:\n python repo2txt.py -r /path/to/repo -o output.txt',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('-r', '--repo_path', default=os.getcwd(),
                        help='Path to the directory to process. Defaults to the current directory.')
    parser.add_argument('-o', '--output_file', default='output.txt',
                        help='Name for the output text file. Defaults to "output.txt".')

    return parser.parse_args()


def should_ignore(item_path, output_file_path):
    """
    Determine if a given item should be ignored.
    Only includes .md and .mdx files, ignores hidden files and directories.

    Args:
        item_path (str): The path of the item (file or directory) to check.
        output_file_path (str): The path of the output file being written to.

    Returns:
        bool: True if the item should be ignored, False otherwise.
    """
    item_name = os.path.basename(item_path)

    # Ignore the output file itself
    if os.path.abspath(item_path) == os.path.abspath(output_file_path):
        return True

    # Ignore hidden files and directories
    if item_name.startswith('.'):
        return True

    # If it's a file, only include .md and .mdx files
    if os.path.isfile(item_path):
        file_ext = os.path.splitext(item_name)[1].lower()
        return file_ext not in ['.md', '.mdx']

    # Include directories (they will be traversed)
    return False


def write_tree(dir_path, output_file, output_file_path, prefix="", is_root=True):
    """
    Recursively write the directory tree to the output file.

    Args:
        dir_path (str): The path of the directory to document.
        output_file (file object): The file object to write to.
        output_file_path (str): The path of the output file being written to.
        prefix (str): Prefix string for line indentation and structure.
        is_root (bool): Flag to indicate if the current directory is the root.
    """
    if is_root:
        output_file.write("└── ./\n")
        # Add the actual directory name as a child of ./
        actual_dir_name = os.path.basename(dir_path)
        if actual_dir_name:
            output_file.write(f"    └── {actual_dir_name}\n")
            prefix = "        "
        else:
            prefix = "    "
        is_root = False

    try:
        items = os.listdir(dir_path)
    except PermissionError:
        return

    items.sort()

    # Filter out items that should be ignored
    filtered_items = []
    for item in items:
        item_path = os.path.join(dir_path, item)
        if not should_ignore(item_path, output_file_path):
            filtered_items.append(item)

    num_items = len(filtered_items)

    for index, item in enumerate(filtered_items):
        item_path = os.path.join(dir_path, item)
        is_last_item = (index == num_items - 1)
        new_prefix = "└── " if is_last_item else "├── "
        child_prefix = "    " if is_last_item else "│   "

        output_file.write(f"{prefix}{new_prefix}{item}\n")

        if os.path.isdir(item_path):
            next_prefix = prefix + child_prefix
            write_tree(item_path, output_file, output_file_path, next_prefix, is_root=False)


def write_file_content(file_path, output_file):
    """
    Write the contents of a given file to the output file.

    Args:
        file_path (str): Path of the file to read.
        output_file (file object): The file object to write the contents to.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
            for line in file:
                output_file.write(line)
    except Exception as e:
        output_file.write(f"Error reading file: {e}\n")


def write_file_contents_in_order(dir_path, output_file, output_file_path, repo_path):
    """
    Recursively document the contents of .md and .mdx files in directory order.

    Args:
        dir_path (str): The path of the directory to start documenting from.
        output_file (file object): The file object to write the contents to.
        output_file_path (str): The path of the output file being written to.
        repo_path (str): The root path of the repository for relative path calculation.
    """
    try:
        items = os.listdir(dir_path)
    except PermissionError:
        return

    items = sorted(item for item in items if not should_ignore(os.path.join(dir_path, item), output_file_path))

    for item in items:
        item_path = os.path.join(dir_path, item)
        relative_path = os.path.relpath(item_path, start=repo_path)

        if os.path.isdir(item_path):
            write_file_contents_in_order(item_path, output_file, output_file_path, repo_path)
        elif os.path.isfile(item_path):
            output_file.write(f"\n\n---\nFile: /{relative_path}\n---\n\n")
            write_file_content(item_path, output_file)


def main():
    """
    Main function to execute the script logic.
    """
    args = parse_args()

    # Check if the provided directory path is valid
    if not os.path.isdir(args.repo_path):
        print(f"Error: The specified directory does not exist: {args.repo_path}")
        return

    with open(args.output_file, 'w', encoding='utf-8') as output_file:
        output_file.write("Directory Structure:\n\n")
        write_tree(args.repo_path, output_file, args.output_file, "", is_root=True)
        write_file_contents_in_order(args.repo_path, output_file, args.output_file, args.repo_path)

    print(f"Documentation generated successfully: {args.output_file}")


if __name__ == "__main__":
    main()
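This script is run per consolidated docs folder produced by make_docs.py, e.g. python repo2txt.py -r docs/Transformers -o Transformers.txt. The output file starts with the tree written by write_tree() and then concatenates every .md/.mdx document behind a File: separator. An illustrative sketch of the result (the file names here are hypothetical; the layout is what the code emits):

Directory Structure:

└── ./
    └── Transformers
        ├── Installation.md
        └── Quickstart.md


---
File: /Installation.md
---

...contents of Installation.md...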
repos_config.json
ADDED
@@ -0,0 +1,142 @@
[
    {
        "repo_url": "https://github.com/huggingface/hub-docs",
        "subfolder": "docs/hub",
        "title": "HuggingFace Hub"
    },
    {
        "repo_url": "https://github.com/huggingface/huggingface_hub",
        "subfolder": "docs/source/en",
        "title": "HuggingFace Hub Python Library"
    },
    {
        "repo_url": "https://github.com/huggingface/dataset-viewer",
        "subfolder": "docs/source",
        "title": "Dataset Viewer"
    },
    {
        "repo_url": "https://github.com/huggingface/hub-docs",
        "subfolder": "docs/inference-providers",
        "title": "Inference Providers"
    },
    {
        "repo_url": "https://github.com/huggingface/text-generation-inference",
        "subfolder": "docs/source",
        "title": "Text Generation Inference"
    },
    {
        "repo_url": "https://github.com/huggingface/hf-endpoints-documentation",
        "subfolder": "docs/source",
        "title": "Inference Endpoints"
    },
    {
        "repo_url": "https://github.com/huggingface/text-embeddings-inference",
        "subfolder": "docs/source/en",
        "title": "Text Embeddings Inference"
    },
    {
        "repo_url": "https://github.com/huggingface/hub-docs",
        "subfolder": "docs/sagemaker/source",
        "title": "Amazon SageMaker"
    },
    {
        "repo_url": "https://github.com/huggingface/transformers",
        "subfolder": "docs/source/en",
        "title": "Transformers"
    },
    {
        "repo_url": "https://github.com/huggingface/transformers.js",
        "subfolder": "docs/source",
        "title": "Transformers.js"
    },
    {
        "repo_url": "https://github.com/huggingface/pytorch-image-models",
        "subfolder": "hfdocs/source",
        "title": "timm"
    },
    {
        "repo_url": "https://github.com/huggingface/diffusers",
        "subfolder": "docs/source/en",
        "title": "Diffusers"
    },
    {
        "repo_url": "https://github.com/huggingface/tokenizers",
        "subfolder": "docs/source-doc-builder",
        "title": "Tokenizers"
    },
    {
        "repo_url": "https://github.com/huggingface/datasets",
        "subfolder": "docs/source",
        "title": "Datasets"
    },
    {
        "repo_url": "https://github.com/huggingface/evaluate",
        "subfolder": "docs/source",
        "title": "Evaluate"
    },
    {
        "repo_url": "https://github.com/huggingface/peft",
        "subfolder": "docs/source",
        "title": "PEFT"
    },
    {
        "repo_url": "https://github.com/huggingface/optimum-neuron",
        "subfolder": "docs/source",
        "title": "Optimum Neuron: AWS Trainium & Inferentia"
    },
    {
        "repo_url": "https://github.com/bitsandbytes-foundation/bitsandbytes",
        "subfolder": "docs/source",
        "title": "bitsandbytes"
    },
    {
        "repo_url": "https://github.com/huggingface/accelerate",
        "subfolder": "docs/source",
        "title": "Accelerate"
    },
    {
        "repo_url": "https://github.com/huggingface/trl",
        "subfolder": "docs/source",
        "title": "TRL"
    },
    {
        "repo_url": "https://github.com/huggingface/lighteval",
        "subfolder": "docs/source",
        "title": "Lighteval"
    },
    {
        "repo_url": "https://github.com/huggingface/optimum",
        "subfolder": "docs/source",
        "title": "Optimum"
    },
    {
        "repo_url": "https://github.com/huggingface/safetensors",
        "subfolder": "docs/source",
        "title": "Safetensors"
    },
    {
        "repo_url": "https://github.com/huggingface/chat-ui",
        "subfolder": "docs/source",
        "title": "Chat UI"
    },
    {
        "repo_url": "https://github.com/huggingface/autotrain-advanced",
        "subfolder": "docs/source",
        "title": "AutoTrain"
    },
    {
        "repo_url": "https://github.com/huggingface/smolagents",
        "subfolder": "docs/source/en",
        "title": "smolagents"
    },
    {
        "repo_url": "https://github.com/huggingface/lerobot",
        "subfolder": "docs/source",
        "title": "LeRobot"
    },
    {
        "repo_url": "https://github.com/huggingface/leaderboards",
        "subfolder": "docs/source/en",
        "title": "Leaderboards and Evaluations"
    }
]
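Each entry uses the same three keys that make_docs.py and app.py read: repo_url (the GitHub repository), subfolder (the path inside it holding the doc sources and _toctree.yml), and title (the display name, which also becomes the output folder under docs/). A minimal sketch for validating the file before a run; the check itself is illustrative, not part of the Space:

import json

with open("repos_config.json") as f:
    repos = json.load(f)

# Fail fast if an entry is missing a key the scripts depend on.
for repo in repos:
    missing = {"repo_url", "subfolder", "title"} - repo.keys()
    assert not missing, f"{repo.get('title', repo)}: missing {missing}"
print(f"{len(repos)} documentation sources configured")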