# Snapshot taken from a hosted "Spaces" file view (status: Running).
# File size: 6,216 bytes; revision 7dc78b3; 177 lines in the original listing.
"""
A fork of github.com/donoceidon/repo2txt/blob/main/src/repo2txt/repo2txt.py
This version only includes the functionality to document the structure of a repository containing .md and .mdx files.
"""
import os
import argparse
def parse_args():
    """
    Build the command-line interface and parse the arguments passed to the script.

    Recognised options:
        -r / --repo_path:   directory to process (default: current working directory).
        -o / --output_file: name of the text file to write (default: "output.txt").

    Returns:
        argparse.Namespace: Namespace exposing ``repo_path`` and ``output_file``.
    """
    cli = argparse.ArgumentParser(
        # The raw formatter keeps the newline embedded in the epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Document the structure of a repository containing .md and .mdx files.',
        epilog='Example usage:\n python repo2txt.py -r /path/to/repo -o output.txt',
    )
    cli.add_argument(
        '-r', '--repo_path',
        default=os.getcwd(),
        help='Path to the directory to process. Defaults to the current directory.',
    )
    cli.add_argument(
        '-o', '--output_file',
        default='output.txt',
        help='Name for the output text file. Defaults to "output.txt".',
    )
    namespace = cli.parse_args()
    return namespace
def should_ignore(item_path, output_file_path):
    """
    Determine if a given item should be ignored.

    An item is kept only when it is a non-hidden directory, or a non-hidden
    regular file whose extension is .md or .mdx (case-insensitive). Anything
    else is ignored: the output file itself, hidden entries, other file types,
    and paths that are neither a regular file nor a directory (for example a
    broken symlink or an entry that no longer exists).

    Args:
        item_path (str): The path of the item (file or directory) to check.
        output_file_path (str): The path of the output file being written to.

    Returns:
        bool: True if the item should be ignored, False otherwise.
    """
    included_extensions = ('.md', '.mdx')
    item_name = os.path.basename(item_path)

    # Ignore the output file itself; compare absolute paths so that relative
    # and absolute spellings of the same location match.
    if os.path.abspath(item_path) == os.path.abspath(output_file_path):
        return True

    # Ignore hidden files and directories
    if item_name.startswith('.'):
        return True

    # If it's a file, only include .md and .mdx files
    if os.path.isfile(item_path):
        file_ext = os.path.splitext(item_name)[1].lower()
        return file_ext not in included_extensions

    # Include directories (they will be traversed); ignore anything that is
    # neither a regular file nor a directory so it never shows up in the tree.
    return not os.path.isdir(item_path)
def write_tree(dir_path, output_file, output_file_path, prefix="", is_root=True):
    """
    Recursively write the directory tree to the output file.

    Entries are listed in sorted order after filtering through
    ``should_ignore``. Each entry is drawn with a box-drawing connector:
    "├── " for an entry followed by siblings and "└── " for the last entry
    of its directory. Children of a non-last directory are prefixed with
    "│   " so the vertical line continues; children of a last directory
    are prefixed with four spaces.

    Args:
        dir_path (str): The path of the directory to document.
        output_file (file object): The file object to write to.
        output_file_path (str): The path of the output file being written to.
        prefix (str): Prefix string for line indentation and structure.
            Overwritten when ``is_root`` is True.
        is_root (bool): Flag to indicate if the current directory is the root.
    """
    # Every connector/indent unit is four columns wide so nested levels align.
    branch = "├── "
    last_branch = "└── "
    vertical = "│   "
    blank = "    "

    if is_root:
        output_file.write(f"{last_branch}./\n")
        # Add the actual directory name as a child of ./
        actual_dir_name = os.path.basename(dir_path)
        if actual_dir_name:
            output_file.write(f"{blank}{last_branch}{actual_dir_name}\n")
            # Entries sit two levels deep: under "./" and under the directory name.
            prefix = blank * 2
        else:
            # basename is empty (e.g. path ends with a separator): only "./" was written.
            prefix = blank

    try:
        items = sorted(os.listdir(dir_path))
    except PermissionError:
        # Unreadable directories are skipped silently.
        return

    # Filter out items that should be ignored
    filtered_items = [
        item for item in items
        if not should_ignore(os.path.join(dir_path, item), output_file_path)
    ]

    last_index = len(filtered_items) - 1
    for index, item in enumerate(filtered_items):
        is_last_item = index == last_index
        connector = last_branch if is_last_item else branch
        output_file.write(f"{prefix}{connector}{item}\n")

        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            child_prefix = blank if is_last_item else vertical
            write_tree(item_path, output_file, output_file_path,
                       prefix + child_prefix, is_root=False)
def write_file_content(file_path, output_file):
    """
    Copy the text of one file into the output file, line by line.

    The source is decoded as UTF-8 and undecodable bytes are dropped. If
    anything goes wrong, a single "Error reading file: ..." line is written
    to the output instead of raising.

    Args:
        file_path (str): Path of the file to read.
        output_file (file object): The file object to write the contents to.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8', errors='ignore') as source:
            # Stream the lines straight through without loading the whole file.
            output_file.writelines(source)
    except Exception as exc:
        # Best-effort: record the failure in the output and keep going.
        output_file.write(f"Error reading file: {exc}\n")
def write_file_contents_in_order(dir_path, output_file, output_file_path, repo_path):
    """
    Walk a directory in sorted order and append the contents of every kept file.

    Entries rejected by ``should_ignore`` are skipped. Directories are
    descended into recursively; each file is preceded by a header block
    showing its path relative to ``repo_path``.

    Args:
        dir_path (str): The path of the directory to start documenting from.
        output_file (file object): The file object to write the contents to.
        output_file_path (str): The path of the output file being written to.
        repo_path (str): The root path of the repository for relative path calculation.
    """
    try:
        entries = os.listdir(dir_path)
    except PermissionError:
        # Directories that cannot be listed are skipped.
        return

    kept = [
        name for name in entries
        if not should_ignore(os.path.join(dir_path, name), output_file_path)
    ]
    kept.sort()

    for name in kept:
        full_path = os.path.join(dir_path, name)
        relative_path = os.path.relpath(full_path, start=repo_path)

        if os.path.isdir(full_path):
            write_file_contents_in_order(full_path, output_file, output_file_path, repo_path)
            continue

        if os.path.isfile(full_path):
            output_file.write(f"\n\n---\nFile: /{relative_path}\n---\n\n")
            write_file_content(full_path, output_file)
def main():
    """
    Entry point: parse the arguments, then write the tree and file contents.

    When the repository path is not a directory, an error message is printed
    and nothing is written. Otherwise the output file is (re)created with a
    "Directory Structure:" heading, the directory tree, and the contents of
    each included file, and a success message is printed.
    """
    args = parse_args()
    repo_path = args.repo_path
    output_path = args.output_file

    # Check if the provided directory path is valid
    if os.path.isdir(repo_path):
        with open(output_path, 'w', encoding='utf-8') as output_file:
            output_file.write("Directory Structure:\n\n")
            write_tree(repo_path, output_file, output_path, "", is_root=True)
            write_file_contents_in_order(repo_path, output_file, output_path, repo_path)
        print(f"Documentation generated successfully: {args.output_file}")
    else:
        print(f"Error: The specified directory does not exist: {args.repo_path}")
# Run the tool only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()