Spaces:
Running
Running
""" | |
A fork of github.com/donoceidon/repo2txt/blob/main/src/repo2txt/repo2txt.py

This version only documents the .md and .mdx files of a repository: it writes the directory tree, followed by the contents of each file.
""" | |
import os | |
import argparse | |
def parse_args():
    """
    Build the command-line interface and parse the script's arguments.

    Two options are recognised:
        -r / --repo_path: directory to process; defaults to the working
            directory at the time this function is called.
        -o / --output_file: name of the text file to write; defaults to
            "output.txt".

    Returns:
        argparse.Namespace: The parsed values, available as ``repo_path``
        and ``output_file``.
    """
    cli = argparse.ArgumentParser(
        # Raw formatter keeps the newline inside the epilog intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Document the structure of a repository containing .md and .mdx files.',
        epilog='Example usage:\n python repo2txt.py -r /path/to/repo -o output.txt',
    )
    cli.add_argument(
        '-r', '--repo_path',
        default=os.getcwd(),
        help='Path to the directory to process. Defaults to the current directory.',
    )
    cli.add_argument(
        '-o', '--output_file',
        default='output.txt',
        help='Name for the output text file. Defaults to "output.txt".',
    )
    parsed = cli.parse_args()
    return parsed
def should_ignore(item_path, output_file_path):
    """
    Determine if a given item should be ignored.

    An item is kept only when it is a directory (so that it can be
    traversed) or a regular file whose extension is .md or .mdx, compared
    case-insensitively. The output file itself and any item whose name
    starts with a dot are always ignored. Anything that is neither an
    existing file nor a directory (for example a broken symlink or a path
    that no longer exists) is ignored as well.

    Args:
        item_path (str): The path of the item (file or directory) to check.
        output_file_path (str): The path of the output file being written to.

    Returns:
        bool: True if the item should be ignored, False otherwise.
    """
    # Ignore the output file itself so it is never documented in its own output.
    if os.path.abspath(item_path) == os.path.abspath(output_file_path):
        return True

    # Ignore hidden files and directories.
    item_name = os.path.basename(item_path)
    if item_name.startswith('.'):
        return True

    # Regular files are kept only when they are Markdown / MDX.
    if os.path.isfile(item_path):
        file_ext = os.path.splitext(item_name)[1].lower()
        return file_ext not in ('.md', '.mdx')

    # Directories are kept for traversal; every other kind of entry has no
    # readable Markdown content and is dropped.
    return not os.path.isdir(item_path)
def write_tree(dir_path, output_file, output_file_path, prefix="", is_root=True):
    """
    Recursively write the directory tree to the output file.

    For the root call a "./" line is written first, followed by the
    directory's own name (when ``os.path.basename`` yields one) as its only
    child. The entries of ``dir_path`` are then listed in sorted order,
    skipping everything ``should_ignore`` rejects. Each entry is drawn with
    a "├── " connector, or "└── " when it is the last entry of its
    directory; entries below a non-last directory are joined by a "│"
    guide line.

    A directory that cannot be listed because of a PermissionError is
    silently left without children.

    Args:
        dir_path (str): The path of the directory to document.
        output_file (file object): The file object to write to.
        output_file_path (str): The path of the output file being written to.
        prefix (str): Prefix string for line indentation and structure.
            It is overwritten when ``is_root`` is True.
        is_root (bool): Flag to indicate if the current directory is the root.
    """
    if is_root:
        output_file.write("└── ./\n")
        # Add the actual directory name as a child of ./
        actual_dir_name = os.path.basename(dir_path)
        if actual_dir_name:
            output_file.write(f"    └── {actual_dir_name}\n")
            # Entries sit two levels deep: below "./" and below the name.
            prefix = "        "
        else:
            # basename is empty (e.g. the path ends with a separator), so
            # entries hang directly below "./".
            prefix = "    "

    try:
        items = os.listdir(dir_path)
    except PermissionError:
        return
    items.sort()

    # Filter first so that "last item" refers to the last *visible* entry.
    filtered_items = [
        item for item in items
        if not should_ignore(os.path.join(dir_path, item), output_file_path)
    ]

    last_index = len(filtered_items) - 1
    for index, item in enumerate(filtered_items):
        is_last_item = index == last_index
        connector = "└── " if is_last_item else "├── "
        # Below a non-last entry the vertical guide must continue.
        child_prefix = "    " if is_last_item else "│   "
        output_file.write(f"{prefix}{connector}{item}\n")

        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            write_tree(item_path, output_file, output_file_path,
                       prefix + child_prefix, is_root=False)
def write_file_content(file_path, output_file):
    """
    Copy the text of one file into the output file.

    The source is read as UTF-8 with undecodable bytes dropped, and its
    lines are written through unchanged. If anything goes wrong, a single
    "Error reading file: ..." line is written to the output instead of
    raising.

    Args:
        file_path (str): Path of the file to read.
        output_file (file object): The file object to write the contents to.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as source:
            # writelines consumes the file lazily, one line at a time.
            output_file.writelines(source)
    except Exception as error:
        output_file.write(f"Error reading file: {error}\n")
def write_file_contents_in_order(dir_path, output_file, output_file_path, repo_path):
    """
    Walk a directory in sorted order and dump every kept file's contents.

    Entries rejected by ``should_ignore`` are skipped. Directories are
    descended into recursively; each file is preceded by a separator
    header showing its path relative to ``repo_path``. A directory that
    cannot be listed because of a PermissionError is skipped.

    Args:
        dir_path (str): The path of the directory to start documenting from.
        output_file (file object): The file object to write the contents to.
        output_file_path (str): The path of the output file being written to.
        repo_path (str): The root path of the repository for relative path calculation.
    """
    try:
        entries = os.listdir(dir_path)
    except PermissionError:
        return

    kept = [
        name for name in entries
        if not should_ignore(os.path.join(dir_path, name), output_file_path)
    ]
    kept.sort()

    for name in kept:
        full_path = os.path.join(dir_path, name)
        relative_path = os.path.relpath(full_path, start=repo_path)
        if os.path.isdir(full_path):
            write_file_contents_in_order(full_path, output_file, output_file_path, repo_path)
            continue
        if os.path.isfile(full_path):
            output_file.write(f"\n\n---\nFile: /{relative_path}\n---\n\n")
            write_file_content(full_path, output_file)
def main():
    """
    Run the script: parse arguments, then write the report file.

    If the requested repository path is not a directory, an error is
    printed and nothing is written. Otherwise the output file is
    (re)created with a "Directory Structure:" heading, the directory tree,
    and then the contents of every kept file.
    """
    args = parse_args()
    repo_path = args.repo_path
    output_path = args.output_file

    # Bail out early when the target is not an existing directory.
    if not os.path.isdir(repo_path):
        print(f"Error: The specified directory does not exist: {repo_path}")
        return

    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write("Directory Structure:\n\n")
        write_tree(repo_path, output_file, output_path, "", is_root=True)
        write_file_contents_in_order(repo_path, output_file, output_path, repo_path)

    print(f"Documentation generated successfully: {output_path}")
# Run the command-line tool only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()