File size: 6,216 Bytes
7dc78b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
A fork of github.com/donoceidon/repo2txt/blob/main/src/repo2txt/repo2txt.py

This version is limited to repositories of .md and .mdx files: it writes the directory tree, followed by the contents of every .md and .mdx file found.
"""

import os
import argparse

def parse_args():
    """
    Build the command-line interface and parse ``sys.argv``.

    Two optional flags are recognised:
        -r / --repo_path    directory to process (default: current directory)
        -o / --output_file  name of the text file to write (default: output.txt)

    Returns:
        argparse.Namespace: Parsed options exposing ``repo_path`` and ``output_file``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='Example usage:\n  python repo2txt.py -r /path/to/repo -o output.txt',
        description='Document the structure of a repository containing .md and .mdx files.',
    )

    # (short flag, long flag, default value, help text) for every supported option.
    option_table = (
        ('-r', '--repo_path', os.getcwd(),
         'Path to the directory to process. Defaults to the current directory.'),
        ('-o', '--output_file', 'output.txt',
         'Name for the output text file. Defaults to "output.txt".'),
    )
    for short_flag, long_flag, fallback, description in option_table:
        cli.add_argument(short_flag, long_flag, default=fallback, help=description)

    namespace = cli.parse_args()
    return namespace


def should_ignore(item_path, output_file_path):
    """
    Decide whether a filesystem entry is excluded from the documentation.

    An entry is excluded when it is the output file itself, when its name
    starts with a dot (hidden), or when it is a regular file whose extension
    is neither .md nor .mdx (compared case-insensitively). Anything else,
    directories in particular, is kept so that it can be traversed.

    Args:
        item_path (str): The path of the item (file or directory) to check.
        output_file_path (str): The path of the output file being written to.

    Returns:
        bool: True if the item should be ignored, False otherwise.
    """
    # Never feed the generated report back into itself.
    is_output_file = os.path.abspath(item_path) == os.path.abspath(output_file_path)
    if is_output_file:
        return True

    name = os.path.basename(item_path)

    # Dot-prefixed entries are treated as hidden.
    if name.startswith('.'):
        return True

    # Non-files (directories, missing paths) are not filtered by extension.
    if not os.path.isfile(item_path):
        return False

    _, extension = os.path.splitext(name)
    return extension.lower() not in ('.md', '.mdx')


def write_tree(dir_path, output_file, output_file_path, prefix="", is_root=True):
    """
    Recursively write the directory tree to the output file.

    For the root call a two-line header is written first: "./" and, beneath
    it, the name of the directory being documented. Entries are then listed
    in sorted order, skipping everything rejected by ``should_ignore``, and
    every sub-directory is descended into. A directory that cannot be listed
    because of a PermissionError contributes no children.

    Args:
        dir_path (str): The path of the directory to document.
        output_file (file object): The file object to write to.
        output_file_path (str): The path of the output file being written to.
        prefix (str): Prefix string for line indentation and structure.
            Overridden when ``is_root`` is True.
        is_root (bool): Flag to indicate if the current directory is the root.
    """
    if is_root:
        output_file.write("└── ./\n")
        # Normalise first: basename() alone returns "" for "docs/" and "."
        # for ".", neither of which is the actual directory name.
        actual_dir_name = os.path.basename(os.path.abspath(dir_path))
        if actual_dir_name:
            output_file.write(f"    └── {actual_dir_name}\n")
            prefix = "        "
        else:
            # Filesystem root has no name of its own; hang entries off "./".
            prefix = "    "

    try:
        items = sorted(os.listdir(dir_path))
    except PermissionError:
        return

    # Keep only the entries that belong in the documentation.
    filtered_items = [
        item for item in items
        if not should_ignore(os.path.join(dir_path, item), output_file_path)
    ]
    last_index = len(filtered_items) - 1

    for index, item in enumerate(filtered_items):
        item_path = os.path.join(dir_path, item)
        is_last_item = index == last_index
        # The last sibling closes the branch; earlier ones keep the vertical
        # rule running for the siblings that follow.
        connector = "└── " if is_last_item else "├── "
        child_prefix = "    " if is_last_item else "│   "

        output_file.write(f"{prefix}{connector}{item}\n")

        if os.path.isdir(item_path):
            write_tree(item_path, output_file, output_file_path,
                       prefix + child_prefix, is_root=False)


def write_file_content(file_path, output_file):
    """
    Copy the text of one file into the output file.

    The source is decoded as UTF-8 and undecodable bytes are dropped. Any
    failure while copying is reported as an "Error reading file" line in
    the output rather than being raised.

    Args:
        file_path (str): Path of the file to read.
        output_file (file object): The file object to write the contents to.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as source:
            # Stream line by line so large files are never held in memory.
            output_file.writelines(source)
    except Exception as error:
        output_file.write(f"Error reading file: {error}\n")


def write_file_contents_in_order(dir_path, output_file, output_file_path, repo_path):
    """
    Walk a directory in sorted order and append every kept file's contents.

    Entries rejected by ``should_ignore`` are skipped. Sub-directories are
    descended into recursively; each file is preceded by a header block
    giving its path relative to ``repo_path``. A directory that raises
    PermissionError on listing is skipped.

    Args:
        dir_path (str): The path of the directory to start documenting from.
        output_file (file object): The file object to write the contents to.
        output_file_path (str): The path of the output file being written to.
        repo_path (str): The root path of the repository for relative path calculation.
    """
    try:
        entries = os.listdir(dir_path)
    except PermissionError:
        return

    kept = [
        name for name in entries
        if not should_ignore(os.path.join(dir_path, name), output_file_path)
    ]
    kept.sort()

    for name in kept:
        full_path = os.path.join(dir_path, name)
        shown_path = os.path.relpath(full_path, start=repo_path)

        if os.path.isdir(full_path):
            write_file_contents_in_order(full_path, output_file, output_file_path, repo_path)
            continue

        if os.path.isfile(full_path):
            header = f"\n\n---\nFile: /{shown_path}\n---\n\n"
            output_file.write(header)
            write_file_content(full_path, output_file)


def main():
    """
    Entry point: parse the options, validate the target, write the report.

    The report starts with the directory tree and continues with the
    contents of each documented file. If the target directory does not
    exist, an error is printed and nothing is written.
    """
    options = parse_args()
    repo_root = options.repo_path
    destination = options.output_file

    # Bail out early on a path that is missing or not a directory.
    if not os.path.isdir(repo_root):
        print(f"Error: The specified directory does not exist: {repo_root}")
        return

    with open(destination, 'w', encoding='utf-8') as report:
        report.write("Directory Structure:\n\n")
        write_tree(repo_root, report, destination, "", is_root=True)
        write_file_contents_in_order(repo_root, report, destination, repo_root)

    print(f"Documentation generated successfully: {destination}")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()