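# Provenance (captured from the hosting page's file viewer; not part of the program):
#   repository / file: hfcontext7 / repo2txt.py
#   author: Abdullah Meda
#   commit: 7dc78b3 ("initial commit")
#   size: 6.22 kB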
"""
A fork of github.com/donoceidon/repo2txt/blob/main/src/repo2txt/repo2txt.py
This version only documents .md and .mdx files: it writes the directory tree of a repository, followed by the contents of each such file.
"""
import argparse
import os
import sys
def parse_args():
    """
    Build the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace: Carries ``repo_path`` (directory to process,
        defaulting to the current working directory) and ``output_file``
        (name of the text file to generate, defaulting to ``output.txt``).
    """
    cli = argparse.ArgumentParser(
        description='Document the structure of a repository containing .md and .mdx files.',
        epilog='Example usage:\n python repo2txt.py -r /path/to/repo -o output.txt',
        # Raw formatting keeps the explicit line break in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flags, default value, help text) for every supported option.
    option_table = (
        (('-r', '--repo_path'), os.getcwd(),
         'Path to the directory to process. Defaults to the current directory.'),
        (('-o', '--output_file'), 'output.txt',
         'Name for the output text file. Defaults to "output.txt".'),
    )
    for flags, default_value, help_text in option_table:
        cli.add_argument(*flags, default=default_value, help=help_text)

    namespace = cli.parse_args()
    return namespace
def should_ignore(item_path, output_file_path):
    """
    Determine if a given item should be left out of the generated document.

    Directories are kept so they can be traversed. Regular files are kept only
    when their extension is ``.md`` or ``.mdx`` (case-insensitive). The output
    file itself, hidden entries (names starting with a dot) and entries that
    are neither a directory nor a regular file are ignored.

    Args:
        item_path (str): The path of the item (file or directory) to check.
        output_file_path (str): The path of the output file being written to.

    Returns:
        bool: True if the item should be ignored, False otherwise.
    """
    item_name = os.path.basename(item_path)

    # Never feed the document being generated back into itself.
    if os.path.abspath(item_path) == os.path.abspath(output_file_path):
        return True

    # Hidden files and directories (.git, .github, .env, ...).
    if item_name.startswith('.'):
        return True

    # Directories are always kept; their children are filtered individually.
    if os.path.isdir(item_path):
        return False

    # Broken symlinks, sockets, FIFOs or paths that vanished are neither a
    # directory nor a regular file; they cannot be read as documents, so they
    # must not show up in the tree either.
    if not os.path.isfile(item_path):
        return True

    file_ext = os.path.splitext(item_name)[1].lower()
    return file_ext not in ('.md', '.mdx')
def write_tree(dir_path, output_file, output_file_path, prefix="", is_root=True):
    """
    Recursively write the directory tree to the output file.

    For the root call a ``./`` line is written first, followed by the name of
    the directory being documented; the entries of the directory are then
    listed in sorted order, skipping everything ``should_ignore`` rejects.
    Directories that cannot be listed because of missing permissions are
    silently left empty.

    Args:
        dir_path (str): The path of the directory to document.
        output_file (file object): The file object to write to.
        output_file_path (str): The path of the output file being written to.
        prefix (str): Prefix string for line indentation and structure.
            Overridden when ``is_root`` is True.
        is_root (bool): Flag to indicate if the current directory is the root.
    """
    if is_root:
        output_file.write("└── ./\n")
        # abspath resolves "." and drops a trailing separator, so the real
        # directory name is shown for inputs such as "." or "docs/".
        actual_dir_name = os.path.basename(os.path.abspath(dir_path))
        if actual_dir_name:
            output_file.write(f"    └── {actual_dir_name}\n")
            # Entries sit one level below the directory-name line.
            prefix = "        "
        else:
            # Filesystem root has no name: entries hang directly under "./".
            prefix = "    "

    try:
        items = sorted(os.listdir(dir_path))
    except PermissionError:
        return

    # Keep only the entries that belong in the document.
    filtered_items = [
        item for item in items
        if not should_ignore(os.path.join(dir_path, item), output_file_path)
    ]

    last_index = len(filtered_items) - 1
    for index, item in enumerate(filtered_items):
        is_last_item = index == last_index
        # Every connector / continuation is four columns wide so that nested
        # levels line up under their parent.
        connector = "└── " if is_last_item else "├── "
        child_prefix = "    " if is_last_item else "│   "
        output_file.write(f"{prefix}{connector}{item}\n")

        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            write_tree(item_path, output_file, output_file_path,
                       prefix + child_prefix, is_root=False)
def write_file_content(file_path, output_file):
    """
    Copy the text of one file into the output file, line by line.

    The source is decoded as UTF-8 and bytes that cannot be decoded are
    dropped. Exceptions raised while copying are not propagated: a single
    "Error reading file: ..." line is written to the output instead.

    Args:
        file_path (str): Path of the file to read.
        output_file (file object): The file object to write the contents to.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as source:
            # readline() returns '' only at end of file, which ends the loop.
            for text_line in iter(source.readline, ''):
                output_file.write(text_line)
    except Exception as exc:
        output_file.write(f"Error reading file: {exc}\n")
def write_file_contents_in_order(dir_path, output_file, output_file_path, repo_path):
    """
    Walk a directory in sorted order and append every kept file's contents.

    Entries rejected by ``should_ignore`` are skipped. Directories are
    descended into recursively; each regular file is preceded by a
    ``File: /<path relative to repo_path>`` header framed by ``---`` lines.
    Directories that cannot be listed because of missing permissions are
    skipped.

    Args:
        dir_path (str): The path of the directory to start documenting from.
        output_file (file object): The file object to write the contents to.
        output_file_path (str): The path of the output file being written to.
        repo_path (str): The root path of the repository for relative path calculation.
    """
    try:
        entries = os.listdir(dir_path)
    except PermissionError:
        return

    kept = [
        name for name in entries
        if not should_ignore(os.path.join(dir_path, name), output_file_path)
    ]
    kept.sort()

    for name in kept:
        full_path = os.path.join(dir_path, name)
        rel = os.path.relpath(full_path, start=repo_path)

        if os.path.isdir(full_path):
            # Depth-first: a directory's files appear where the directory sorts.
            write_file_contents_in_order(full_path, output_file, output_file_path, repo_path)
            continue

        if os.path.isfile(full_path):
            header = f"\n\n---\nFile: /{rel}\n---\n\n"
            output_file.write(header)
            write_file_content(full_path, output_file)
def main():
    """
    Main function to execute the script logic.

    Parses the command-line arguments, then writes the directory tree of the
    repository followed by the contents of its files into the output file and
    prints a confirmation message.

    Raises:
        SystemExit: With status 1 when the repository path is not an existing
            directory, so that shells and pipelines can detect the failure.
    """
    args = parse_args()

    # Check if the provided directory path is valid before creating (and
    # truncating) the output file.
    if not os.path.isdir(args.repo_path):
        print(f"Error: The specified directory does not exist: {args.repo_path}",
              file=sys.stderr)
        sys.exit(1)

    with open(args.output_file, 'w', encoding='utf-8') as output_file:
        output_file.write("Directory Structure:\n\n")
        write_tree(args.repo_path, output_file, args.output_file, "", is_root=True)
        write_file_contents_in_order(args.repo_path, output_file, args.output_file, args.repo_path)

    print(f"Documentation generated successfully: {args.output_file}")
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()