Spaces:
Runtime error
Runtime error
| import os | |
| import tempfile | |
| import zipfile | |
def extract_files_and_filepath_from_dir(directory, folder_paths, file_paths):
    """Read selected ``.rst``/``.py`` files out of the zip archive in *directory*.

    The first ``.zip`` entry returned by ``os.listdir(directory)`` is unpacked
    into a temporary directory. Candidate files are gathered by walking every
    entry of *folder_paths* (through ``_get_all_files_in_folder``) and by
    resolving every entry of *file_paths* (through ``_get_file``). Empty files
    and files whose extension is not ``.rst`` or ``.py`` are skipped; the rest
    are read as bytes and decoded as UTF-8.

    Args:
        directory: Directory that contains the zip archive.
        folder_paths: Folder paths, relative to the archive root, whose files
            should be collected recursively. May be empty or ``None``.
        file_paths: Individual file paths, relative to the archive root.
            May be empty or ``None``.

    Returns:
        A tuple ``(all_texts, file_references)`` of two parallel lists: the
        decoded file contents and each file's path relative to the archive
        root.

    Raises:
        FileNotFoundError: If *directory* holds no ``.zip`` file, or a
            selected file does not exist in the extracted archive.
        UnicodeDecodeError: If a selected file is not valid UTF-8.
    """
    all_texts = []
    file_references = []
    text_extensions = ('.rst', '.py')

    zip_filename = next(
        (name for name in os.listdir(directory) if name.endswith('.zip')),
        None,
    )
    if zip_filename is None:
        # Fail with a clear message instead of letting os.path.join choke on None.
        raise FileNotFoundError(f"No .zip archive found in directory: {directory}")
    zip_file_path = os.path.join(directory, zip_filename)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unzip the file into the temporary directory
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        print("tmpdirname: ", tmpdirname)
        unzipped_root = os.listdir(tmpdirname)
        print("unzipped_root ", unzipped_root)

        # When the archive wraps everything in a single top-level folder, that
        # folder is the root the requested paths are relative to. Otherwise
        # (empty archive, several top-level entries, or a lone top-level file)
        # resolve paths against the extraction directory itself rather than
        # an arbitrary first listing entry.
        tmpsubdirpath = tmpdirname
        if len(unzipped_root) == 1:
            only_entry = os.path.join(tmpdirname, unzipped_root[0])
            if os.path.isdir(only_entry):
                tmpsubdirpath = only_entry
        print("tempsubdirpath: ", tmpsubdirpath)

        files = []
        for folder_path in folder_paths or []:
            files.extend(_get_all_files_in_folder(tmpsubdirpath, folder_path))
        for file_path in file_paths or []:
            files.append(_get_file(tmpsubdirpath, file_path))
        print(f"Total number of files: {len(files)}")

        # Everything must be read before the temporary directory is removed.
        for file_path in files:
            if os.path.getsize(file_path) == 0:
                print(f"Skipping an empty file: {file_path}")
                continue

            file_ext = os.path.splitext(file_path)[1]
            if file_ext not in text_extensions:
                # Skipping here keeps all_texts and file_references aligned
                # and avoids appending a stale or undefined text.
                continue

            with open(file_path, 'rb') as f:
                text = f.read().decode('utf-8')

            all_texts.append(text)
            print("Filepaths brother:", file_path)
            relative_path = os.path.relpath(file_path, tmpsubdirpath)
            print("Relative Filepaths brother:", relative_path)
            file_references.append(relative_path)

    return all_texts, file_references
| def _get_all_files_in_folder(temp_dir, folder_path): | |
| all_files = [] | |
| target_dir = os.path.join(temp_dir, folder_path) | |
| for root, dirs, files in os.walk(target_dir): | |
| print(f"Files in current directory ({root}): {files}") | |
| for file in files: | |
| print(f"Processing file: {file}") | |
| all_files.append(os.path.join(root, file)) | |
| return all_files | |
| def _get_file(temp_dir, file_path): | |
| full_path = os.path.join(temp_dir, file_path) | |
| return full_path | |