from pathlib import PosixPath
from concurrent.futures import ThreadPoolExecutor, as_completed
import decord
from tqdm import tqdm

# 检查 decord 是否安装
try:
    import decord
except ImportError:
    raise ImportError(
        "The `decord` package is required for loading the video dataset. Install with `pip install decord`"
    )

# 设置 decord 使用的桥接
decord.bridge.set_bridge("torch")

# 从文件中读取行
def read_lines_from_file(file_path):
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()
            for line in lines:
                print(line.strip())  # 使用 strip() 去除行末的换行符
    except Exception as e:
        print(f"An error occurred: {e}")

# 将文件行读取到列表中
def read_lines_to_list(file_path):
    lines_list = []
    try:
        with open(file_path, 'r') as file:
            lines_list = [line.strip() for line in file]  # 使用列表推导式逐行读取并去除行末换行符
    except Exception as e:
        print(f"An error occurred: {e}")
    return lines_list

# 处理单个视频文件
def process_video(file, error_file_path):
    filename = PosixPath(file)
    try:
        video_reader = decord.VideoReader(uri=filename.as_posix())
    except Exception as e:
        with open(error_file_path, 'a') as f:
            f.write(f"{file}\n")
        print(f"Could not read video: {file}. Error: {e}")

# 使用示例
file_path = '/home/cn/Datasets/SakugaDataset/output_81.txt'
file_list = read_lines_to_list(file_path)
error_file_path = 'unreadable_videos_81.txt'

# 使用 ThreadPoolExecutor 实现多线程处理
with ThreadPoolExecutor(max_workers=16) as executor:  # 可以根据需要调整 max_workers 的数量
    futures = {executor.submit(process_video, file, error_file_path): file for file in file_list}
    for future in tqdm(as_completed(futures), total=len(file_list)):
        try:
            future.result()
        except Exception as e:
            print(f"An error occurred: {e}")