"""Example inputs and inline HTML previews of their media files."""

import base64
import html
import io
import os
from typing import Optional

import decord
from PIL import Image

# Each entry is [list of media file paths, prompt text].
EXAMPLES_LIST = [
    [
        ["videos/demo1.mp4"],
        "What's the third criteria promoted by Nick Mortimer that a continent must meet?",
    ],
    [
        ["videos/demo2.mp4"],
        "When does the deer appear in the video? Give me the specific time range in seconds.",
    ],
]

# File extensions (lower-case) that are treated as video; anything else is
# opened as an image.
_VIDEO_EXTENSIONS = frozenset({".mp4", ".mov", ".avi", ".webm"})


def _img_tag(image, alt_text: str) -> str:
    """Return an ``<img>`` tag embedding *image* as a base64 PNG data URI."""
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    # The alt text may contain arbitrary path characters, so escape it
    # (html.escape also escapes quotes by default, which an attribute needs).
    return f'<img src="data:image/png;base64,{encoded}" alt="{html.escape(alt_text)}">'


def _render_media(full_file_path: str, file_path: str) -> str:
    """Return the HTML fragment for one media file.

    Videos are previewed by their first frame; every other file is opened as
    an image. Failures are reported as an escaped inline message instead of
    being raised, so one bad file does not break the whole example list.
    """
    extension = os.path.splitext(full_file_path)[1].lower()

    if extension in _VIDEO_EXTENSIONS:
        try:
            # Read the video with decord and take the first frame (index 0).
            reader = decord.VideoReader(full_file_path)
            first_frame = reader[0].asnumpy()
            # Convert the NumPy frame into a PIL image so it can be PNG-encoded.
            preview = Image.fromarray(first_frame)
            return _img_tag(preview, f"Video preview for {file_path}")
        except Exception as e:  # best-effort preview: report inline, keep going
            return html.escape(f"Error processing video {file_path}: {e}")

    try:
        # The context manager releases the underlying file handle once the
        # image has been encoded.
        with Image.open(full_file_path) as image:
            return _img_tag(image, file_path)
    except (OSError, ValueError) as e:
        # Covers missing/unreadable files and modes PNG cannot store.
        return html.escape(f"Error processing image {file_path}: {e}")


def display_example(file_list, root_dir: Optional[str] = None):
    """Build an HTML snippet previewing every file in *file_list*.

    Args:
        file_list: Iterable of media file paths. Paths ending in ``.mp4``,
            ``.mov``, ``.avi`` or ``.webm`` (case-insensitive) are treated as
            videos; all others are opened as images.
        root_dir: Optional directory joined in front of each path before the
            file is opened. The path as given in *file_list* is still the one
            used in alt texts and error messages.

    Returns:
        A string with the per-file fragments concatenated and wrapped in a
        leading and trailing newline. Each fragment is an ``<img>`` tag with
        an inline PNG, or an escaped error message if the file could not be
        processed.
    """
    fragments = []
    for file_path in file_list:
        if root_dir is not None:
            full_file_path = os.path.join(root_dir, file_path)
        else:
            full_file_path = file_path
        fragments.append(_render_media(full_file_path, file_path))

    media_html = "".join(fragments)
    return f"\n{media_html}\n"


def get_examples(root_dir: Optional[str] = None):
    """Return one ``[files, preview_html, text]`` row per ``EXAMPLES_LIST`` entry.

    Args:
        root_dir: Optional directory passed through to ``display_example`` for
            resolving the media paths.
    """
    return [
        [files, display_example(files, root_dir), texts]
        for files, texts in EXAMPLES_LIST
    ]