import os
import io
import base64
import decord
from PIL import Image
# Example entries. Each entry is a two-item list:
#   [0] a list of media file paths (one video path per entry here),
#   [1] the question string associated with that media.
EXAMPLES_LIST = [
    [
        ["videos/demo1.mp4"],
        "What's the third criteria promoted by Nick Mortimer that a continent must meet?",
    ],
    [
        ["videos/demo2.mp4"],
        "When does the deer appear in the video? Give me the specific time range in seconds.",
    ],
]
def display_example(file_list, root_dir: str = None):
media_html = ""
for _, file_path in enumerate(file_list):
if root_dir is not None:
full_file_path = os.path.join(root_dir, file_path)
else:
full_file_path = file_path
# # Check if it's a video file
# if full_file_path.endswith((".mp4", ".mov", ".avi", ".webm")):
file_ext = os.path.splitext(full_file_path)[1].lower()
# 对于视频文件,提取第一帧作为预览图
if file_ext in [".mp4", ".mov", ".avi", ".webm"]:
try:
# 使用 decord 读取视频
vr = decord.VideoReader(full_file_path)
# 提取第一帧 (索引为 0)
frame = vr[0].asnumpy()
# 将 NumPy 数组 (RGB) 转换为 PIL Image
image = Image.fromarray(frame)
buffered = io.BytesIO()
image.save(buffered, format="PNG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
# 使用 img 标签显示这一帧
media_str = f''
except Exception as e:
media_str = f"Error processing video {file_path}: {e}"
else:
# Assume it's an image
image = Image.open(full_file_path)
buffered = io.BytesIO()
image.save(buffered, format="PNG", quality=100)
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
media_str = f'
'
media_html += media_str
result_html = f"""