Spaces:

LoufAn
/

AR_Testing

Sleeping

LoufAn committed 17 days ago

Commit

91db62e

1 Parent(s): 1be9b12

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,17 +23,16 @@ def sample_video_frames(video_path, num_frames=NUM_FRAMES):
 # 推理函数：加载模型、采样视频帧、推理
 @spaces.GPU
 def analyze_video(video_file):
-    # 从上传的视频中采样图像帧
-    frames = sample_video_frames(video_file.name)
-    # 构造单轮 prompt（可改为你需要的评估内容）
     system_prompt = (
         "You are a helpful AI assistant that analyzes AR effects in videos. "
         "Evaluate the realism and placement of virtual objects in the provided video frames."
     )
     user_prompt = "Based on the frames, describe how well the AR objects blend into the real environment."
-    # 构造输入对话历史（含图像）
     history = [
         {
             "role": "system",
@@ -45,7 +44,6 @@ def analyze_video(video_file):
         }
     ]
-    # 调用 pipeline 推理
     pipe = pipeline(
         "image-text-to-text",
         model=model_id,
@@ -53,9 +51,11 @@ def analyze_video(video_file):
         torch_dtype=torch.bfloat16,
         model_kwargs={"device_map": "auto"}
     )
     result = pipe(text=history, max_new_tokens=512)
     return result[0]["generated_text"][-1]["content"]
 # Gradio 界面
 gr.Interface(
     fn=analyze_video,

 # 推理函数：加载模型、采样视频帧、推理
 @spaces.GPU
 def analyze_video(video_file):
+    # video_file 是路径字符串
+    frames = sample_video_frames(video_file)
+    # 构造 prompt
     system_prompt = (
         "You are a helpful AI assistant that analyzes AR effects in videos. "
         "Evaluate the realism and placement of virtual objects in the provided video frames."
     )
     user_prompt = "Based on the frames, describe how well the AR objects blend into the real environment."
     history = [
         {
             "role": "system",
         }
     ]
     pipe = pipeline(
         "image-text-to-text",
         model=model_id,
         torch_dtype=torch.bfloat16,
         model_kwargs={"device_map": "auto"}
     )
     result = pipe(text=history, max_new_tokens=512)
     return result[0]["generated_text"][-1]["content"]
 # Gradio 界面
 gr.Interface(
     fn=analyze_video,