Spaces:

Syzygianinfern0
/

NeuS-V

Sleeping

App Files Files Community

NeuS-V / evaluate.py

Syzygianinfern0

Add refactored codebase

8d3e73e 7 months ago

raw

history blame

3.57 kB

	import pickle
	import warnings
	from pathlib import Path

	import gradio as gr

	from neus_v.smooth_scoring import smooth_confidence_scores
	from neus_v.utils import clear_gpu_memory
	from neus_v.veval.eval import evaluate_video_with_sequence_of_images
	from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
	from neus_v.vlm.internvl import InternVL

	# Suppress specific warnings
	warnings.filterwarnings(
	"ignore", category=DeprecationWarning, message="Conversion of an array with ndim > 0 to a scalar is deprecated"
	)

	# Paths and parameters
	WEIGHT_PATH = Path("/opt/mars/mnt/model_weights")
	pickle_path = WEIGHT_PATH / "distributions.pkl"
	num_of_frame_in_sequence = 3
	model = "InternVL2-8B"
	device = 7
	# Load the vision-language model
	vision_language_model = InternVL(model_name=model, device=device)
	# Load distributions
	with open(pickle_path, "rb") as f:
	distributions = pickle.load(f)
	all_dimension_data = distributions.get(model).get("all_dimension")


	# TODO: Make paths better for public release
	def process_video(video_path, propositions, tl):
	"""Process the video and compute the score_on_all."""
	proposition_set = parse_proposition_set(propositions.split(","))
	tl_spec = parse_tl_specification(tl)
	threshold = 0.349

	try:
	result = evaluate_video_with_sequence_of_images(
	vision_language_model=vision_language_model,
	confidence_as_token_probability=True,
	video_path=video_path,
	proposition_set=proposition_set,
	tl_spec=tl_spec,
	parallel_inference=False,
	num_of_frame_in_sequence=num_of_frame_in_sequence,
	threshold=threshold,
	)
	probability = result.get("probability")
	score_on_all = float(
	smooth_confidence_scores(
	target_data=[probability],
	prior_distribution=all_dimension_data,
	)
	)
	clear_gpu_memory()
	return score_on_all

	except Exception as e:
	clear_gpu_memory()
	return f"Error: {str(e)}"


	# Gradio interface
	def demo_interface(video, propositions, tl):
	"""Wrapper for the Gradio interface."""
	return process_video(video, propositions, tl)


	def main():
	# Example data from the original script
	example_video_path_1 = "/opt/mars/mnt/dataset/teaser/A_storm_bursts_in_with_intermittent_lightning_and_causes_flooding_and_large_waves_crash_in.mp4"
	example_video_path_2 = "/opt/mars/mnt/dataset/teaser/The ocean waves gently lapping at the shore, until a storm bursts in, and then lightning flashes across the sky.mp4"
	example_propositions = "waves lapping,ocean shore,storm bursts in,lightning on the sky"
	example_tl = '("waves_lapping" & "ocean_shore") U ("storm_bursts_in" U "lightning_on_the_sky")'

	demo = gr.Interface(
	fn=demo_interface,
	inputs=[
	gr.Video(label="Upload Video"),
	gr.Textbox(label="List of Propositions (comma-separated)"),
	gr.Textbox(label="Temporal Logic Specification"),
	],
	outputs=gr.Textbox(label="Score on All"),
	title="Video Evaluation with Temporal Logic",
	description="Upload a video and provide propositions and temporal logic to evaluate the score_on_all.",
	examples=[
	[example_video_path_1, example_propositions, example_tl],
	[example_video_path_2, example_propositions, example_tl],
	],
	)

	demo.launch(allowed_paths=["/opt/mars/mnt/dataset/teaser"])


	if __name__ == "__main__":
	main()