Syzygianinfern0 committed
Commit 8d3e73e · 0 Parent(s)

Add refactored codebase
.gitignore ADDED
@@ -0,0 +1,168 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
README.md ADDED
File without changes
evaluate.py ADDED
@@ -0,0 +1,98 @@
1
+ import pickle
2
+ import warnings
3
+ from pathlib import Path
4
+
5
+ import gradio as gr
6
+
7
+ from neus_v.smooth_scoring import smooth_confidence_scores
8
+ from neus_v.utils import clear_gpu_memory
9
+ from neus_v.veval.eval import evaluate_video_with_sequence_of_images
10
+ from neus_v.veval.parse import parse_proposition_set, parse_tl_specification
11
+ from neus_v.vlm.internvl import InternVL
12
+
13
+ # Suppress specific warnings
14
+ warnings.filterwarnings(
15
+ "ignore", category=DeprecationWarning, message="Conversion of an array with ndim > 0 to a scalar is deprecated"
16
+ )
17
+
18
+ # Paths and parameters
19
+ WEIGHT_PATH = Path("/opt/mars/mnt/model_weights")
20
+ pickle_path = WEIGHT_PATH / "distributions.pkl"
21
+ num_of_frame_in_sequence = 3
22
+ model = "InternVL2-8B"
23
+ device = 7
24
+ # Load the vision-language model
25
+ vision_language_model = InternVL(model_name=model, device=device)
26
+ # Load distributions
27
+ with open(pickle_path, "rb") as f:
28
+ distributions = pickle.load(f)
29
+ all_dimension_data = distributions.get(model).get("all_dimension")
30
+
31
+
32
+ # TODO: Make paths better for public release
33
+ def process_video(video_path, propositions, tl):
34
+ """Process the video and compute the score_on_all."""
35
+ proposition_set = parse_proposition_set(propositions.split(","))
36
+ tl_spec = parse_tl_specification(tl)
37
+ threshold = 0.349
38
+
39
+ try:
40
+ result = evaluate_video_with_sequence_of_images(
41
+ vision_language_model=vision_language_model,
42
+ confidence_as_token_probability=True,
43
+ video_path=video_path,
44
+ proposition_set=proposition_set,
45
+ tl_spec=tl_spec,
46
+ parallel_inference=False,
47
+ num_of_frame_in_sequence=num_of_frame_in_sequence,
48
+ threshold=threshold,
49
+ )
50
+ probability = result.get("probability")
51
+ score_on_all = float(
52
+ smooth_confidence_scores(
53
+ target_data=[probability],
54
+ prior_distribution=all_dimension_data,
55
+ )
56
+ )
57
+ clear_gpu_memory()
58
+ return score_on_all
59
+
60
+ except Exception as e:
61
+ clear_gpu_memory()
62
+ return f"Error: {str(e)}"
63
+
64
+
65
+ # Gradio interface
66
+ def demo_interface(video, propositions, tl):
67
+ """Wrapper for the Gradio interface."""
68
+ return process_video(video, propositions, tl)
69
+
70
+
71
+ def main():
72
+ # Example data from the original script
73
+ example_video_path_1 = "/opt/mars/mnt/dataset/teaser/A_storm_bursts_in_with_intermittent_lightning_and_causes_flooding_and_large_waves_crash_in.mp4"
74
+ example_video_path_2 = "/opt/mars/mnt/dataset/teaser/The ocean waves gently lapping at the shore, until a storm bursts in, and then lightning flashes across the sky.mp4"
75
+ example_propositions = "waves lapping,ocean shore,storm bursts in,lightning on the sky"
76
+ example_tl = '("waves_lapping" & "ocean_shore") U ("storm_bursts_in" U "lightning_on_the_sky")'
77
+
78
+ demo = gr.Interface(
79
+ fn=demo_interface,
80
+ inputs=[
81
+ gr.Video(label="Upload Video"),
82
+ gr.Textbox(label="List of Propositions (comma-separated)"),
83
+ gr.Textbox(label="Temporal Logic Specification"),
84
+ ],
85
+ outputs=gr.Textbox(label="Score on All"),
86
+ title="Video Evaluation with Temporal Logic",
87
+ description="Upload a video and provide propositions and temporal logic to evaluate the score_on_all.",
88
+ examples=[
89
+ [example_video_path_1, example_propositions, example_tl],
90
+ [example_video_path_2, example_propositions, example_tl],
91
+ ],
92
+ )
93
+
94
+ demo.launch(allowed_paths=["/opt/mars/mnt/dataset/teaser"])
95
+
96
+
97
+ if __name__ == "__main__":
98
+ main()
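A minimal sketch of driving process_video directly, outside the Gradio UI (the video path below is a placeholder; importing evaluate loads the VLM and the prior distributions at module level):

from evaluate import process_video

score = process_video(
    video_path="/path/to/video.mp4",  # placeholder path
    propositions="waves lapping,ocean shore,storm bursts in,lightning on the sky",
    tl='("waves_lapping" & "ocean_shore") U ("storm_bursts_in" U "lightning_on_the_sky")',
)
print(score)  # smoothed score in [0, 1], or an "Error: ..." string on failure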
install.sh ADDED
@@ -0,0 +1,34 @@
1
+ #!/bin/bash
2
+ # These are the commands that I used to install the necessary packages for the project
3
+ conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch -c nvidia
4
+ pip install gradio
5
+ pip install transformers
6
+ pip install decord
7
+ pip install opencv-python
8
+ pip install joblib
9
+
10
+ # Storm
11
+ sudo apt install libboost-all-dev m4
12
+
13
+ mkdir build
14
+ cd build
15
+ wget https://github.com/moves-rwth/storm/archive/stable.zip
16
+ unzip stable.zip
17
+ cd storm-stable
18
+ mkdir build
19
+ cd build
20
+ cmake ..
21
+
22
+ # Carl
23
+ cd FILLEMEUP
24
+ git clone https://github.com/moves-rwth/carl-storm
25
+ cd carl-storm
26
+ mkdir build
27
+ cd build
28
+ cmake ..
29
+ make lib_carl
30
+
31
+
32
+
33
+ pip install pycarl
34
+ pip install stormpy
neus_v/automaton/__init__.py ADDED
File without changes
neus_v/automaton/video_automaton.py ADDED
@@ -0,0 +1,133 @@
1
+ from neus_v.model_checking.proposition import process_proposition_set
2
+ from neus_v.model_checking.video_state import VideoState
3
+ from neus_v.video.frame import VideoFrame
4
+
5
+
6
+ class VideoAutomaton:
7
+ """Represents a Markov Automaton for video state modeling."""
8
+
9
+ def __init__(self, include_initial_state: bool = False) -> None:
10
+ """Initialize the MarkovAutomaton.
11
+
12
+ Args:
13
+ include_initial_state (bool, optional): Whether to include
14
+ the initial state. Defaults to False.
15
+ (The proposition set is supplied separately via set_up(),
16
+ not through this constructor.)
17
+ """
18
+ self.previous_states: list[VideoState] = []
19
+ self.states: list[VideoState] = []
20
+ self.transitions = []
21
+ self.include_initial_state = include_initial_state
22
+
23
+ def set_up(self, proposition_set: list[str]) -> None:
24
+ """Set up the MarkovAutomaton."""
25
+ self.proposition_set = process_proposition_set(proposition_set)
26
+ self.label_combinations = self._create_label_combinations(len(proposition_set))
27
+ self.probability_of_propositions = [[] for _ in range(len(proposition_set))]
28
+ self.frame_index_in_automaton = 0
29
+
30
+ if self.include_initial_state:
31
+ initial_state = VideoState(
32
+ state_index=0,
33
+ frame_index=-1,
34
+ label="init",
35
+ proposition_set=proposition_set,
36
+ )
37
+ self.previous_states = [initial_state]
38
+ self.states = [initial_state]
39
+ self._current_state = initial_state
40
+
41
+ def reset(self) -> None:
42
+ """Reset automaton."""
43
+ self.__init__(self.include_initial_state)
44
+ self.set_up(self.proposition_set)
45
+
46
+ def add_frame(self, frame: VideoFrame) -> None:
47
+ """Add frame to automaton."""
48
+ self._get_probability_of_propositions(frame)
49
+ current_states = []
50
+ for prop_comb in self.label_combinations:
51
+ # iterate through all possible combinations of T and F
52
+ self._current_state = VideoState(
53
+ state_index=len(self.states),
54
+ frame_index=self.frame_index_in_automaton,
55
+ label=prop_comb,
56
+ proposition_set=self.proposition_set,
57
+ )
58
+ # TODO: Make a method for update and compute probability
59
+ self._current_state.update(
60
+ frame_index=self.frame_index_in_automaton,
61
+ target_label=prop_comb,
62
+ )
63
+ self._current_state.compute_probability(probabilities=self.probability_of_propositions)
64
+ if self._current_state.probability > 0:
65
+ self.states.append(self._current_state)
66
+ current_states.append(self._current_state)
67
+
68
+ # Build transitions from previous states to current states
69
+ if self.previous_states:
70
+ for prev_state in self.previous_states:
71
+ for cur_state in current_states:
72
+ transition = (
73
+ prev_state.state_index,
74
+ cur_state.state_index,
75
+ cur_state.probability,
76
+ )
77
+ self.transitions.append(transition)
78
+
79
+ self.previous_states = current_states if current_states else self.previous_states
80
+ self.frame_index_in_automaton += 1
81
+
82
+ def add_terminal_state(self, add_with_terminal_label: bool = False) -> None:
83
+ """Add terminal state to the automaton."""
84
+ if add_with_terminal_label:
85
+ terminal_state_index = len(self.states)
86
+ terminal_state = VideoState(
87
+ state_index=terminal_state_index,
88
+ frame_index=self.frame_index_in_automaton,
89
+ label="terminal",
90
+ proposition_set=self.proposition_set,
91
+ )
92
+ self.states.append(terminal_state)
93
+ self._current_state = terminal_state
94
+
95
+ self.transitions.extend(
96
+ (prev_state.state_index, terminal_state_index, 1.0) for prev_state in self.previous_states
97
+ )
98
+ self.transitions.append((terminal_state_index, terminal_state_index, 1.0))
99
+ else:
100
+ self.transitions.extend(
101
+ (prev_state.state_index, prev_state.state_index, 1.0) for prev_state in self.previous_states
102
+ )
103
+
104
+ def _get_probability_of_propositions(self, frame: VideoFrame) -> None:
105
+ """Update the probability of propositions."""
106
+ for i, prop in enumerate(self.proposition_set):
107
+ prop = prop.replace("_", " ")
108
+ if frame.object_of_interest.get(prop):
109
+ probability = frame.object_of_interest[prop].get_probability()
110
+ else:
111
+ probability = 0.0
112
+ self.probability_of_propositions[i].append(round(probability, 2))
113
+
114
+ def _create_label_combinations(self, num_props: int) -> list[str]:
115
+ """Create all possible combinations of T and F for the number of propositions.
116
+
117
+ Args:
118
+ num_props (int): Number of propositions.
119
+
120
+ Returns:
121
+ list[str]: List of all possible combinations of T and F.
122
+ """ # noqa: E501
123
+ label_list = []
124
+
125
+ def add_labels(num_props: int, label: str, label_list: list[str]) -> None:
126
+ if len(label) == num_props:
127
+ label_list.append(label)
128
+ return
129
+ add_labels(num_props, label + "T", label_list)
130
+ add_labels(num_props, label + "F", label_list)
131
+
132
+ add_labels(num_props, "", label_list)
133
+ return label_list
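As a concrete illustration of the label enumeration above, two propositions yield the four truth assignments in T-first order (proposition names here are placeholders):

from neus_v.automaton.video_automaton import VideoAutomaton

automaton = VideoAutomaton()
automaton.set_up(proposition_set=["storm bursts in", "lightning on the sky"])
print(automaton.label_combinations)  # ['TT', 'TF', 'FT', 'FF']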
neus_v/model_checking/__init__.py ADDED
File without changes
neus_v/model_checking/proposition.py ADDED
@@ -0,0 +1,6 @@
1
+ def process_proposition_set(proposition_set: list[str]) -> list:
2
+ """Process proposition set."""
3
+ new_set = []
4
+ for proposition in proposition_set:
5
+ new_set.append(proposition.replace(" ", "_"))
6
+ return new_set
neus_v/model_checking/stormpy.py ADDED
@@ -0,0 +1,234 @@
1
+ import logging
2
+ import math
3
+
4
+ import numpy as np
5
+ import stormpy
6
+ import stormpy.examples.files
7
+ from stormpy.core import ExplicitQualitativeCheckResult
8
+
9
+ from neus_v.model_checking.proposition import process_proposition_set
10
+ from neus_v.model_checking.video_state import VideoState
11
+
12
+
13
+ class StormModelChecker:
14
+ """Model Checker using Stormpy for verifying properties."""
15
+
16
+ def __init__(
17
+ self,
18
+ proposition_set: list[str],
19
+ ltl_formula: str,
20
+ ) -> None:
21
+ """Initialize the StormModelChecker.
22
+
23
+ Args:
24
+ proposition_set: List of propositions.
25
+ ltl_formula: LTL formula to check.
26
+ (Verbose output and result filtering are configured per call,
27
+ e.g. via use_filter in check_automaton, not here.)
28
+ """
29
+ self.proposition_set = process_proposition_set(proposition_set)
30
+ self.ltl_formula = ltl_formula
31
+
32
+ def create_model(
33
+ self,
34
+ transitions: list[tuple[int, int, float]],
35
+ states: list[VideoState],
36
+ model_type: str = "sparse_ma",
37
+ ) -> any:
38
+ """Create model.
39
+
40
+ Args:
41
+ transitions (list[tuple[int, int, float]]): List of transitions.
42
+ states (list[VideoState]): List of states.
43
+ model_type (str): Type of model to create ("sparse_ma" or "dtmc").
44
+ verbose (bool): Whether to print verbose output.
45
+ """
46
+ state_labeling = self._build_label_func(states, self.proposition_set)
47
+ if model_type in ["sparse_ma", "mdp"]:
48
+ transition_matrix = self._build_trans_matrix(
49
+ transitions=transitions,
50
+ states=states,
51
+ model_type="nondeterministic",
52
+ )
53
+ else:
54
+ transition_matrix = self._build_trans_matrix(
55
+ transitions=transitions,
56
+ states=states,
57
+ model_type="deterministic",
58
+ )
59
+ components = stormpy.SparseModelComponents(
60
+ transition_matrix=transition_matrix,
61
+ state_labeling=state_labeling,
62
+ )
63
+ if model_type == "sparse_ma":
64
+ markovian_states = stormpy.BitVector(len(states), list(range(len(states))))
65
+ components.markovian_states = markovian_states
66
+ components.exit_rates = [1.0 for _ in range(len(states))]
67
+ model = stormpy.SparseMA(components)
68
+ elif model_type == "dtmc":
69
+ model = stormpy.storage.SparseDtmc(components)
70
+ elif model_type == "mdp":
71
+ model = stormpy.storage.SparseMdp(components)
72
+ else:
73
+ msg = f"Unsupported model type: {model_type}"
74
+ raise ValueError(msg)
75
+ return model
76
+
77
+ def check_automaton(
78
+ self,
79
+ transitions: list[tuple[int, int, float]],
80
+ states: list[VideoState],
81
+ model_type: str = "sparse_ma",
82
+ use_filter: bool = False,
83
+ ) -> any:
84
+ """Check automaton.
85
+
86
+ Args:
87
+ transitions: List of transitions.
88
+ states: List of states.
89
+ verbose: Enable verbose output.
90
+ use_filter: Apply filtering to results.
91
+ """
92
+ model = self.create_model(
93
+ transitions=transitions,
94
+ states=states,
95
+ model_type=model_type,
96
+ )
97
+ # Check the model
98
+ # Initialize Prism Program
99
+ path = stormpy.examples.files.prism_dtmc_die # prism_mdp_maze
100
+ prism_program = stormpy.parse_prism_program(path)
101
+
102
+ # Define Properties
103
+ properties = stormpy.parse_properties(self.ltl_formula, prism_program)
104
+
105
+ # Get Result and Filter it
106
+ result = stormpy.model_checking(model, properties[0])
107
+
108
+ if use_filter:
109
+ # The final result will only consider paths starting from the initial states of the automaton. # noqa: E501
110
+ filtered_result = stormpy.create_filter_initial_states_sparse(model)
111
+ result.filter(filtered_result)
112
+ return result
113
+
114
+ def qualitative_result_eval(self, verification_result: ExplicitQualitativeCheckResult) -> bool:
115
+ if isinstance(verification_result, ExplicitQualitativeCheckResult):
116
+ # string result is "true" when is absolutely true
117
+ # but it returns "true, false" when we have some true and false
118
+ verification_result_str = str(verification_result)
119
+ string_result = verification_result_str.split("{")[-1].split("}")[0]
120
+ if len(string_result) == 4:
121
+ # "true" alone means satisfied; any other four-character value is not
122
+ result = string_result[0] == "t"
123
+ elif len(string_result) > 5:
124
+ # "true, false" -> some true and some false
125
+ result = True
126
+ else:
127
+ result = False
128
+ return result
129
+ msg = "Model Checking is not qualitative"
130
+ raise ValueError(msg)
131
+
132
+ def _build_trans_matrix(
133
+ self,
134
+ transitions: list[tuple[int, int, float]],
135
+ states: list[VideoState],
136
+ model_type: str = "nondeterministic",
137
+ ) -> stormpy.storage.SparseMatrix:
138
+ """Build transition matrix.
139
+
140
+ Args:
141
+ transitions: List of transitions.
142
+ states: List of states.
143
+ model_type: Type of model ("nondeterministic" or "deterministic").
144
+ """
145
+ if model_type not in ["nondeterministic", "deterministic"]:
146
+ msg = "Invalid model_type. Must be 'nondeterministic' or 'deterministic'" # noqa: E501
147
+ raise ValueError(msg)
148
+
149
+ if model_type == "nondeterministic":
150
+ matrix = np.zeros((len(states), len(states)))
151
+ for t in transitions:
152
+ matrix[int(t[0]), int(t[1])] = float(t[2])
153
+ trans_matrix = stormpy.build_sparse_matrix(matrix, list(range(len(states))))
154
+
155
+ elif model_type == "deterministic":
156
+ num_states = len(states)
157
+ builder = stormpy.SparseMatrixBuilder(
158
+ rows=num_states,
159
+ columns=num_states,
160
+ entries=len(transitions),
161
+ force_dimensions=False,
162
+ )
163
+ states_with_transitions = set(src for src, _, _ in transitions)
164
+ outgoing_probs = {i: 0.0 for i in range(num_states)}
165
+
166
+ for src, dest, prob in transitions:
167
+ builder.add_next_value(src, dest, prob)
168
+ outgoing_probs[src] += prob
169
+
170
+ for state in range(num_states):
171
+ if state not in states_with_transitions:
172
+ builder.add_next_value(state, state, 1.0)
173
+ outgoing_probs[state] = 1.0
174
+
175
+ # Check probabilities
176
+ for state, prob_sum in outgoing_probs.items():
177
+ if not math.isclose(prob_sum, 1.0, rel_tol=1e-9):
178
+ logging.warning(f"State {state} has outgoing probability sum of {prob_sum}, not 1.0")
179
+
180
+ # ... (existing logging code) ...
181
+ trans_matrix = builder.build()
182
+ return trans_matrix
183
+
184
+ def _build_label_func(
185
+ self,
186
+ states: list[VideoState],
187
+ props: list[str],
188
+ model_type: str = "nondeterministic",
189
+ ) -> stormpy.storage.StateLabeling:
190
+ """Build label function.
191
+
192
+ Args:
193
+ states (list[State]): List of states.
194
+ props (list[str]): List of propositions.
195
+ model_type (str): Type of model
196
+ ("nondeterministic" or "deterministic").
197
+
198
+ Returns:
199
+ stormpy.storage.StateLabeling: State labeling.
200
+ """
201
+ state_labeling = stormpy.storage.StateLabeling(len(states))
202
+ state_labeling.add_label("init")
203
+ state_labeling.add_label("terminal")
204
+ for label in props:
205
+ state_labeling.add_label(label)
206
+
207
+ if model_type == "nondeterministic":
208
+ for state in states:
209
+ for label in state.descriptive_label:
210
+ state_labeling.add_label_to_state(label, state.state_index)
211
+ else:
212
+ for i, state in enumerate(states):
213
+ for prop in state.props:
214
+ if prop in props:
215
+ state_labeling.add_label_to_state(prop, i)
216
+ return state_labeling
217
+
218
+ def validate_tl_specification(self, ltl_formula: str) -> bool:
219
+ """Validate LTL specification.
220
+
221
+ Args:
222
+ ltl_formula: LTL formula to validate.
223
+ """
224
+ path = stormpy.examples.files.prism_dtmc_die # prism_mdp_maze
225
+ prism_program = stormpy.parse_prism_program(path)
226
+ # Define Properties
227
+ try:
228
+ stormpy.parse_properties(ltl_formula, prism_program)
229
+ except Exception as e:
230
+ msg = f"Error validating LTL specification: {e}"
231
+ logging.exception(msg)
232
+ return False
233
+ else:
234
+ return True
neus_v/model_checking/video_state.py ADDED
@@ -0,0 +1,80 @@
1
+ class VideoState:
2
+ """Video state class."""
3
+
4
+ def __init__(
5
+ self,
6
+ state_index: int,
7
+ frame_index: int,
8
+ label: str,
9
+ proposition_set: list[str],
10
+ probability: float = 1.0,
11
+ ) -> None:
12
+ """State class.
13
+
14
+ Args:
15
+ state_index (int): state_index.
16
+ frame_index (int): Frame index.
17
+ label (str): Label set. Label is a string with characters T or F
18
+ indicating True or False
19
+ proposition_set (list[str]): Proposition set.
20
+ probability (float): Probability of the state.
21
+ """
22
+ self.state_index = state_index
23
+ self.frame_index = frame_index
24
+ self.proposition_set = proposition_set
25
+ self.label = label # "init", "terminal", TTT, TFT, FTT, etc.
26
+ self.descriptive_label = self._get_descriptive_label(label=label)
27
+ self.probability = probability
28
+
29
+ def __repr__(self) -> str:
30
+ """Representation of state."""
31
+ return f"{self.state_index} {self.descriptive_label} {self.frame_index} {self.probability}" # noqa: E501
32
+
33
+ def __str__(self) -> str:
34
+ """String of state."""
35
+ return f"{self.__repr__()}"
36
+
37
+ def _get_descriptive_label(self, label: str) -> list:
38
+ """Get descriptive label.
39
+
40
+ Args:
41
+ label (str): Label.
42
+ """
43
+ labels = []
44
+ if label == "init":
45
+ labels.append("init")
46
+ elif label == "terminal":
47
+ labels.append("terminal")
48
+ else:
49
+ for i in range(len(self.proposition_set)):
50
+ if label[i] == "T":
51
+ labels.append(self.proposition_set[i])
52
+ return labels
53
+
54
+ def update(self, frame_index: int, target_label: str) -> None:
55
+ """Update state to the new state.
56
+
57
+ Args:
58
+ frame_index (int): Frame index.
59
+ target_label (str): Target label for the new state.
60
+ """
61
+ self.frame_index = frame_index
62
+ self.label = target_label # TTT, TFT, FTT, etc.
63
+ self.descriptive_label = self._get_descriptive_label(label=target_label)
64
+ self.probability = 1.0
65
+
66
+ def compute_probability(self, probabilities: list[list[float]]) -> None:
67
+ """Compute probability of the state given the probabilities of the propositions.
68
+
69
+ Args:
70
+ probabilities (list): list of probabilities of the propositions
71
+ e.g. two propositions with three frames
72
+ -> [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]].
73
+ """ # noqa: E501
74
+ probability = 1.0
75
+ for i in range(len(self.label)):
76
+ if self.label[i] == "T":
77
+ probability *= probabilities[i][self.frame_index]
78
+ else:
79
+ probability *= 1 - probabilities[i][self.frame_index]
80
+ self.probability = round(probability, 3)
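A worked example of compute_probability: with two propositions whose frame-0 probabilities are 0.8 and 0.3, a frame-0 state labelled "TF" scores 0.8 * (1 - 0.3) = 0.56 (proposition names are placeholders):

from neus_v.model_checking.video_state import VideoState

state = VideoState(state_index=1, frame_index=0, label="TF", proposition_set=["storm", "lightning"])
state.compute_probability(probabilities=[[0.8], [0.3]])
print(state.probability)  # 0.56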
neus_v/smooth_scoring.py ADDED
@@ -0,0 +1,20 @@
1
+ import numpy as np
2
+
3
+
4
+ class DataTransformer:
5
+ def __init__(self, data):
6
+ self.data = np.asarray(data)
7
+ self.sorted_data = np.sort(self.data)
8
+ self.n = len(self.sorted_data)
9
+ self.ecdf = np.arange(1, self.n + 1) / self.n
10
+
11
+ def mapping_function(self, x):
12
+ x = np.asarray(x)
13
+ return np.interp(x, self.sorted_data, self.ecdf, left=0, right=1)
14
+
15
+
16
+ def smooth_confidence_scores(target_data, prior_distribution=None):
17
+ if prior_distribution is None:
18
+ prior_distribution = target_data
19
+ transformer = DataTransformer(prior_distribution)
20
+ return transformer.mapping_function(target_data)
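The smoothing is an empirical-CDF mapping: each raw score is replaced by the (interpolated) fraction of the prior distribution lying at or below it. A small example with made-up prior scores:

from neus_v.smooth_scoring import smooth_confidence_scores

prior = [0.2, 0.4, 0.6, 0.8]  # hypothetical prior scores for one model
print(smooth_confidence_scores([0.5], prior_distribution=prior))  # [0.625]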
neus_v/utils.py ADDED
@@ -0,0 +1,10 @@
1
+ import gc
2
+
3
+ import torch
4
+
5
+
6
+ def clear_gpu_memory():
7
+ torch.cuda.empty_cache()
8
+ if torch.cuda.is_available():
9
+ torch.cuda.ipc_collect()
10
+ gc.collect()
neus_v/veval/__init__.py ADDED
File without changes
neus_v/veval/eval.py ADDED
@@ -0,0 +1,250 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ from joblib import Parallel, delayed
6
+
7
+ from neus_v.automaton.video_automaton import VideoAutomaton
8
+ from neus_v.model_checking.stormpy import StormModelChecker
9
+ from neus_v.veval.parse import parse_tl_formula
10
+ from neus_v.video.frame import VideoFrame
11
+ from neus_v.video.read_video import read_video
12
+
13
+
14
+ def create_frame_windows(frames: list, window_size: int) -> list[list]:
15
+ """Create non-overlapping windows of frames, with remainder in last window.
16
+
17
+ Args:
18
+ frames: List of frames
19
+ window_size: Size of each window
20
+
21
+ Returns:
22
+ List of frame windows
23
+ """
24
+ windows = []
25
+ for i in range(0, len(frames), window_size):
26
+ windows.append(frames[i : i + window_size])
27
+ return windows
28
+
29
+
30
+ def evaluate_video(
31
+ vision_language_model,
32
+ confidence_as_token_probability: bool,
33
+ video_path: Path | str,
34
+ proposition_set: list,
35
+ tl_spec: str,
36
+ parallel_inference: bool = False,
37
+ threshold: float = 0.1,
38
+ ) -> dict:
39
+ """Evaluate a video using the given vision language model."""
40
+ output_log = {
41
+ "specification": None,
42
+ "propositions": None,
43
+ "probability": None,
44
+ "min_probability": None,
45
+ "max_probability": None,
46
+ "propositions_avg_probability": {},
47
+ }
48
+
49
+ if isinstance(video_path, str):
50
+ video_path = Path(video_path)
51
+ video = read_video(video_path=video_path)
52
+
53
+ # TODO: if there's F in the tl_spec
54
+ ltl_formula = parse_tl_formula(tl_spec)
55
+
56
+ video_automaton = VideoAutomaton(include_initial_state=True)
57
+
58
+ video_automaton.set_up(proposition_set=proposition_set)
59
+ model_checker = StormModelChecker(
60
+ proposition_set=proposition_set,
61
+ ltl_formula=ltl_formula,
62
+ )
63
+
64
+ proposition_probability_record = {}
65
+ for proposition in proposition_set:
66
+ proposition_probability_record[proposition] = []
67
+ if model_checker.validate_tl_specification(ltl_formula):
68
+ frame_count = 0
69
+ all_frames: list[np.ndarray] = video.get_all_frames_of_video(
70
+ return_format="ndarray",
71
+ desired_interval_in_sec=1,
72
+ )
73
+ try:
74
+ # for frame_img in all_frames:
75
+
76
+ def process_frame(frame_img: np.ndarray, frame_count: int):
77
+ sys.stdout.write(f"\rProcessing frame: {frame_count+1}/{len(all_frames)} ")
78
+ sys.stdout.flush()
79
+ object_of_interest = {}
80
+ for proposition in proposition_set:
81
+ detected_object = vision_language_model.detect(
82
+ frame_img=frame_img,
83
+ scene_description=proposition,
84
+ confidence_as_token_probability=confidence_as_token_probability,
85
+ threshold=threshold,
86
+ )
87
+ object_of_interest[proposition] = detected_object
88
+ # proposition_probability_record.get(proposition).append(
89
+ # detected_object.probability
90
+ # )
91
+ video_frame = VideoFrame(
92
+ frame_idx=frame_count,
93
+ timestamp=None,
94
+ frame_image=frame_img,
95
+ object_of_interest=object_of_interest,
96
+ )
97
+ return video_frame, object_of_interest
98
+
99
+ if parallel_inference:
100
+ results = Parallel(n_jobs=len(all_frames))(
101
+ delayed(process_frame)(frame_img, i) for i, frame_img in enumerate(all_frames)
102
+ )
103
+ else:
104
+ results = [process_frame(frame_img, i) for i, frame_img in enumerate(all_frames)]
105
+
106
+ for video_frame, object_of_interest in results:
107
+ video_automaton.add_frame(frame=video_frame)
108
+ for proposition, detected_object in object_of_interest.items():
109
+ proposition_probability_record[proposition].append(detected_object.probability)
110
+
111
+ video_automaton.add_terminal_state(add_with_terminal_label=True)
112
+ sys.stdout.write("\n") # Move to the next line after processing all frames
113
+ result = model_checker.check_automaton(
114
+ states=video_automaton.states,
115
+ transitions=video_automaton.transitions,
116
+ model_type="dtmc",
117
+ use_filter=True,
118
+ )
119
+ output_log["specification"] = tl_spec
120
+ output_log["propositions"] = proposition_set
121
+ output_log["probability"] = round(float(str(result)), 6)
122
+ output_log["min_probability"] = round(float(str(result.min)), 6)
123
+ output_log["max_probability"] = round(float(str(result.max)), 6)
124
+ for (
125
+ proposition,
126
+ probabilities,
127
+ ) in proposition_probability_record.items():
128
+ avg_probability = sum(probabilities) / len(probabilities)
129
+ output_log["propositions_avg_probability"][proposition] = round(avg_probability, 3)
130
+ except Exception as e: # noqa: BLE001
131
+ # print(f"\nError processing frame {frame_count}: {e}")
132
+ import traceback
133
+
134
+ print(f"\nError processing frame {frame_count}: {e}")
135
+ traceback.print_exc()
136
+
137
+ return output_log
138
+
139
+
140
+ def evaluate_video_with_sequence_of_images(
141
+ vision_language_model,
142
+ confidence_as_token_probability: bool,
143
+ video_path: Path | str,
144
+ proposition_set: list,
145
+ tl_spec: str,
146
+ parallel_inference: bool = False,
147
+ num_of_frame_in_sequence: int = 3,
148
+ threshold: float = 0.1,
149
+ ) -> dict:
150
+ """Evaluate a video using the given vision language model."""
151
+ output_log = {
152
+ "specification": None,
153
+ "propositions": None,
154
+ "probability": None,
155
+ "min_probability": None,
156
+ "max_probability": None,
157
+ "propositions_avg_probability": {},
158
+ }
159
+
160
+ if isinstance(video_path, str):
161
+ video_path = Path(video_path)
162
+ video = read_video(video_path=video_path)
163
+
164
+ # TODO: if there's F in the tl_spec
165
+ ltl_formula = parse_tl_formula(tl_spec)
166
+
167
+ video_automaton = VideoAutomaton(include_initial_state=True)
168
+
169
+ video_automaton.set_up(proposition_set=proposition_set)
170
+ model_checker = StormModelChecker(
171
+ proposition_set=proposition_set,
172
+ ltl_formula=ltl_formula,
173
+ )
174
+
175
+ proposition_probability_record = {}
176
+ for proposition in proposition_set:
177
+ proposition_probability_record[proposition] = []
178
+ if model_checker.validate_tl_specification(ltl_formula):
179
+ frame_count = 0
180
+ all_frames: list[np.ndarray] = video.get_all_frames_of_video(
181
+ return_format="ndarray",
182
+ desired_interval_in_sec=0.5,
183
+ )
184
+ try:
185
+ # for frame_img in all_frames:
186
+ def process_frame(sequence_of_frames: list[np.ndarray], frame_count: int):
187
+ sys.stdout.write(f"\rProcessing frame window: {frame_count+1}/{len(frame_windows)} ")
188
+ sys.stdout.flush()
189
+ object_of_interest = {}
190
+ for proposition in proposition_set:
191
+ detected_object = vision_language_model.detect(
192
+ seq_of_frames=sequence_of_frames,
193
+ scene_description=proposition,
194
+ confidence_as_token_probability=confidence_as_token_probability,
195
+ threshold=threshold,
196
+ )
197
+ object_of_interest[proposition] = detected_object
198
+ # proposition_probability_record.get(proposition).append(
199
+ # detected_object.probability
200
+ # )
201
+ print(f"{proposition}: {detected_object.probability}")
202
+ video_frame = VideoFrame(
203
+ frame_idx=frame_count,
204
+ timestamp=None,
205
+ frame_image=sequence_of_frames,
206
+ object_of_interest=object_of_interest,
207
+ )
208
+ return video_frame, object_of_interest
209
+
210
+ if parallel_inference:
211
+ frame_windows = create_frame_windows(frames=all_frames, window_size=num_of_frame_in_sequence)
212
+ results = Parallel(n_jobs=len(frame_windows))(
213
+ delayed(process_frame)(frame_img, i) for i, frame_img in enumerate(frame_windows)
214
+ )
215
+ else:
216
+ frame_windows = create_frame_windows(frames=all_frames, window_size=num_of_frame_in_sequence)
217
+ results = [process_frame(sequence_of_frames, i) for i, sequence_of_frames in enumerate(frame_windows)]
218
+
219
+ for video_frame, object_of_interest in results:
220
+ video_automaton.add_frame(frame=video_frame)
221
+ for proposition, detected_object in object_of_interest.items():
222
+ proposition_probability_record[proposition].append(detected_object.probability)
223
+
224
+ video_automaton.add_terminal_state(add_with_terminal_label=True)
225
+ sys.stdout.write("\n") # Move to the next line after processing all frames
226
+ result = model_checker.check_automaton(
227
+ states=video_automaton.states,
228
+ transitions=video_automaton.transitions,
229
+ model_type="dtmc",
230
+ use_filter=True,
231
+ )
232
+ output_log["specification"] = tl_spec
233
+ output_log["propositions"] = proposition_set
234
+ output_log["probability"] = round(float(str(result)), 6)
235
+ output_log["min_probability"] = round(float(str(result.min)), 6)
236
+ output_log["max_probability"] = round(float(str(result.max)), 6)
237
+ for (
238
+ proposition,
239
+ probabilities,
240
+ ) in proposition_probability_record.items():
241
+ avg_probability = sum(probabilities) / len(probabilities)
242
+ output_log["propositions_avg_probability"][proposition] = round(avg_probability, 3)
243
+ except Exception as e: # noqa: BLE001
244
+ # print(f"\nError processing frame {frame_count}: {e}")
245
+ import traceback
246
+
247
+ print(f"\nError processing frame {frame_count}: {e}")
248
+ traceback.print_exc()
249
+
250
+ return output_log
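create_frame_windows, defined at the top of this file, keeps any leftover frames in a final, shorter window, for example:

from neus_v.veval.eval import create_frame_windows

print(create_frame_windows(frames=list(range(7)), window_size=3))
# [[0, 1, 2], [3, 4, 5], [6]]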
neus_v/veval/parse.py ADDED
@@ -0,0 +1,29 @@
1
+ def parse_tl_formula(tl_spec: str) -> str:
2
+ """Validate the tl specification."""
3
+ if 'G "' in tl_spec:
4
+ tl_spec = tl_spec.replace('G "', 'F "')
5
+ tl_spec = tl_spec.replace("-", "_")
6
+ if tl_spec[0] == "\n":
7
+ tl_spec = tl_spec[1:]
8
+
9
+ if tl_spec[0] in ["F"]:
10
+ return f"P=? [{tl_spec}]"
11
+
12
+ if tl_spec[0] in ["G"]:
13
+ tl_spec = tl_spec[1:]
14
+ return f"P=? [F {tl_spec}]"
15
+
16
+ # if any(op in tl_spec for op in ["F", "G", "U"]):
17
+ # return f"P=? [F ({tl_spec})]"
18
+
19
+ return f"P=? [F {tl_spec}]"
20
+
21
+
22
+ def parse_proposition_set(proposition_set: list[str]) -> list[str]:
23
+ """Parse the proposition set."""
24
+ return [prop.replace("-", "_") for prop in proposition_set]
25
+
26
+
27
+ def parse_tl_specification(tl_spec: str) -> str:
28
+ """Parse the tl specification."""
29
+ return tl_spec.replace("-", "_")
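parse_tl_formula wraps a bare temporal-logic formula into a Storm P=? reachability query (and rewrites a leading G as eventual satisfaction). For example, with the specification used in evaluate.py:

from neus_v.veval.parse import parse_tl_formula

spec = '("waves_lapping" & "ocean_shore") U ("storm_bursts_in" U "lightning_on_the_sky")'
print(parse_tl_formula(spec))
# P=? [F ("waves_lapping" & "ocean_shore") U ("storm_bursts_in" U "lightning_on_the_sky")]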
neus_v/video/__init__.py ADDED
File without changes
neus_v/video/frame.py ADDED
@@ -0,0 +1,81 @@
1
+ import dataclasses
2
+ from typing import TYPE_CHECKING
3
+
4
+ import cv2
5
+
6
+ if TYPE_CHECKING:
7
+ import numpy as np
8
+
9
+
10
+ @dataclasses.dataclass
11
+ class VideoFrame:
12
+ """Frame class."""
13
+
14
+ frame_idx: int
15
+ timestamp: int | None = None
16
+ frame_image: np.ndarray | None = None
17
+ annotated_image: dict[str, np.ndarray] = dataclasses.field(default_factory=dict)
18
+ detected_object_set: dict | None = None
19
+ object_of_interest: dict | None = None
20
+ activity_of_interest: dict | None = None
21
+
22
+ def save_frame_img(self, save_path: str) -> None:
23
+ """Save frame image."""
24
+ if self.frame_image is not None:
25
+ cv2.imwrite(
26
+ save_path,
27
+ self.frame_image,
28
+ )
29
+
30
+ def is_any_object_detected(self) -> bool:
31
+ """Check if object is detected."""
32
+ return len(self.detected_object_set.objects) > 0
33
+
34
+ @property
35
+ def list_of_detected_object_of_interest(self) -> list:
36
+ """Get detected object."""
37
+ detected_obj = []
38
+ for obj_name, obj_value in self.object_of_interest.items():
39
+ if obj_value.is_detected:
40
+ detected_obj.append(obj_name)
41
+ return detected_obj
42
+
43
+ @property
44
+ def detected_object_dict(self) -> dict:
45
+ """Get detected object info as dict."""
46
+ detected_obj = {}
47
+ for obj_name, obj_value in self.object_of_interest.items():
48
+ if obj_value.is_detected:
49
+ detected_obj[obj_name] = {}
50
+ detected_obj[obj_name]["total_number_of_detection"] = obj_value.number_of_detection
51
+ detected_obj[obj_name]["maximum_probability"] = max(obj_value.probability_of_all_obj)
52
+ detected_obj[obj_name]["minimum_probability"] = min(obj_value.probability_of_all_obj)
53
+ detected_obj[obj_name]["maximum_confidence"] = max(obj_value.confidence_of_all_obj)
54
+ detected_obj[obj_name]["minimum_confidence"] = min(obj_value.confidence_of_all_obj)
55
+
56
+ return detected_obj
57
+
58
+ def detected_bboxes(self, probability_threshold: bool = False) -> list:
59
+ """Get detected object.
60
+
61
+ Args:
62
+ probability_threshold (bool): If True, only keep boxes whose
63
+ probability is greater than zero. Defaults to False.
64
+
65
+ Returns:
66
+ list: Bounding boxes.
67
+ """
68
+ bboxes = []
69
+
70
+ for _, obj_value in self.object_of_interest.items():
71
+ if obj_value.is_detected:
72
+ if probability_threshold:
73
+ for obj_prob in obj_value.probability_of_all_obj:
74
+ if obj_prob > 0:
75
+ bboxes += obj_value.bounding_box_of_all_obj
76
+ else:
77
+ bboxes += obj_value.bounding_box_of_all_obj
78
+
79
+ return bboxes
80
+
81
+
neus_v/video/read_video.py ADDED
@@ -0,0 +1,45 @@
1
+ from pathlib import Path
2
+ from typing import TYPE_CHECKING
3
+
4
+ from neus_v.video.video import Video, VideoFormat
5
+
6
+ if TYPE_CHECKING:
7
+ import numpy as np
8
+
9
+
10
+ def read_video(
11
+ video_path: str | Path | None = None,
12
+ sequence_of_image: list[np.ndarray] | None = None,
13
+ ) -> Video:
14
+ """Read video from video_path or sequence of images.
15
+
16
+ Args:
17
+ video_path (str | Path | None): Path to video file. Defaults to None.
18
+ sequence_of_image (list[np.ndarray] | None): Sequence of images
19
+ as numpy arrays. Defaults to None.
20
+
21
+ Returns:
22
+ Video: Video object.
23
+
24
+ Raises:
25
+ ValueError: If neither or both video_path and
26
+ sequence_of_image are provided.
27
+ """
28
+ if (video_path is None) == (sequence_of_image is None):
29
+ msg = "Exactly one of video_path or sequence_of_image must be provided."
30
+ raise ValueError(msg)
31
+ if video_path:
32
+ if isinstance(video_path, str):
33
+ video_path = Path(video_path)
34
+
35
+ if video_path.suffix == ".mp4":
36
+ read_format = VideoFormat.MP4
37
+
38
+ if sequence_of_image:
39
+ read_format = VideoFormat.LIST_OF_ARRAY
40
+
41
+ return Video(
42
+ video_path=video_path,
43
+ sequence_of_image=sequence_of_image,
44
+ read_format=read_format,
45
+ )
neus_v/video/video.py ADDED
@@ -0,0 +1,304 @@
1
+ import enum
2
+ import logging
3
+ import uuid
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING
6
+
7
+ import cv2
8
+ from PIL import Image
9
+
10
+ if TYPE_CHECKING:
11
+ from pathlib import Path
12
+
13
+ import numpy as np
14
+
15
+
16
+ class VideoFormat(enum.Enum):
17
+ """Status Enum for the CV API."""
18
+
19
+ MP4 = "mp4"
20
+ LIST_OF_ARRAY = "list_of_array"
21
+
22
+
23
+ @dataclass
24
+ class VideoInfo:
25
+ """Represents information about a video file."""
26
+
27
+ format: VideoFormat
28
+ frame_width: int
29
+ frame_height: int
30
+ original_frame_count: int
31
+ video_id: uuid.UUID = field(default_factory=uuid.uuid4)
32
+ video_path: str | None = None
33
+ processed_fps: float | None = None
34
+ processed_frame_count: int = 1
35
+ original_fps: float | None = None
36
+
37
+
38
+ class Video:
39
+ """vflow's Video Object."""
40
+
41
+ def __init__(
42
+ self,
43
+ read_format: VideoFormat,
44
+ video_path: str | Path | None = None,
45
+ sequence_of_image: list[np.ndarray] | None = None,
46
+ ) -> None:
47
+ """Video Frame Processor.
48
+
49
+ Args:
50
+ video_path (str | Path): Path to video file.
51
+ read_format (VideoFormat): Format to read the video in.
52
+ sequence_of_image (list[np.ndarray] | None): List of image arrays
53
+ for processing.
54
+ """
55
+ self._video_path = video_path
56
+ self._read_format = read_format
57
+ self.video_info = None
58
+ if sequence_of_image:
59
+ self.all_frames = sequence_of_image
60
+ if isinstance(sequence_of_image[0], list):
61
+ self.all_frames = sequence_of_image[0]
62
+ self.import_video(str(video_path))
63
+ self.current_frame_index = 0
64
+ self.video_ended = False
65
+
66
+ def __str__(self) -> str:
67
+ """Return a concise string representation of the Video object."""
68
+ return str(self.video_info)
69
+
70
+ def __repr__(self) -> str:
71
+ """Return a detailed string representation of the Video object."""
72
+ return repr(self.video_info)
73
+
74
+ def import_video(self, video_path: str | None) -> None:
75
+ """Read video from video_path.
76
+
77
+ Args:
78
+ video_path (str): Path to video file.
79
+ """
80
+ logging.info(f"Video format: {self._read_format}")
81
+ if self._read_format == VideoFormat.MP4:
82
+ self._cap = cv2.VideoCapture(video_path)
83
+ ret, _ = self._cap.read()
84
+ if not ret:
85
+ logging.error("Video path is invalid.")
86
+ self.video_info = VideoInfo(
87
+ video_path=str(self._video_path),
88
+ format=self._read_format,
89
+ frame_width=int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
90
+ frame_height=int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
91
+ original_fps=self._cap.get(cv2.CAP_PROP_FPS),
92
+ original_frame_count=int(self._cap.get(cv2.CAP_PROP_FRAME_COUNT)),
93
+ )
94
+ elif self._read_format == VideoFormat.LIST_OF_ARRAY:
95
+ self.video_info = VideoInfo(
96
+ format=self._read_format,
97
+ frame_width=int(self.all_frames[0].shape[0]),
98
+ frame_height=int(self.all_frames[0].shape[1]),
99
+ original_frame_count=len(self.all_frames),
100
+ )
101
+
102
+ def _resize_frame_by_scale(self, frame_img: np.ndarray, frame_scale: int) -> np.ndarray:
103
+ """Resize frame image.
104
+
105
+ Args:
106
+ frame_img (np.ndarray): Frame image.
107
+ frame_scale (int): Scale of frame.
108
+
109
+ Returns:
110
+ np.ndarray: Resized frame image.
111
+ """
112
+ return cv2.resize(
113
+ frame_img,
114
+ (
115
+ int(self.video_info.frame_width / frame_scale),
116
+ int(self.video_info.frame_height / frame_scale),
117
+ ),
118
+ )
119
+
120
+ def get_all_frames_of_video(
121
+ self,
122
+ return_format: str = "ndarray",
123
+ frame_scale: int | None = None,
124
+ desired_fps: int | None = None,
125
+ desired_interval_in_sec: int | None = None,
126
+ ) -> list:
127
+ """Get video frames by frame_scale and second_per_frame.
128
+
129
+ Args:
130
+ return_format (str, optional): Return format. Defaults to "ndarray".
131
+ Options: [cv2, ndarray]
132
+ frame_scale (int | None, optional): Frame scale. Defaults to None.
133
+ desired_fps (int | None, optional): Desired FPS. Defaults to None.
134
+ desired_interval_in_sec (int | None, optional): Interval between frames in seconds.
135
+ If provided, frames will be extracted at this interval. Defaults to None.
136
+ """ # noqa: E501
137
+ if self._read_format == VideoFormat.LIST_OF_ARRAY:
138
+ resize_func = lambda img: self.process_frame_image( # noqa: E731
139
+ frame_img=img,
140
+ frame_scale=frame_scale,
141
+ return_format=return_format,
142
+ )
143
+ all_frames = list(map(resize_func, self.all_frames))
144
+ self.processed_frame_count = len(all_frames)
145
+ return all_frames
146
+
147
+ all_frames = []
148
+
149
+ if self._read_format == VideoFormat.MP4 and desired_fps is None and desired_interval_in_sec is None:
150
+ msg = (
151
+ "Either desired_fps "
152
+ "or desired_interval_in_sec must be provided."
153
+ )
154
+ raise ValueError(msg)
155
+
156
+ if self._read_format == VideoFormat.MP4:
157
+ frame_step = self.get_frame_step(
158
+ desired_fps=desired_fps,
159
+ desired_interval_in_sec=desired_interval_in_sec,
160
+ )
161
+
162
+ for real_frame_idx in range(0, int(self.video_info.original_frame_count), int(frame_step)):
163
+ self._cap.set(cv2.CAP_PROP_POS_FRAMES, real_frame_idx)
164
+ ret, frame_img = self._cap.read()
165
+ if not ret:
166
+ break
167
+ frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
168
+ frame_img = self.process_frame_image(
169
+ frame_img=frame_img,
170
+ frame_scale=frame_scale,
171
+ return_format=return_format,
172
+ )
173
+ all_frames.append(frame_img)
174
+ self._cap.release()
175
+ # cv2.destroyAllWindows()
176
+ self.processed_frame_count = len(all_frames)
177
+ return all_frames
178
+
179
+ def get_next_frame(
180
+ self,
181
+ return_format: str = "ndarray",
182
+ frame_scale: int | None = None,
183
+ desired_fps: int | None = None,
184
+ desired_interval_in_sec: int | None = None,
185
+ ) -> np.ndarray | None:
186
+ """Get the next video frame based on frame step.
187
+
188
+ Args:
189
+ return_format (str, optional): Return format. Defaults to "ndarray".
190
+ - [cv2, ndarray, pil]
191
+ frame_scale (int | None, optional): Frame scale. Defaults to None.
192
+ desired_fps (int | None, optional): Desired FPS. Defaults to None.
193
+ desired_interval_in_sec (int | None, optional): Desired interval.
194
+ Defaults to None.
195
+
196
+ Returns:
197
+ np.ndarray | None: The next frame as an ndarray, or None if no more
198
+ frames are available or the video ended.
199
+ """
200
+ if self._read_format == VideoFormat.MP4 and desired_fps is None and desired_interval_in_sec is None:
201
+ msg = (
202
+ "Either desired_fps or "
203
+ "desired_interval_in_sec must be provided."
204
+ )
205
+ raise ValueError(msg)
206
+
207
+ if self.video_ended:
208
+ logging.info("No frame available.")
209
+ return None # No more frames to process
210
+
211
+ if self._read_format == VideoFormat.MP4:
212
+ frame_step = self.get_frame_step(
213
+ desired_fps=desired_fps,
214
+ desired_interval_in_sec=desired_interval_in_sec,
215
+ )
216
+ # Skip to the next frame based on frame_step
217
+ self._cap.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame_index)
218
+
219
+ ret, frame_img = self._cap.read()
220
+
221
+ if not ret:
222
+ self.video_ended = True
223
+ return None # No more frames or error occurred
224
+
225
+ # Update the current frame index for the next call
226
+ self.current_frame_index += frame_step
227
+
228
+ frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
229
+
230
+ if self._read_format == VideoFormat.LIST_OF_ARRAY:
231
+ if self.current_frame_index < len(self.all_frames):
232
+ frame_img = self.all_frames[self.current_frame_index]
233
+ self.current_frame_index += 1
234
+ else:
235
+ # No more frames available.
236
+ self.video_ended = True
237
+ return None
238
+
239
+ self.video_info.processed_frame_count += 1
240
+
241
+ return self.process_frame_image(
242
+ frame_img=frame_img,
243
+ frame_scale=frame_scale,
244
+ return_format=return_format,
245
+ )
246
+
247
+ def process_frame_image(
248
+ self,
249
+ frame_img: np.ndarray,
250
+ return_format: str = "ndarray",
251
+ frame_scale: int | None = None,
252
+ ) -> np.ndarray:
253
+ """Process a single frame image.
254
+
255
+ Args:
256
+ frame_img (np.ndarray): Input frame image.
257
+ return_format (str, optional): Desired return format.
258
+ Defaults to "ndarray".
259
+ frame_scale (int | None, optional): Scale factor for resizing.
260
+ Defaults to None.
261
+
262
+ Returns:
263
+ np.ndarray: Processed frame image.
264
+ """
265
+ if frame_scale is not None:
266
+ frame_img = self._resize_frame_by_scale(frame_img, frame_scale)
267
+ if return_format == "pil":
268
+ frame_img = Image.fromarray(frame_img).convert("RGB")
269
+ return frame_img
270
+
271
+ def get_frame_step(
272
+ self,
273
+ desired_interval_in_sec: int | None = None,
274
+ desired_fps: int | None = None,
275
+ ) -> int:
276
+ """Calculate the frame step based on desired interval or FPS.
277
+
278
+ Args:
279
+ desired_interval_in_sec (int | None): Desired interval between frames in seconds.
280
+ desired_fps (int | None): Desired frames per second.
281
+
282
+ Returns:
283
+ int: Calculated frame step.
284
+ """ # noqa: E501
285
+ if desired_fps is not None:
286
+ frame_step = int(round(self.video_info.original_fps / desired_fps))
287
+ processed_fps = desired_fps
288
+ if desired_interval_in_sec is not None:
289
+ frame_step = int(round(self.video_info.original_fps * desired_interval_in_sec))
290
+ processed_fps = round(1 / desired_interval_in_sec, 2)
291
+ self.video_info.processed_fps = processed_fps
292
+
293
+ return frame_step
294
+
295
+ def insert_annotation_to_current_frame(self, annotations: list[str]) -> None:
296
+ """Insert annotations to the current frame.
297
+
298
+ Args:
299
+ annotations (list[str]): List of annotations.
300
+ """
301
+
302
+ def get_video_info(self) -> VideoInfo:
303
+ """Return the VideoInfo object containing video information."""
304
+ return self.video_info
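get_frame_step converts the requested sampling rate into a stride over the source frames. A sketch, assuming a 30 fps MP4 at a hypothetical path:

from neus_v.video.read_video import read_video

video = read_video(video_path="clip.mp4")  # hypothetical 30 fps clip
step = video.get_frame_step(desired_interval_in_sec=0.5)
print(step, video.video_info.processed_fps)  # 15 2.0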
neus_v/vlm/__init__.py ADDED
File without changes
neus_v/vlm/internvl.py ADDED
@@ -0,0 +1,373 @@
1
+ import gc
2
+ import logging
3
+
4
+ import numpy as np
5
+ import torch
6
+ from PIL import Image
7
+ from torch.nn.functional import softmax
8
+ from transformers import AutoModel, AutoTokenizer
9
+
10
+ from neus_v.vlm.internvl_utils import (
11
+ assign_device_map,
12
+ load_image,
13
+ load_video_from_file,
14
+ load_video_from_seq_of_frames,
15
+ split_model,
16
+ )
17
+ from neus_v.vlm.obj import DetectedObject
18
+
19
+ MODEL_PATH = {
20
+ "InternVL2-40B": "HuggingFace Model",
21
+ "InternVL2-8B": "HuggingFace Model",
22
+ "InternVL2-2B": "HuggingFace Model",
23
+ }
24
+
25
+
26
+ class InternVL:
27
+ """InternVL's Vision Language Model."""
28
+
29
+ def __init__(
30
+ self,
31
+ model_name: str = "InternVL2-8B",
32
+ multi_gpus: bool = False,
33
+ device: int = 0,
34
+ ) -> None:
35
+ """Initialize the InternVL."""
36
+ logging.info(
37
+ (
38
+ "You are using the model based on HuggingFace API. "
39
+ "The model will be downloaded to the HuggingFace cache dir."
40
+ )
41
+ )
42
+ self.model_name = model_name
43
+ self._path = f"OpenGVLab/{model_name}"
44
+ self._num_gpus = torch.cuda.device_count()
45
+ self.device = device
46
+ if multi_gpus:
47
+ device_map = split_model(model_name)
48
+ else:
49
+ device_map = assign_device_map(model_name=model_name, manual_gpu_id=device)
50
+ self.model = AutoModel.from_pretrained(
51
+ self._path,
52
+ torch_dtype=torch.bfloat16,
53
+ low_cpu_mem_usage=True,
54
+ use_flash_attn=True,
55
+ trust_remote_code=True,
56
+ device_map=device_map,
57
+ ).eval()
58
+ self.model.apply(self.move_tensors_to_gpu)
59
+ self.tokenizer = AutoTokenizer.from_pretrained(self._path, trust_remote_code=True, use_fast=False)
60
+
61
+ def reset_model(self) -> None:
62
+ """Reset the model to its initial state using pretrained weights."""
63
+ self.model = AutoModel.from_pretrained(
64
+ self._path,
65
+ torch_dtype=torch.bfloat16,
66
+ low_cpu_mem_usage=True,
67
+ use_flash_attn=True,
68
+ trust_remote_code=True,
69
+ ).eval()
70
+ self.model.apply(self.move_tensors_to_gpu)
71
+
72
+ def clear_gpu_memory(self) -> None:
73
+ """Clear CUDA cache and run garbage collection to free GPU memory."""
74
+ torch.cuda.empty_cache()
75
+ if torch.cuda.is_available():
76
+ torch.cuda.ipc_collect()
77
+ gc.collect() # Run garbage collector
78
+
79
+ def move_tensors_to_gpu(
80
+ self,
81
+ module: torch.nn.Module,
82
+ ) -> None:
83
+ """Move all tensors in the module to GPU if they are on the CPU."""
84
+ for name, tensor in module.named_buffers():
85
+ if isinstance(tensor, torch.Tensor) and tensor.device.type == "cpu":
86
+ module.register_buffer(
87
+ name,
88
+ tensor.cuda(self.device),
89
+ persistent=False,
90
+ )
91
+ for _, param in module.named_parameters():
92
+ if param.device.type == "cpu":
93
+ param.data = param.data.cuda(self.device)
94
+
95
+ def infer_with_image(
96
+ self,
97
+ language: str,
98
+ image: np.ndarray | None = None,
99
+ image_path: str | None = None,
100
+ max_new_tokens: int = 1024,
101
+ do_sample: bool = True,
102
+ ) -> str:
103
+ """Perform image inference with given video inputs."""
104
+ assert ( # noqa: S101
105
+ image is not None or image_path is not None
106
+ ), "One of 'image' or 'image_path' must be defined."
107
+ if image_path:
108
+ image = Image.open(image_path).convert("RGB")
109
+ else:
110
+ image = Image.fromarray(image)
111
+ # set the max number of tiles in `max_num`
112
+ pixel_values = load_image(image, max_num=12).to(torch.bfloat16).cuda(self.device)
113
+ generation_config = {
114
+ "max_new_tokens": max_new_tokens,
115
+ "do_sample": do_sample,
116
+ }
117
+ image_prefix = "<image>\n"
118
+ language = image_prefix + language
119
+ return self.model.chat(self.tokenizer, pixel_values, language, generation_config)
120
+
121
+ def infer_with_video(
122
+ self,
123
+ language: str,
124
+ seq_of_frames: list[np.ndarray] | None = None,
125
+ video_path: str | None = None,
126
+ max_new_tokens: int = 1024,
127
+ do_sample: bool = True,
128
+ ) -> tuple[str, list]:
129
+ """Perform video inference with the given video inputs and return the response together with the chat history."""
130
+ assert ( # noqa: S101
131
+ seq_of_frames is not None or video_path is not None
132
+ ), "One of 'seq_of_frames' or 'video_path' must be defined."
133
+ generation_config = {
134
+ "max_new_tokens": max_new_tokens,
135
+ "do_sample": do_sample,
136
+ }
137
+ if video_path:
138
+ pixel_values, num_patches_list = load_video_from_file(video_path)
139
+ else:
140
+ pixel_values, num_patches_list = load_video_from_seq_of_frames(seq_of_frames=seq_of_frames)
141
+ video_prefix = "".join([f"Frame{i+1}: <image>\n" for i in range(len(num_patches_list))])
142
+ language = video_prefix + language
143
+ return self.model.chat(
144
+ self.tokenizer,
145
+ pixel_values,
146
+ language,
147
+ generation_config,
148
+ num_patches_list=num_patches_list,
149
+ history=None,
150
+ return_history=True,
151
+ )
152
+
153
+ def detect(
154
+ self,
155
+ scene_description: str,
156
+ frame_img: np.ndarray | None = None,
157
+ seq_of_frames: list[np.ndarray] | None = None,
158
+ video_path: str | None = None,
159
+ threshold: float = 0.349,
160
+ confidence_as_token_probability: bool = True,
161
+ ) -> DetectedObject:
162
+ """Detect objects in the given frame image.
163
+
164
+ Args:
165
+ scene_description (str): Description of the scene.
166
+ frame_img (np.ndarray | None): The image frame to process.
167
+ seq_of_frames (list[np.ndarray] | None):
168
+ List of video frames to process.
169
+ video_path (str | None): Path to video file to process.
170
+ threshold (float): Detection threshold.
171
+ confidence_as_token_probability (bool):
172
+ Whether to use token probabilities for confidence.
173
+
174
+ Returns:
175
+ DetectedObject: Detected objects with their details.
176
+ """
177
+ if confidence_as_token_probability:
178
+ parsing_rule = [
179
+ "You must only return a Yes or No, and not both, to any question asked. " # noqa: E501
180
+ "You must not include any other symbols, information, text, justification in your answer or repeat Yes or No multiple times.", # noqa: E501
181
+ "For example, if the question is 'Is there a cat present in the Image?', the answer must only be 'Yes' or 'No'.", # noqa: E501
182
+ ]
183
+ parsing_rule = "\n".join(parsing_rule)
184
+ prompt = rf"Is there a {scene_description} present in the image? " f"[PARSING RULE]\n:{parsing_rule}"
185
+
186
+ if seq_of_frames or video_path:
187
+ response, confidence = self.infer_with_video_confidence(
188
+ language=prompt,
189
+ seq_of_frames=seq_of_frames,
190
+ video_path=video_path,
191
+ )
192
+ else:
193
+ response, confidence = self.infer_with_image_confidence(language=prompt, image=frame_img)
194
+ # TODO: Add a check for the response to be Yes or NO or clean up response better # noqa: E501
195
+ if "yes" in response.lower():
196
+ detected = True
197
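+ # Reject low-confidence "Yes" answers: below the threshold the detection is discarded.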
+ if confidence <= threshold:
198
+ confidence = 0.0
199
+ detected = False
200
+
201
+ else:
202
+ detected = False
203
+ confidence = 0.0
204
+
205
+ return DetectedObject(
206
+ name=scene_description,
207
+ model_name=self.model_name,
208
+ confidence=round(confidence, 3),
209
+ probability=round(confidence, 3),
210
+ number_of_detection=1,
211
+ is_detected=detected,
212
+ )
213
+
214
+ def infer_with_image_confidence(
215
+ self,
216
+ language: str,
217
+ image: np.ndarray | None = None,
218
+ image_path: str | None = None,
219
+ max_new_tokens: int = 1024,
220
+ do_sample: bool = True,
221
+ ) -> tuple[str, float]:
222
+ """Perform image inference and return response with confidence score.
223
+
224
+ Args:
225
+ language (str): The input prompt or question.
226
+ image (np.ndarray | None): The input image as a numpy array.
227
+ image_path (str | None): Path to the input image file.
228
+ max_new_tokens (int): Maximum number of new tokens to generate.
229
+ do_sample (bool): Whether to use sampling for generation.
230
+
231
+ Returns:
232
+ tuple[str, float]: Generated response and confidence score.
233
+ """
234
+ if image_path:
235
+ image = Image.open(image_path).convert("RGB")
236
+ else:
237
+ image = Image.fromarray(image)
238
+ # set the max number of tiles in `max_num`
239
+ pixel_values = load_image(image, max_num=12).to(torch.bfloat16).cuda(self.device)
240
+ generation_config = {
241
+ "max_new_tokens": max_new_tokens,
242
+ "do_sample": do_sample,
243
+ }
244
+ image_prefix = "<image>\n"
245
+ language = image_prefix + language
246
+
247
+ return self.chat_with_confidence(self.tokenizer, pixel_values, language, generation_config)
248
+
249
+ def chat_with_confidence( # noqa: PLR0913
250
+ self,
251
+ tokenizer: AutoTokenizer,
252
+ pixel_values: torch.Tensor,
253
+ question: str,
254
+ generation_config: dict,
255
+ num_patches_list: list[int] | None = None,
256
+ IMG_START_TOKEN: str = "<img>", # noqa: N803, S107
257
+ IMG_END_TOKEN: str = "</img>", # noqa: N803, S107
258
+ IMG_CONTEXT_TOKEN: str = "<IMG_CONTEXT>", # noqa: N803, S107
259
+ verbose: bool = False,
260
+ ) -> tuple[str, float]:
261
+ """Generate a response with confidence score for the given input.
262
+
263
+ Args:
264
+ tokenizer: The tokenizer to use.
265
+ pixel_values: Image tensor input.
266
+ question: The input question or prompt.
267
+ generation_config: Configuration for text generation.
268
+ num_patches_list: List of number of patches for video frames.
269
+ IMG_START_TOKEN: Token to mark the start of an image.
270
+ IMG_END_TOKEN: Token to mark the end of an image.
271
+ IMG_CONTEXT_TOKEN: Token for image context.
272
+ verbose: Whether to print verbose output.
273
+
274
+ Returns:
275
+ A tuple containing the generated response and its confidence score.
276
+ """
277
+ if num_patches_list is None:
278
+ num_patches_list = [pixel_values.shape[0]] if pixel_values is not None else []
279
+
280
+ assert pixel_values is None or len(pixel_values) == sum(num_patches_list) # noqa: S101
281
+
282
+ img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
283
+ self.model.img_context_token_id = img_context_token_id
284
+
285
+ template = self.model.conv_template
286
+ template.system_message = self.model.system_message
287
+ eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
288
+
289
+ template.append_message(template.roles[0], question)
290
+ template.append_message(template.roles[1], None)
291
+ query = template.get_prompt()
292
+
293
+ if verbose and pixel_values is not None:
294
+ image_bs = pixel_values.shape[0]
295
+ print(f"dynamic ViT batch size: {image_bs}") # noqa: T201
296
+
297
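+ # Expand each "<image>" placeholder into IMG_START + (num_image_token * num_patches) IMG_CONTEXT tokens + IMG_END.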
+ for num_patches in num_patches_list:
298
+ context_tokens = IMG_CONTEXT_TOKEN * self.model.num_image_token * num_patches
299
+ image_tokens = IMG_START_TOKEN + context_tokens + IMG_END_TOKEN
300
+ query = query.replace("<image>", image_tokens, 1)
301
+
302
+ model_inputs = tokenizer(query, return_tensors="pt")
303
+ input_ids = model_inputs["input_ids"].cuda(self.device)
304
+ attention_mask = model_inputs["attention_mask"].cuda(self.device)
305
+ generation_config["eos_token_id"] = eos_token_id
306
+ generation_config["return_dict_in_generate"] = True
307
+ generation_config["output_scores"] = True
308
+ generation_config["output_logits"] = True
309
+ generation_output = self.model.generate(
310
+ pixel_values=pixel_values,
311
+ input_ids=input_ids,
312
+ attention_mask=attention_mask,
313
+ **generation_config,
314
+ )
315
+ response = tokenizer.batch_decode(generation_output.sequences, skip_special_tokens=True)[0]
316
+ response = response.split(template.sep)[0].strip()
317
+
318
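+ # Confidence is the product of the softmax probabilities of every generated non-EOS token.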
+ logits_to_compute = np.where(generation_output.sequences[0].detach().cpu().numpy() != eos_token_id)[0]
319
+ confidence = 1.0
320
+ for logit in logits_to_compute:
321
+ token = generation_output.sequences[0, logit].item()
322
+ prob = softmax(generation_output.logits[logit], dim=-1)[0, token]
323
+ confidence = prob.item() * confidence
324
+ self.clear_gpu_memory()
325
+ return response, confidence
326
+
327
+ def infer_with_video_confidence(
328
+ self,
329
+ language: str,
330
+ seq_of_frames: list[np.ndarray] | None = None,
331
+ video_path: str | None = None,
332
+ max_new_tokens: int = 1024,
333
+ do_sample: bool = True,
334
+ ) -> tuple[str, float]:
335
+ """Perform video inference and return response with confidence score.
336
+
337
+ Args:
338
+ language (str): The input prompt or question.
339
+ seq_of_frames (list[np.ndarray] | None):
340
+ List of video frames as numpy arrays.
341
+ video_path (str | None): Path to the input video file.
342
+ max_new_tokens (int): Maximum number of new tokens to generate.
343
+ do_sample (bool): Whether to use sampling for generation.
344
+
345
+ Returns:
346
+ tuple[str, float]: Generated response and confidence score.
347
+ """
348
+ assert ( # noqa: S101
349
+ seq_of_frames is not None or video_path is not None
350
+ ), "One of 'seq_of_frames' or 'video_path' must be defined."
351
+
352
+ generation_config = {
353
+ "max_new_tokens": max_new_tokens,
354
+ "do_sample": do_sample,
355
+ }
356
+
357
+ if video_path:
358
+ pixel_values, num_patches_list = load_video_from_file(video_path, device=self.device)
359
+ else:
360
+ pixel_values, num_patches_list = load_video_from_seq_of_frames(
361
+ seq_of_frames=seq_of_frames, device=self.device
362
+ )
363
+
364
+ video_prefix = "".join([f"Frame{i+1}: <image>\n" for i in range(len(num_patches_list))])
365
+ language = video_prefix + language
366
+
367
+ return self.chat_with_confidence(
368
+ self.tokenizer,
369
+ pixel_values,
370
+ language,
371
+ generation_config,
372
+ num_patches_list=num_patches_list,
373
+ )
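+
+ # Illustrative usage sketch: assumes a CUDA GPU, downloadable OpenGVLab/InternVL2-8B weights,
+ # and a placeholder video path.
+ # vlm = InternVL(model_name="InternVL2-8B", device=0)
+ # result = vlm.detect(scene_description="a red car", video_path="demo.mp4")
+ # print(result.is_detected, result.probability)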
neus_v/vlm/internvl_utils.py ADDED
@@ -0,0 +1,241 @@
1
+ import math
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torchvision.transforms as T
6
+ from decord import VideoReader, cpu
7
+ from PIL import Image
8
+ from torchvision.transforms.functional import InterpolationMode
9
+
10
+ from neus_v.video.read_video import read_video
11
+
12
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
13
+ IMAGENET_STD = (0.229, 0.224, 0.225)
14
+
15
+
16
+ def build_transform(input_size: int) -> T.Compose:
17
+ """Builds a transformation pipeline for the given input size."""
18
+ mean, std = IMAGENET_MEAN, IMAGENET_STD
19
+ return T.Compose(
20
+ [
21
+ T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
22
+ T.Resize(
23
+ (input_size, input_size),
24
+ interpolation=InterpolationMode.BICUBIC,
25
+ ),
26
+ T.ToTensor(),
27
+ T.Normalize(mean=mean, std=std),
28
+ ]
29
+ )
30
+
31
+
32
+ def assign_device_map(model_name, manual_gpu_id=0):
33
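+ """Place every module of the model (vision tower, projector, and all LM layers) on a single, manually chosen GPU."""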
+ device_map = {}
34
+ world_size = torch.cuda.device_count()
35
+ num_layers = {
36
+ "InternVL2-1B": 24,
37
+ "InternVL2-2B": 24,
38
+ "InternVL2-4B": 32,
39
+ "InternVL2-8B": 32,
40
+ "InternVL2-26B": 48,
41
+ "InternVL2-40B": 60,
42
+ "InternVL2-Llama3-76B": 80,
43
+ }[model_name]
44
+ for layer_idx in range(num_layers):
45
+ device_map[f"language_model.model.layers.{layer_idx}"] = manual_gpu_id
46
+
47
+ device_map["vision_model"] = manual_gpu_id
48
+ device_map["mlp1"] = manual_gpu_id
49
+ device_map["language_model.model.tok_embeddings"] = manual_gpu_id
50
+ device_map["language_model.model.embed_tokens"] = manual_gpu_id
51
+ device_map["language_model.output"] = manual_gpu_id
52
+ device_map["language_model.model.norm"] = manual_gpu_id
53
+ device_map["language_model.lm_head"] = manual_gpu_id
54
+ device_map[f"language_model.model.layers.{num_layers - 1}"] = manual_gpu_id
55
+
56
+ return device_map
57
+
58
+
59
+ def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
60
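+ """Return the (width, height) tile ratio whose aspect ratio is closest to the input image's, breaking ties by covered area."""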
+ best_ratio_diff = float("inf")
61
+ best_ratio = (1, 1)
62
+ area = width * height
63
+ for ratio in target_ratios:
64
+ target_aspect_ratio = ratio[0] / ratio[1]
65
+ ratio_diff = abs(aspect_ratio - target_aspect_ratio)
66
+ if ratio_diff < best_ratio_diff:
67
+ best_ratio_diff = ratio_diff
68
+ best_ratio = ratio
69
+ elif ratio_diff == best_ratio_diff:
70
+ if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
71
+ best_ratio = ratio
72
+ return best_ratio
73
+
74
+
75
+ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
76
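+ """Resize the image to the closest supported tile grid and split it into image_size x image_size crops, optionally appending a thumbnail."""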
+ # Convert numpy array to PIL Image if needed
77
+ if isinstance(image, np.ndarray):
78
+ image = Image.fromarray(image)
79
+
80
+ orig_width, orig_height = image.size
81
+ aspect_ratio = orig_width / orig_height
82
+
83
+ # calculate the existing image aspect ratio
84
+ target_ratios = set(
85
+ (i, j)
86
+ for n in range(min_num, max_num + 1)
87
+ for i in range(1, n + 1)
88
+ for j in range(1, n + 1)
89
+ if i * j <= max_num and i * j >= min_num
90
+ )
91
+ target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
92
+
93
+ # find the closest aspect ratio to the target
94
+ target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, target_ratios, orig_width, orig_height, image_size)
95
+
96
+ # calculate the target width and height
97
+ target_width = image_size * target_aspect_ratio[0]
98
+ target_height = image_size * target_aspect_ratio[1]
99
+ blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
100
+
101
+ # resize the image
102
+ resized_img = image.resize((target_width, target_height))
103
+ processed_images = []
104
+ for i in range(blocks):
105
+ box = (
106
+ (i % (target_width // image_size)) * image_size,
107
+ (i // (target_width // image_size)) * image_size,
108
+ ((i % (target_width // image_size)) + 1) * image_size,
109
+ ((i // (target_width // image_size)) + 1) * image_size,
110
+ )
111
+ # split the image
112
+ split_img = resized_img.crop(box)
113
+ processed_images.append(split_img)
114
+ assert len(processed_images) == blocks
115
+ if use_thumbnail and len(processed_images) != 1:
116
+ thumbnail_img = image.resize((image_size, image_size))
117
+ processed_images.append(thumbnail_img)
118
+ return processed_images
119
+
120
+
121
+ def load_image(image, input_size=448, max_num=12):
122
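+ """Tile a single image with dynamic_preprocess and return the stacked, transformed pixel tensor."""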
+ transform = build_transform(input_size=input_size)
123
+ images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
124
+ pixel_values = [transform(image) for image in images]
125
+ pixel_values = torch.stack(pixel_values)
126
+ return pixel_values
127
+
128
+
129
+ def split_model(model_name):
130
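+ """Spread the language-model layers over all visible GPUs, keeping the vision encoder and embeddings on GPU 0 (counted as half a GPU)."""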
+ device_map = {}
131
+ world_size = torch.cuda.device_count()
132
+ num_layers = {
133
+ "InternVL2-1B": 24,
134
+ "InternVL2-2B": 24,
135
+ "InternVL2-4B": 32,
136
+ "InternVL2-8B": 32,
137
+ "InternVL2-26B": 48,
138
+ "InternVL2-40B": 60,
139
+ "InternVL2-Llama3-76B": 80,
140
+ }[model_name]
141
+ # Since the first GPU will be used for ViT, treat it as half a GPU.
142
+ num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5))
143
+ num_layers_per_gpu = [num_layers_per_gpu] * world_size
144
+ num_layers_per_gpu[0] = math.ceil(num_layers_per_gpu[0] * 0.5)
145
+ layer_cnt = 0
146
+ for i, num_layer in enumerate(num_layers_per_gpu):
147
+ for j in range(num_layer):
148
+ device_map[f"language_model.model.layers.{layer_cnt}"] = i
149
+ layer_cnt += 1
150
+ device_map["vision_model"] = 0
151
+ device_map["mlp1"] = 0
152
+ device_map["language_model.model.tok_embeddings"] = 0
153
+ device_map["language_model.model.embed_tokens"] = 0
154
+ device_map["language_model.output"] = 0
155
+ device_map["language_model.model.norm"] = 0
156
+ device_map["language_model.lm_head"] = 0
157
+ device_map[f"language_model.model.layers.{num_layers - 1}"] = 0
158
+
159
+ return device_map
160
+
161
+
162
+ def move_tensors_to_gpu(module):
163
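+ """Move any CPU-resident buffers and parameters of the module onto the default CUDA device."""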
+ for name, tensor in module.named_buffers():
164
+ if isinstance(tensor, torch.Tensor) and tensor.device.type == "cpu":
165
+ module.register_buffer(name, tensor.cuda(), persistent=False)
166
+ for _, param in module.named_parameters():
167
+ if param.device.type == "cpu":
168
+ param.data = param.data.cuda()
169
+
170
+
171
+ # video multi-round conversation (视频多轮对话)
172
+ def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
173
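+ """Sample num_segments frame indices, one from the middle of each equal-length segment within the given time bound."""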
+ if bound:
174
+ start, end = bound[0], bound[1]
175
+ else:
176
+ start, end = -100000, 100000
177
+ start_idx = max(first_idx, round(start * fps))
178
+ end_idx = min(round(end * fps), max_frame)
179
+ seg_size = float(end_idx - start_idx) / num_segments
180
+ frame_indices = np.array(
181
+ [int(start_idx + (seg_size / 2) + np.round(seg_size * idx)) for idx in range(num_segments)]
182
+ )
183
+ return frame_indices
184
+
185
+
186
+ def load_video_from_file(
187
+ video_path: str, input_size=448, max_num=1, device="cuda", dtype=torch.bfloat16 # Add dtype parameter
188
+ ):
189
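+ """Read a video file at roughly one frame per second, tile each frame, and return the concatenated pixel tensor plus per-frame patch counts."""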
+ video = read_video(video_path)
190
+ pixel_values_list, num_patches_list = [], []
191
+ transform = build_transform(input_size=input_size)
192
+ while True:
193
+ img: Image.Image = video.get_next_frame(
194
+ return_format="pil",
195
+ desired_interval_in_sec=1,
196
+ )
197
+ if img is None:
198
+ break # No more frames or end of video
199
+ img = dynamic_preprocess(img, image_size=input_size, use_thumbnail=True, max_num=max_num)
200
+ pixel_values = [transform(tile) for tile in img]
201
+ pixel_values = torch.stack(pixel_values)
202
+ num_patches_list.append(pixel_values.shape[0])
203
+ pixel_values_list.append(pixel_values.to(dtype=dtype, device=device))
204
+ return torch.cat(pixel_values_list), num_patches_list
205
+
206
+
207
+ def load_video_from_seq_of_frames(
208
+ seq_of_frames: list[np.ndarray],
209
+ input_size=448,
210
+ max_num=1,
211
+ device="cuda",
212
+ dtype=torch.bfloat16, # Add dtype parameter
213
+ ):
214
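+ """Tile an in-memory sequence of frames and return the concatenated pixel tensor (cast to dtype/device) plus per-frame patch counts."""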
+ pixel_values_list, num_patches_list = [], []
215
+ transform = build_transform(input_size=input_size)
216
+ for img in seq_of_frames:
217
+ img = dynamic_preprocess(img, image_size=input_size, use_thumbnail=True, max_num=max_num)
218
+ pixel_values = [transform(tile) for tile in img]
219
+ pixel_values = torch.stack(pixel_values).to(dtype=dtype, device=device) # Convert to bfloat16
220
+ num_patches_list.append(pixel_values.shape[0])
221
+ pixel_values_list.append(pixel_values)
222
+ return torch.cat(pixel_values_list), num_patches_list
223
+
224
+
225
+ def load_video(video_path, bound=None, input_size=448, max_num=1, num_segments=32):
226
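+ """Uniformly sample num_segments frames with decord, tile them, and return bfloat16 pixel tensors plus per-frame patch counts."""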
+ vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
227
+ max_frame = len(vr) - 1
228
+ fps = float(vr.get_avg_fps())
229
+
230
+ pixel_values_list, num_patches_list = [], []
231
+ transform = build_transform(input_size=input_size)
232
+ frame_indices = get_index(bound, fps, max_frame, first_idx=0, num_segments=num_segments)
233
+ for frame_index in frame_indices:
234
+ img = Image.fromarray(vr[frame_index].asnumpy()).convert("RGB")
235
+ img = dynamic_preprocess(img, image_size=input_size, use_thumbnail=True, max_num=max_num)
236
+ pixel_values = [transform(tile) for tile in img]
237
+ pixel_values = torch.stack(pixel_values)
238
+ num_patches_list.append(pixel_values.shape[0])
239
+ pixel_values_list.append(pixel_values.to(torch.bfloat16))
240
+ pixel_values = torch.cat(pixel_values_list)
241
+ return pixel_values, num_patches_list
neus_v/vlm/obj.py ADDED
@@ -0,0 +1,45 @@
1
+ import dataclasses
2
+ import enum
3
+ import logging
4
+ from typing import Any
5
+
6
+
7
+ class Status(enum.Enum):
8
+ """Status Enum for the CV API."""
9
+
10
+ UNKNOWN = 0
11
+ SUCCESS = 1
12
+ RUNNING = 2
13
+ FAILURE = 3
14
+ INVALID = 4
15
+
16
+
17
+ @dataclasses.dataclass
+ class DetectedObject:
18
+ """Detected Object class."""
19
+
20
+ name: str | None
21
+ confidence: float = 0.0
22
+ probability: float = 0.0
23
+ confidence_of_all_obj: list[float] | None = dataclasses.field(default_factory=list)
24
+ probability_of_all_obj: list[float] | None = dataclasses.field(default_factory=list)
25
+ all_obj_detected: list[Any] | None = None
26
+ number_of_detection: int = 0
27
+ is_detected: bool | Status = Status.UNKNOWN
28
+ model_name: str | None = None
29
+ bounding_box_of_all_obj: list[Any] | None = None
30
+
31
+ def __post_init__(self) -> None:
32
+ """Post init."""
33
+ if self.confidence_of_all_obj is not None and len(self.confidence_of_all_obj) > 0:
34
+ self.confidence = max(self.confidence_of_all_obj)
35
+ if self.probability_of_all_obj and len(self.probability_of_all_obj) > 0:
36
+ self.probability = max(self.probability_of_all_obj)
37
+
38
+ def get_probability(self) -> float:
39
+ """Get probability."""
40
+ if self.probability > 0:
41
+ return self.probability
42
+ if self.confidence > 0 and self.probability == 0:
43
+ logging.info("Probability is not set, using confidence: %f", self.confidence)
44
+ return self.confidence
45
+ return self.probability
setup.py ADDED
@@ -0,0 +1,7 @@
1
+ from setuptools import find_packages, setup
2
+
3
+ setup(
4
+ name="NeuS-V",
5
+ version="0.1",
6
+ packages=find_packages(),
7
+ )