Spaces:

X-iZhang
/

Libra

Running

App Files Community

X-iZhang committed on Jan 19

Commit

e1b2b95

verified ·

1 Parent(s): 97a468f

Upload run_libra.py

Browse files

Files changed (1) hide show

libra/eval/run_libra.py +29 -10

libra/eval/run_libra.py CHANGED Viewed

@@ -14,6 +14,21 @@ from io import BytesIO
 from pydicom.pixel_data_handlers.util import apply_voi_lut
 import datetime
 def load_images(image_file):
     """
@@ -77,7 +92,7 @@ def load_images(image_file):
     return image
-def get_image_tensors(image_path, image_processor, model, device='cpu'):
     # Load and preprocess the images
     if isinstance(image_path, str):
         image = []
@@ -118,19 +133,24 @@ def libra_eval(
     model_base=None,
     image_file=None,
     query=None,
-    conv_mode="libra_v1",
     temperature=0.2,
     top_p=None,
     num_beams=1,
     num_return_sequences=None,
     length_penalty=1.0,
-    max_new_tokens=128
 ):
     # Model
     disable_torch_init()
-    model_name = get_model_name_from_path(model_path)
-    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name)
     qs = query
     if model.config.mm_use_im_start_end:
@@ -151,7 +171,7 @@ def libra_eval(
     conv.append_message(conv.roles[1], None)
     prompt = conv.get_prompt()
-    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to("cpu")
     attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
     pad_token_id = tokenizer.pad_token_id
@@ -162,7 +182,7 @@ def libra_eval(
     stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
     with torch.inference_mode():
         if num_beams > 1:
             output_ids = model.generate(
                 input_ids=input_ids,
@@ -192,7 +212,7 @@ def libra_eval(
                 pad_token_id=pad_token_id,
                 stopping_criteria=[stopping_criteria],
                 use_cache=True)
     input_token_len = input_ids.shape[1]
     n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
@@ -205,8 +225,7 @@ def libra_eval(
     if outputs.endswith(stop_str):
         outputs = outputs[:-len(stop_str)]
     outputs = outputs.strip()
-    print("outputs",outputs)
     return outputs
 if __name__ == "__main__":

 from pydicom.pixel_data_handlers.util import apply_voi_lut
 import datetime
+def load_model(model_path, model_base=None):
+    """
+    Load the model and return its components.
+    Args:
+        model_path (str): Path to the model.
+        model_base (str): Base model, if any.
+    Returns:
+        tuple: (tokenizer, model, image_processor, context_len)
+    """
+    disable_torch_init()
+    model_name = get_model_name_from_path(model_path)
+    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name)
+    return tokenizer, model, image_processor, context_len
 def load_images(image_file):
     """
     return image
+def get_image_tensors(image_path, image_processor, model, device='cuda'):
     # Load and preprocess the images
     if isinstance(image_path, str):
         image = []
     model_base=None,
     image_file=None,
     query=None,
+    conv_mode=None,
     temperature=0.2,
     top_p=None,
     num_beams=1,
     num_return_sequences=None,
     length_penalty=1.0,
+    max_new_tokens=128,
+    libra_model=None
 ):
     # Model
     disable_torch_init()
+    if libra_model is not None:
+        tokenizer, model, image_processor, context_len = libra_model
+        model_name = model.config._name_or_path
+    else:
+        tokenizer, model, image_processor, context_len = load_model(model_path, model_base)
+        model_name = get_model_name_from_path(model_path)
     qs = query
     if model.config.mm_use_im_start_end:
     conv.append_message(conv.roles[1], None)
     prompt = conv.get_prompt()
+    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
     attention_mask = torch.ones(input_ids.shape, dtype=torch.long)
     pad_token_id = tokenizer.pad_token_id
     stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
     with torch.inference_mode():
+        torch.cuda.empty_cache()
         if num_beams > 1:
             output_ids = model.generate(
                 input_ids=input_ids,
                 pad_token_id=pad_token_id,
                 stopping_criteria=[stopping_criteria],
                 use_cache=True)
     input_token_len = input_ids.shape[1]
     n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
     if outputs.endswith(stop_str):
         outputs = outputs[:-len(stop_str)]
     outputs = outputs.strip()
     return outputs
 if __name__ == "__main__":