Upload 2 files
ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/blip_img2txt.py
CHANGED
@@ -1,3 +1,4 @@
+import os
 from PIL import Image
 from transformers import (
     BlipProcessor,
@@ -9,7 +10,7 @@ from transformers import (
 
 import torch
 import model_management
-
+import folder_paths
 
 class BLIPImg2Txt:
     def __init__(
@@ -21,21 +22,24 @@ class BLIPImg2Txt:
         repetition_penalty: float,
         search_beams: int,
         model_id: str = "Salesforce/blip-image-captioning-large",
+        custom_model_path: str = None,
     ):
         self.conditional_caption = conditional_caption
         self.model_id = model_id
+        self.custom_model_path = custom_model_path
+
+        if self.custom_model_path and os.path.exists(self.custom_model_path):
+            self.model_path = self.custom_model_path
+        else:
+            self.model_path = folder_paths.get_full_path("blip", model_id)
 
-        # Determine do_sample and num_beams
         if temperature > 1.1 or temperature < 0.90:
             do_sample = True
-            num_beams = 1
+            num_beams = 1
         else:
             do_sample = False
-            num_beams = (
-                search_beams if search_beams > 1 else 1
-            )  # Use beam search if num_beams > 1
+            num_beams = search_beams if search_beams > 1 else 1
 
-        # Initialize text config kwargs
         self.text_config_kwargs = {
             "do_sample": do_sample,
             "max_length": max_words,
@@ -51,18 +55,25 @@ class BLIPImg2Txt:
         if image.mode != "RGB":
             image = image.convert("RGB")
 
-        processor = BlipProcessor.from_pretrained(self.model_id)
+        if self.model_path and os.path.exists(self.model_path):
+            model_path = self.model_path
+            local_files_only = True
+        else:
+            model_path = self.model_id
+            local_files_only = False
+
+        processor = BlipProcessor.from_pretrained(model_path, local_files_only=local_files_only)
 
-
-        config_text = BlipTextConfig.from_pretrained(self.model_id)
+        config_text = BlipTextConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config_text.update(self.text_config_kwargs)
-        config_vision = BlipVisionConfig.from_pretrained(self.model_id)
+        config_vision = BlipVisionConfig.from_pretrained(model_path, local_files_only=local_files_only)
         config = BlipConfig.from_text_vision_configs(config_text, config_vision)
 
         model = BlipForConditionalGeneration.from_pretrained(
-            self.model_id,
+            model_path,
             config=config,
             torch_dtype=torch.float16,
+            local_files_only=local_files_only
         ).to(model_management.get_torch_device())
 
         inputs = processor(
@@ -78,4 +89,4 @@ class BLIPImg2Txt:
         del model
         torch.cuda.empty_cache()
 
-        return ret
+        return ret
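The core of this change is a local-or-Hub fallback: if a model directory exists on disk, load it with local_files_only=True; otherwise fall back to downloading model_id from the Hugging Face Hub. As a rough standalone sketch (not part of the commit: the helper name resolve_blip_source and the example path are made up, and only the standard transformers from_pretrained(..., local_files_only=...) arguments already used in the diff are assumed):

import os

from transformers import BlipForConditionalGeneration, BlipProcessor


def resolve_blip_source(custom_model_path=None,
                        model_id="Salesforce/blip-image-captioning-large"):
    # Prefer a local copy of the weights; fall back to the Hub ID otherwise.
    if custom_model_path and os.path.exists(custom_model_path):
        return custom_model_path, True   # offline load, never touch the network
    return model_id, False               # let transformers download from the Hub


# Hypothetical path: weights previously downloaded into ComfyUI's models/blip folder.
source, local_only = resolve_blip_source("/path/to/ComfyUI/models/blip/blip-image-captioning-large")
processor = BlipProcessor.from_pretrained(source, local_files_only=local_only)
model = BlipForConditionalGeneration.from_pretrained(source, local_files_only=local_only)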
ComfyUI/custom_nodes/img2txt-comfyui-nodes/src/img2txt_node.py
CHANGED
@@ -14,6 +14,8 @@ from .mini_cpm_img2txt import MiniPCMImg2Txt
 
 from typing import Tuple
 
+import os
+import folder_paths
 
 class Img2TxtNode:
     CATEGORY = "img2txt"
@@ -145,6 +147,11 @@ class Img2TxtNode:
 
         captions = []
         if use_all_models or use_blip_model:
+            blip_model_path = folder_paths.get_folder_paths("blip")[0]
+            print(f"blip_model_path: {blip_model_path}")
+            if not blip_model_path or not os.path.exists(blip_model_path):
+                raise ValueError("BLIP model 'blip-image-captioning-large' not found in ComfyUI models directory. Please ensure it's in the 'models/blip' folder.")
+
             blip = BLIPImg2Txt(
                 conditional_caption=blip_caption_prefix,
                 min_words=min_words,
@@ -152,6 +159,7 @@ class Img2TxtNode:
                 temperature=temperature,
                 repetition_penalty=repetition_penalty,
                 search_beams=search_beams,
+                custom_model_path=blip_model_path
             )
             captions.append(blip.generate_caption(raw_image))
 
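Taken together, the node now resolves a registered "blip" model folder through ComfyUI's folder_paths, validates it, and passes it to the updated constructor as custom_model_path. The sketch below restates that caller-side flow for illustration only: it runs only inside a ComfyUI environment where a "blip" folder is registered, the keyword values are example settings rather than the node's defaults, and the BLIPImg2Txt import path is assumed from the package's relative-import style.

import os

from PIL import Image

import folder_paths  # ComfyUI helper, available when this code runs inside ComfyUI
from .blip_img2txt import BLIPImg2Txt  # assumed relative import, matching the node package

# Resolve and validate the first registered "blip" model folder, mirroring the node's guard.
blip_dirs = folder_paths.get_folder_paths("blip")
blip_model_path = blip_dirs[0] if blip_dirs else None
if not blip_model_path or not os.path.exists(blip_model_path):
    raise ValueError("Expected the BLIP weights under ComfyUI's models/blip folder.")

raw_image = Image.open("example.jpg")  # placeholder input image

# Keyword values below are illustrative only.
blip = BLIPImg2Txt(
    conditional_caption="a photograph of",
    min_words=5,
    max_words=32,
    temperature=1.0,
    repetition_penalty=1.2,
    search_beams=5,
    custom_model_path=blip_model_path,
)
caption = blip.generate_caption(raw_image)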