import torch
import os
import sys
import gradio as gr
from PIL import Image
import traceback
import types
import importlib.util
import importlib.machinery
import importlib.abc
print("=" * 50)
print("InternVL2 IMAGE & TEXT ANALYSIS")
print("=" * 50)
# System information
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    # Memory info
    print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"Allocated GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Reserved GPU memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
else:
    print("CUDA is not available. This application requires GPU acceleration.")
# In-memory mock implementation
def create_in_memory_flash_attn_mock():
    """Create a completely in-memory flash_attn mock with all required attributes"""
    print("Setting up in-memory flash_attn mock...")

    # Create a dummy module finder and loader for the mock
    class DummyFinder(importlib.abc.MetaPathFinder):
        def find_spec(self, fullname, path, target=None):
            if fullname == 'flash_attn' or fullname.startswith('flash_attn.'):
                return self.create_spec(fullname)
            elif fullname == 'flash_attn_2_cuda':
                return self.create_spec(fullname)
            return None

        def create_spec(self, fullname):
            # Create a spec
            loader = DummyLoader(fullname)
            spec = importlib.machinery.ModuleSpec(
                name=fullname,
                loader=loader,
                is_package=fullname.count('.') == 0 or fullname.split('.')[-1] == ''
            )
            return spec

    class DummyLoader(importlib.abc.Loader):
        def __init__(self, fullname):
            self.fullname = fullname

        def create_module(self, spec):
            module = types.ModuleType(spec.name)
            # Set default attributes for any module
            module.__spec__ = spec
            module.__loader__ = self
            module.__file__ = f"<{spec.name}>"
            module.__path__ = []
            module.__package__ = spec.name.rpartition('.')[0] if '.' in spec.name else ''
            if spec.name == 'flash_attn':
                # Add flash_attn-specific attributes
                module.__version__ = "0.0.0-mocked"
                # Add flash_attn functions
                module.flash_attn_func = lambda *args, **kwargs: None
                module.flash_attn_kvpacked_func = lambda *args, **kwargs: None
                module.flash_attn_qkvpacked_func = lambda *args, **kwargs: None
            return module

        def exec_module(self, module):
            # Nothing to execute
            pass

    # Remove any existing modules to avoid conflicts
    for name in list(sys.modules.keys()):
        if name == 'flash_attn' or name.startswith('flash_attn.') or name == 'flash_attn_2_cuda':
            del sys.modules[name]

    # Register our finder at the beginning of meta_path
    sys.meta_path.insert(0, DummyFinder())

    # Pre-create and configure the flash_attn module
    spec = importlib.machinery.ModuleSpec(
        name='flash_attn',
        loader=DummyLoader('flash_attn'),
        is_package=True
    )
    flash_attn = importlib.util.module_from_spec(spec)
    sys.modules['flash_attn'] = flash_attn
    # Add attributes used by transformers checks
    flash_attn.__version__ = "0.0.0-mocked"

    # Create common submodules - without 'parent' parameter
    for submodule in ['flash_attn.flash_attn_interface', 'flash_attn.flash_attn_triton']:
        parts = submodule.split('.')
        parent_name = '.'.join(parts[:-1])
        child_name = parts[-1]
        parent = sys.modules[parent_name]
        # Create submodule spec (ModuleSpec takes no 'parent' parameter)
        subspec = importlib.machinery.ModuleSpec(
            name=submodule,
            loader=DummyLoader(submodule),
            is_package=False
        )
        # Create and register submodule
        module = importlib.util.module_from_spec(subspec)
        setattr(parent, child_name, module)
        sys.modules[submodule] = module

    # Create flash_attn_2_cuda module
    cuda_spec = importlib.machinery.ModuleSpec(
        name='flash_attn_2_cuda',
        loader=DummyLoader('flash_attn_2_cuda'),
        is_package=False
    )
    cuda_module = importlib.util.module_from_spec(cuda_spec)
    sys.modules['flash_attn_2_cuda'] = cuda_module
    # Quiet transformers' advisory warnings
    os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
    # NOTE: TRANSFORMERS_OFFLINE=1 is deliberately not set here; offline mode
    # would stop from_pretrained() from downloading the model shards on first run.
    # Verify the mock was created successfully
    try:
        import flash_attn
        print(f"✓ Mock flash_attn loaded successfully: {flash_attn.__version__}")
        print(f"✓ flash_attn.__spec__ exists: {flash_attn.__spec__ is not None}")
        # Explicitly check that importlib.util.find_spec can resolve the mock
        spec = importlib.util.find_spec("flash_attn")
        print(f"✓ importlib.util.find_spec returns: {spec is not None}")
        # Check that parent/child relationships work
        import flash_attn.flash_attn_interface
        print("✓ flash_attn.flash_attn_interface loaded")
        # Check CUDA module
        import flash_attn_2_cuda
        print("✓ flash_attn_2_cuda loaded")
    except Exception as e:
        print(f"WARNING: Error verifying flash_attn mock: {e}")
        traceback.print_exc()
# Now set up the mock BEFORE importing transformers
create_in_memory_flash_attn_mock()
# Import transformers AFTER setting up mock
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
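# A minimal sketch, assuming a reasonably recent transformers release: flash
# attention can often be disabled at load time instead of (or in addition to)
# mocking the package, by requesting the eager attention implementation.
# Whether a model's remote code honors this is not guaranteed, so the
# in-memory mock above remains the primary workaround here.
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_path,
#         attn_implementation="eager",  # ask transformers to skip flash-attn paths
#         trust_remote_code=True,
#     )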
# Create a function to load the model
def load_model():
    try:
        print("\nLoading InternVL2 model...")
        # Load the model and tokenizer
        # NOTE: the InternVL2 checkpoints on Hugging Face carry a size suffix;
        # the 8B variant is used here.
        model_path = "OpenGVLab/InternVL2-8B"
        # Print downloading status
        print("Downloading model shards. This may take some time...")
        # Load the model - with careful error handling
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                low_cpu_mem_usage=True,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True
            )
        except Exception as e:
            # If we get the flash_attn error, print detailed information
            if "flash_attn.__spec__ is not set" in str(e):
                print("\n✗ Flash attention error detected!")
                # See if our mock is still in place
                if 'flash_attn' in sys.modules:
                    mock = sys.modules['flash_attn']
                    print(f"Flash mock exists: {mock}")
                    print(f"Flash mock __spec__: {getattr(mock, '__spec__', 'NOT SET')}")
                else:
                    print("flash_attn module was removed from sys.modules")
                # Diagnostic info
                print("\nCurrent state of sys.meta_path:")
                for i, finder in enumerate(sys.meta_path):
                    print(f"  {i}: {finder.__class__.__name__}")
            # Re-raise the exception
            raise

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            use_fast=False,
            trust_remote_code=True
        )
        # Set generation config
        generation_config = GenerationConfig.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        print("✓ Model and tokenizer loaded successfully!")
        return model, tokenizer, generation_config
    except Exception as e:
        print(f"\n✗ ERROR loading model: {str(e)}")
        traceback.print_exc()
        return None, None, None
# Helper function to load and process an image
def load_image(image_path, processor=None):
    """Load an image and prepare it for the model."""
    if isinstance(image_path, str):
        if image_path.startswith('http'):
            import requests
            from io import BytesIO
            try:
                response = requests.get(image_path, timeout=10)
                image = Image.open(BytesIO(response.content)).convert('RGB')
            except Exception as e:
                print(f"Error loading image from URL: {e}")
                # Return a default image rather than raising
                image = Image.new('RGB', (224, 224), color='gray')
        else:
            image = Image.open(image_path).convert('RGB')
    else:
        image = image_path
    # No need to process further here; the model handles that internally
    return image
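# A minimal sketch, assuming the preprocessing described on the InternVL2 model
# card: the model expects a `pixel_values` tensor (448x448 tiles, normalized
# with ImageNet mean/std) rather than a raw PIL image. The helper below,
# `image_to_pixel_values`, is illustrative only and is not wired into the app;
# the multi-tile "dynamic" preprocessing from the model card is omitted.
#
#     import torchvision.transforms as T
#
#     IMAGENET_MEAN = (0.485, 0.456, 0.406)
#     IMAGENET_STD = (0.229, 0.224, 0.225)
#
#     def image_to_pixel_values(image, input_size=448):
#         transform = T.Compose([
#             T.Resize((input_size, input_size), interpolation=T.InterpolationMode.BICUBIC),
#             T.ToTensor(),
#             T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
#         ])
#         # Shape: (1, 3, input_size, input_size)
#         return transform(image).unsqueeze(0)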
# Function to analyze an image with text
def analyze_image(model, tokenizer, image, prompt, generation_config):
    try:
        # Prepare inputs
        text_prompt = f"USER: <image>\n{prompt}\nASSISTANT:"
        # Convert inputs for the model
        inputs = tokenizer([text_prompt], return_tensors="pt")
        # Move inputs to the right device
        if torch.cuda.is_available():
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
        # Add image to the inputs
        inputs["images"] = [image]
        # Generate a response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config,
                max_new_tokens=512,
            )
        # Decode the outputs
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response
        assistant_response = generated_text.split("ASSISTANT:")[-1].strip()
        return assistant_response
    except Exception as e:
        error_msg = f"Error analyzing image: {str(e)}"
        traceback.print_exc()
        return error_msg
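# A minimal sketch, assuming the chat interface documented on the InternVL2
# model card: the checkpoint's remote code exposes model.chat(tokenizer,
# pixel_values, question, generation_config) when loaded with
# trust_remote_code=True. If the generate()-based path above does not work for
# this checkpoint, something along these lines could be tried instead.
# `image_to_pixel_values` refers to the illustrative helper sketched after
# load_image above; both names are assumptions, not part of the original app.
#
#     def analyze_image_via_chat(model, tokenizer, image, prompt):
#         pixel_values = image_to_pixel_values(image)
#         if torch.cuda.is_available():
#             pixel_values = pixel_values.to(torch.bfloat16).cuda()
#         question = f"<image>\n{prompt}"
#         gen_cfg = dict(max_new_tokens=512, do_sample=False)
#         return model.chat(tokenizer, pixel_values, question, gen_cfg)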
# Create the Gradio interface
def create_interface():
    # Load model at startup
    model, tokenizer, generation_config = load_model()
    if model is None:
        # If model loading failed, create a simple error interface
        with gr.Blocks(title="InternVL2 Image Analysis - Error") as demo:
            gr.Markdown("# ❌ Error: Failed to load models")
            gr.Markdown("Please check the console for error details.")
        return demo

    # Predefined prompts for analysis
    prompts = [
        "Describe this image in detail.",
        "What can you tell me about this image?",
        "Is there any text in this image? If so, can you read it?",
        "What is the main subject of this image?",
        "What emotions or feelings does this image convey?",
        "Describe the composition and visual elements of this image.",
        "Summarize what you see in this image in one paragraph."
    ]

    # Create the full interface
    with gr.Blocks(title="InternVL2 Image Analysis") as demo:
        gr.Markdown("# 🖼️ InternVL2 Image & Text Analyzer")
        gr.Markdown("### Upload an image and ask questions about it")
        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Dropdown(
                    choices=prompts,
                    value=prompts[0],
                    label="Select a prompt or enter your own below",
                    allow_custom_value=True
                )
                custom_prompt = gr.Textbox(label="Custom prompt", placeholder="Enter your custom prompt here...")
                analyze_btn = gr.Button("Analyze Image", variant="primary")
            with gr.Column(scale=1):
                output = gr.Textbox(label="Analysis Results", lines=15)

        # Example images - using stable URLs from GitHub repositories
        gr.Examples(
            examples=[
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen_sink/files/cheetah1.jpg", "What's in this image?"],
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen_sink/files/lion.jpg", "Describe this animal."],
            ],
            inputs=[input_image, custom_prompt],
        )

        # When the prompt dropdown changes, copy the selection into the custom prompt box
        prompt_input.change(fn=lambda x: x, inputs=prompt_input, outputs=custom_prompt)

        # Set up the click event for analysis
        def on_analyze_click(image, prompt_text):
            if image is None:
                return "Please upload an image first."
            # Use the custom prompt if provided, otherwise fall back to the default prompt
            final_prompt = prompt_text.strip() if prompt_text and prompt_text.strip() else prompts[0]
            result = analyze_image(model, tokenizer, image, final_prompt, generation_config)
            return result

        analyze_btn.click(
            fn=on_analyze_click,
            inputs=[input_image, custom_prompt],
            outputs=output
        )

    return demo
# Main entry point
if __name__ == "__main__":
    # Set environment variable for better GPU memory management
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
    # Create and launch the interface
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0")