import torch
import os
import sys
import gradio as gr
from PIL import Image
import traceback
import types
import importlib.util
import importlib.machinery
import importlib.abc

print("=" * 50)
print("InternVL2 IMAGE & TEXT ANALYSIS")
print("=" * 50)

# System information
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        
    # Memory info
    print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"Allocated GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Reserved GPU memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
else:
    print("CUDA is not available. This application requires GPU acceleration.")

# In-memory mock implementation
def create_in_memory_flash_attn_mock():
    """Create a completely in-memory flash_attn mock with all required attributes"""
    print("Setting up in-memory flash_attn mock...")
    
    # Create a dummy module finder and loader for the mock
    class DummyFinder(importlib.abc.MetaPathFinder):
        def find_spec(self, fullname, path, target=None):
            if fullname == 'flash_attn' or fullname.startswith('flash_attn.'):
                return self.create_spec(fullname)
            elif fullname == 'flash_attn_2_cuda':
                return self.create_spec(fullname)
            return None
            
        def create_spec(self, fullname):
            # Create a spec
            loader = DummyLoader(fullname)
            spec = importlib.machinery.ModuleSpec(
                name=fullname,
                loader=loader,
                is_package='.' not in fullname  # only the top-level flash_attn acts as a package
            )
            return spec
    
    class DummyLoader(importlib.abc.Loader):
        def __init__(self, fullname):
            self.fullname = fullname
            
        def create_module(self, spec):
            module = types.ModuleType(spec.name)
            
            # Set default attributes for any module
            module.__spec__ = spec
            module.__loader__ = self
            module.__file__ = f"<{spec.name}>"
            module.__path__ = []
            module.__package__ = spec.name.rpartition('.')[0] if '.' in spec.name else ''
            
            if spec.name == 'flash_attn':
                # Add flash_attn-specific attributes
                module.__version__ = "0.0.0-mocked"
            
                # Add flash_attn functions
                module.flash_attn_func = lambda *args, **kwargs: None
                module.flash_attn_kvpacked_func = lambda *args, **kwargs: None
                module.flash_attn_qkvpacked_func = lambda *args, **kwargs: None
            
            return module
            
        def exec_module(self, module):
            # Nothing to execute
            pass
    
    # Remove any existing modules to avoid conflicts
    for name in list(sys.modules.keys()):
        if name == 'flash_attn' or name.startswith('flash_attn.') or name == 'flash_attn_2_cuda':
            del sys.modules[name]
    
    # Register our finder at the beginning of meta_path
    sys.meta_path.insert(0, DummyFinder())
    
    # Pre-create and configure the flash_attn module
    spec = importlib.machinery.ModuleSpec(
        name='flash_attn',
        loader=DummyLoader('flash_attn'),
        is_package=True
    )
    flash_attn = importlib.util.module_from_spec(spec)
    sys.modules['flash_attn'] = flash_attn
    
    # Add attributes used by transformers checks
    flash_attn.__version__ = "0.0.0-mocked"
    
    # Create common submodules and attach them to the flash_attn package
    for submodule in ['flash_attn.flash_attn_interface', 'flash_attn.flash_attn_triton']:
        parts = submodule.split('.')
        parent_name = '.'.join(parts[:-1])
        child_name = parts[-1]
        parent = sys.modules[parent_name]
        
        # Create the submodule spec
        subspec = importlib.machinery.ModuleSpec(
            name=submodule,
            loader=DummyLoader(submodule),
            is_package=False
        )
        
        # Create and register submodule
        module = importlib.util.module_from_spec(subspec)
        setattr(parent, child_name, module)
        sys.modules[submodule] = module
    
    # Create flash_attn_2_cuda module
    cuda_spec = importlib.machinery.ModuleSpec(
        name='flash_attn_2_cuda',
        loader=DummyLoader('flash_attn_2_cuda'),
        is_package=False
    )
    cuda_module = importlib.util.module_from_spec(cuda_spec)
    sys.modules['flash_attn_2_cuda'] = cuda_module
    
    # Quiet transformers advisory warnings (flash attention itself is neutralized by the
    # mock above, not by this variable). Do NOT set TRANSFORMERS_OFFLINE here: the model
    # weights still need to be downloaded on first run.
    os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
    
    # Verify the mock was created successfully
    try:
        import flash_attn
        print(f"βœ“ Mock flash_attn loaded successfully: {flash_attn.__version__}")
        print(f"βœ“ flash_attn.__spec__ exists: {flash_attn.__spec__ is not None}")
        
        # Let's explicitly check for __spec__ in importlib.util.find_spec
        spec = importlib.util.find_spec("flash_attn")
        print(f"βœ“ importlib.util.find_spec returns: {spec is not None}")
        
        # Check that parent/child relationships work
        import flash_attn.flash_attn_interface
        print("βœ“ flash_attn.flash_attn_interface loaded")
        
        # Check CUDA module
        import flash_attn_2_cuda
        print("βœ“ flash_attn_2_cuda loaded")
    except Exception as e:
        print(f"WARNING: Error verifying flash_attn mock: {e}")
        traceback.print_exc()

# Now set up the mock BEFORE importing transformers
create_in_memory_flash_attn_mock()

# Import transformers AFTER setting up mock
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

# Create a function to load the model
def load_model():
    try:
        print("\nLoading InternVL2 model...")
        
        # Load the model and tokenizer.
        # InternVL2 is a model family; a specific checkpoint size must be selected,
        # e.g. OpenGVLab/InternVL2-8B (pick the size that fits the available GPU memory).
        model_path = "OpenGVLab/InternVL2-8B"
        
        # Print downloading status
        print("Downloading model shards. This may take some time...")
        
        # Load the model - with careful error handling
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                low_cpu_mem_usage=True,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True
            )
        except Exception as e:
            # If we get the flash_attn error, print detailed information
            if "flash_attn.__spec__ is not set" in str(e):
                print("\n❌ Flash attention error detected!")
                
                # See if our mock is still in place
                if 'flash_attn' in sys.modules:
                    mock = sys.modules['flash_attn']
                    print(f"Flash mock exists: {mock}")
                    print(f"Flash mock __spec__: {getattr(mock, '__spec__', 'NOT SET')}")
                else:
                    print("flash_attn module was removed from sys.modules")
                
                # Diagnostic info
                print("\nCurrent state of sys.meta_path:")
                for i, finder in enumerate(sys.meta_path):
                    print(f"  {i}: {finder.__class__.__name__}")
            
            # Re-raise the exception
            raise
        
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_path, 
            use_fast=False,
            trust_remote_code=True
        )
        
        # Set generation config
        generation_config = GenerationConfig.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        
        print("βœ“ Model and tokenizer loaded successfully!")
        return model, tokenizer, generation_config
        
    except Exception as e:
        print(f"\n❌ ERROR loading model: {str(e)}")
        traceback.print_exc()
        return None, None, None

# Helper function to load and process an image
def load_image(image_path, processor=None):
    """Load an image and prepare it for the model."""
    if isinstance(image_path, str):
        if image_path.startswith('http'):
            import requests
            from io import BytesIO
            try:
                response = requests.get(image_path, timeout=10)
                image = Image.open(BytesIO(response.content)).convert('RGB')
            except Exception as e:
                print(f"Error loading image from URL: {e}")
                # Return a default image or raise an error
                image = Image.new('RGB', (224, 224), color='gray')
        else:
            image = Image.open(image_path).convert('RGB')
    else:
        image = image_path
    
    # Return the raw PIL image; any model-specific preprocessing happens downstream
    # (see the optional sketch after this function)
    return image
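
# A minimal, optional sketch (not wired into the app): InternVL2-style checkpoints
# generally expect images resized to 448x448 and normalized with ImageNet statistics
# before being passed to the model as pixel tensors. The exact pipeline is defined by
# the checkpoint's remote code; this helper only illustrates the idea and assumes
# torchvision is installed.
def pil_to_pixel_values(image, input_size=448):
    """Convert a PIL image to a (1, 3, input_size, input_size) float tensor (illustrative only)."""
    import torchvision.transforms as T  # local import keeps the dependency optional
    transform = T.Compose([
        T.Resize((input_size, input_size)),
        T.ToTensor(),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])
    return transform(image.convert('RGB')).unsqueeze(0)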

# Function to analyze an image with text
def analyze_image(model, tokenizer, image, prompt, generation_config):
    try:
        # Prepare inputs
        text_prompt = f"USER: <image>\n{prompt}\nASSISTANT:"
        
        # Convert inputs for the model
        inputs = tokenizer([text_prompt], return_tensors="pt")
        
        # Move inputs to the right device
        if torch.cuda.is_available():
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
        
        # Add image to the inputs
        inputs["images"] = [image]
        
        # Generate a response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config,
                max_new_tokens=512,
            )
        
        # Decode the outputs
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract only the assistant's response
        assistant_response = generated_text.split("ASSISTANT:")[-1].strip()
        
        return assistant_response
        
    except Exception as e:
        error_msg = f"Error analyzing image: {str(e)}"
        traceback.print_exc()
        return error_msg
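
# Hedged alternative (a sketch, not called by the app): checkpoints in the InternVL2
# family loaded with trust_remote_code typically expose a chat() helper that takes
# pre-processed pixel values instead of the generic generate() interface used above.
# The names below (model.chat and its argument order) are assumptions based on the
# family's usual model-card examples, not verified against this exact checkpoint.
def analyze_image_via_chat(model, tokenizer, image, prompt, generation_config):
    pixel_values = pil_to_pixel_values(image)  # helper sketched above
    if torch.cuda.is_available():
        pixel_values = pixel_values.to(torch.bfloat16).cuda()
    with torch.no_grad():
        # chat() is assumed to accept (tokenizer, pixel_values, question, generation_config)
        return model.chat(tokenizer, pixel_values, f"<image>\n{prompt}", generation_config)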

# Create the Gradio interface
def create_interface():
    # Load model at startup
    model, tokenizer, generation_config = load_model()
    
    if model is None:
        # If model loading failed, create a simple error interface
        with gr.Blocks(title="InternVL2 Image Analysis - Error") as demo:
            gr.Markdown("# ❌ Error: Failed to load models")
            gr.Markdown("Please check the console for error details.")
        return demo
    
    # Predefined prompts for analysis
    prompts = [
        "Describe this image in detail.",
        "What can you tell me about this image?",
        "Is there any text in this image? If so, can you read it?",
        "What is the main subject of this image?",
        "What emotions or feelings does this image convey?",
        "Describe the composition and visual elements of this image.",
        "Summarize what you see in this image in one paragraph."
    ]
    
    # Create the full interface
    with gr.Blocks(title="InternVL2 Image Analysis") as demo:
        gr.Markdown("# πŸ–ΌοΈ InternVL2 Image & Text Analyzer")
        gr.Markdown("### Upload an image and ask questions about it")
        
        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Dropdown(
                    choices=prompts, 
                    value=prompts[0], 
                    label="Select a prompt or enter your own below",
                    allow_custom_value=True
                )
                custom_prompt = gr.Textbox(label="Custom prompt", placeholder="Enter your custom prompt here...")
                analyze_btn = gr.Button("Analyze Image", variant="primary")
                
            with gr.Column(scale=1):
                output = gr.Textbox(label="Analysis Results", lines=15)
        
        # Example images - Using stable URLs from GitHub repositories
        gr.Examples(
            examples=[
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen_sink/files/cheetah1.jpg", "What's in this image?"],
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen_sink/files/lion.jpg", "Describe this animal."],
            ],
            inputs=[input_image, custom_prompt],
        )
        
        # When prompt dropdown changes, update custom prompt
        prompt_input.change(fn=lambda x: x, inputs=prompt_input, outputs=custom_prompt)
        
        # Set up the click event for analysis
        def on_analyze_click(image, prompt_text, dropdown_value):
            if image is None:
                return "Please upload an image first."
            
            # Use the custom prompt if provided, otherwise fall back to the dropdown value
            # (passed in as an input rather than read from the component object)
            final_prompt = prompt_text if prompt_text and prompt_text.strip() else dropdown_value
            
            result = analyze_image(model, tokenizer, image, final_prompt, generation_config)
            return result
        
        analyze_btn.click(
            fn=on_analyze_click,
            inputs=[input_image, custom_prompt, prompt_input],
            outputs=output
        )
        
    return demo

# Main function
if __name__ == "__main__":
    # Set environment variable for better GPU memory management
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
    
    # Create and launch the interface
    demo = create_interface()
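    # Optional: for long-running GPU inference, Gradio's request queue can help avoid
    # timeouts, e.g. demo.queue() before launch (behaviour varies across Gradio versions).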
    demo.launch(share=False, server_name="0.0.0.0")