Fix example images URLs to prevent 404 errors
Browse files
app.py
CHANGED
@@ -6,11 +6,9 @@ from PIL import Image
|
|
6 |
import traceback
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
from transformers.generation import GenerationConfig
|
9 |
-
import importlib.util
|
10 |
-
import importlib.machinery
|
11 |
|
12 |
print("=" * 50)
|
13 |
-
print("InternVL2
|
14 |
print("=" * 50)
|
15 |
|
16 |
# System information
|
@@ -31,88 +29,86 @@ if torch.cuda.is_available():
|
|
31 |
else:
|
32 |
print("CUDA is not available. This application requires GPU acceleration.")
|
33 |
|
34 |
-
# Create a
|
35 |
def setup_flash_attn_mock():
|
36 |
-
#
|
37 |
-
|
38 |
|
39 |
-
# First,
|
40 |
if "flash_attn" in sys.modules:
|
41 |
-
|
42 |
-
|
43 |
-
# Create a simple Python file with flash_attn mock code
|
44 |
-
flash_attn_path = os.path.join(os.getcwd(), "flash_attn.py")
|
45 |
-
with open(flash_attn_path, "w") as f:
|
46 |
-
f.write("""
|
47 |
-
# Mock flash_attn module
|
48 |
-
__version__ = "0.0.0-disabled"
|
49 |
-
|
50 |
-
def flash_attn_func(*args, **kwargs):
|
51 |
-
raise NotImplementedError("This is a mock flash_attn implementation")
|
52 |
-
|
53 |
-
def flash_attn_kvpacked_func(*args, **kwargs):
|
54 |
-
raise NotImplementedError("This is a mock flash_attn implementation")
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
# Add any other functions that might be needed
|
60 |
-
""")
|
61 |
|
62 |
-
#
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
with open(flash_attn_2_path, "w") as f:
|
72 |
-
f.write("# Mock flash_attn_2_cuda module\n")
|
73 |
-
|
74 |
-
spec_cuda = importlib.util.spec_from_file_location("flash_attn_2_cuda", flash_attn_2_path)
|
75 |
-
flash_attn_2_cuda_module = importlib.util.module_from_spec(spec_cuda)
|
76 |
-
sys.modules["flash_attn_2_cuda"] = flash_attn_2_cuda_module
|
77 |
-
spec_cuda.loader.exec_module(flash_attn_2_cuda_module)
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# Create a function to load the model
|
82 |
def load_model():
|
83 |
try:
|
84 |
-
print("\nLoading InternVL2
|
85 |
|
86 |
-
#
|
87 |
setup_flash_attn_mock()
|
88 |
|
89 |
-
# Disable flash attention in transformers by patching environment vars
|
90 |
-
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
91 |
-
os.environ["TRANSFORMERS_OFFLINE"] = "1" # Avoid online checks for flash_attn
|
92 |
-
|
93 |
# Load the model and tokenizer
|
94 |
model_path = "OpenGVLab/InternVL2-8B"
|
95 |
-
print("Loading tokenizer...")
|
96 |
-
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
97 |
|
98 |
-
|
99 |
-
|
|
|
|
|
100 |
model = AutoModelForCausalLM.from_pretrained(
|
101 |
model_path,
|
102 |
-
torch_dtype=torch.bfloat16,
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
attn_implementation="eager" # Use eager implementation instead
|
107 |
)
|
108 |
|
109 |
-
#
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
116 |
)
|
117 |
|
118 |
print("✓ Model and tokenizer loaded successfully!")
|
@@ -127,24 +123,55 @@ def load_model():
|
|
127 |
def load_image(image_path, processor=None):
|
128 |
"""Load an image and prepare it for the model."""
|
129 |
if isinstance(image_path, str):
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
else:
|
132 |
image = image_path
|
133 |
|
134 |
-
#
|
135 |
return image
|
136 |
|
137 |
# Function to analyze an image with text
|
138 |
def analyze_image(model, tokenizer, image, prompt, generation_config):
|
139 |
try:
|
140 |
-
#
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
# Generate a response
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
except Exception as e:
|
150 |
error_msg = f"Error analyzing image: {str(e)}"
|
@@ -158,7 +185,7 @@ def create_interface():
|
|
158 |
|
159 |
if model is None:
|
160 |
# If model loading failed, create a simple error interface
|
161 |
-
with gr.Blocks(title="InternVL2
|
162 |
gr.Markdown("# ❌ Error: Failed to load models")
|
163 |
gr.Markdown("Please check the console for error details.")
|
164 |
return demo
|
@@ -166,18 +193,18 @@ def create_interface():
|
|
166 |
# Predefined prompts for analysis
|
167 |
prompts = [
|
168 |
"Describe this image in detail.",
|
169 |
-
"What
|
170 |
-
"
|
171 |
"What is the main subject of this image?",
|
172 |
-
"
|
173 |
-
"Describe the
|
174 |
-
"Summarize
|
175 |
]
|
176 |
|
177 |
# Create the full interface
|
178 |
with gr.Blocks(title="InternVL2 Image Analysis") as demo:
|
179 |
-
gr.Markdown("# 🖼️ InternVL2
|
180 |
-
gr.Markdown("### Upload an image
|
181 |
|
182 |
with gr.Row():
|
183 |
with gr.Column(scale=1):
|
@@ -194,11 +221,11 @@ def create_interface():
|
|
194 |
with gr.Column(scale=1):
|
195 |
output = gr.Textbox(label="Analysis Results", lines=15)
|
196 |
|
197 |
-
# Example images
|
198 |
gr.Examples(
|
199 |
examples=[
|
200 |
-
["https://
|
201 |
-
["https://
|
202 |
],
|
203 |
inputs=[input_image, custom_prompt],
|
204 |
)
|
@@ -232,4 +259,4 @@ if __name__ == "__main__":
|
|
232 |
|
233 |
# Create and launch the interface
|
234 |
demo = create_interface()
|
235 |
-
demo.launch(share=False, server_name="0.0.0.0")
|
|
|
6 |
import traceback
|
7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
8 |
from transformers.generation import GenerationConfig
|
|
|
|
|
9 |
|
10 |
print("=" * 50)
|
11 |
+
print("InternVL2 IMAGE & TEXT ANALYSIS")
|
12 |
print("=" * 50)
|
13 |
|
14 |
# System information
|
|
|
29 |
else:
|
30 |
print("CUDA is not available. This application requires GPU acceleration.")
|
31 |
|
32 |
+
# Create a mock function for flash_attn modules
|
33 |
def setup_flash_attn_mock():
|
34 |
+
# Disable flash attention in transformers
|
35 |
+
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
36 |
|
37 |
+
# First, check if flash_attn is already imported
|
38 |
if "flash_attn" in sys.modules:
|
39 |
+
print("flash_attn module already imported - no mocking needed")
|
40 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
+
# If we should mock the module
|
43 |
+
print("Setting up flash_attn mock...")
|
|
|
|
|
|
|
44 |
|
45 |
+
# Create a proper mock that has the necessary attributes
|
46 |
+
class FlashAttnMock:
|
47 |
+
__version__ = "0.0.0-disabled-mock"
|
48 |
+
|
49 |
+
def __init__(self):
|
50 |
+
pass
|
51 |
+
|
52 |
+
def flash_attn_func(self, *args, **kwargs):
|
53 |
+
raise NotImplementedError("This is a mock flash_attn implementation")
|
54 |
+
|
55 |
+
def flash_attn_kvpacked_func(self, *args, **kwargs):
|
56 |
+
raise NotImplementedError("This is a mock flash_attn implementation")
|
57 |
+
|
58 |
+
def flash_attn_qkvpacked_func(self, *args, **kwargs):
|
59 |
+
raise NotImplementedError("This is a mock flash_attn implementation")
|
60 |
+
|
61 |
+
# Create the module with proper spec
|
62 |
+
import types
|
63 |
+
flash_attn_mock = FlashAttnMock()
|
64 |
+
sys.modules["flash_attn"] = flash_attn_mock
|
65 |
+
print("flash_attn mock set up successfully")
|
66 |
|
67 |
+
# Also mock the related modules that might be imported
|
68 |
+
sys.modules["flash_attn.flash_attn_interface"] = types.ModuleType("flash_attn.flash_attn_interface")
|
69 |
+
sys.modules["flash_attn.flash_attn_triton"] = types.ModuleType("flash_attn.flash_attn_triton")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
# Check if it worked
|
72 |
+
try:
|
73 |
+
import flash_attn
|
74 |
+
print(f"Mock flash_attn module version: {flash_attn.__version__}")
|
75 |
+
except:
|
76 |
+
print("Warning: flash_attn mock failed to load correctly")
|
77 |
|
78 |
# Create a function to load the model
|
79 |
def load_model():
|
80 |
try:
|
81 |
+
print("\nLoading InternVL2 model...")
|
82 |
|
83 |
+
# Setup flash_attn mock
|
84 |
setup_flash_attn_mock()
|
85 |
|
|
|
|
|
|
|
|
|
86 |
# Load the model and tokenizer
|
87 |
model_path = "OpenGVLab/InternVL2-8B"
|
|
|
|
|
88 |
|
89 |
+
# Print downloading status
|
90 |
+
print("Downloading model shards. This may take some time...")
|
91 |
+
|
92 |
+
# Load the model
|
93 |
model = AutoModelForCausalLM.from_pretrained(
|
94 |
model_path,
|
95 |
+
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
|
96 |
+
low_cpu_mem_usage=True,
|
97 |
+
device_map="auto" if torch.cuda.is_available() else None,
|
98 |
+
trust_remote_code=True
|
|
|
99 |
)
|
100 |
|
101 |
+
# Load tokenizer
|
102 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
103 |
+
model_path,
|
104 |
+
use_fast=False,
|
105 |
+
trust_remote_code=True
|
106 |
+
)
|
107 |
+
|
108 |
+
# Set generation config
|
109 |
+
generation_config = GenerationConfig.from_pretrained(
|
110 |
+
model_path,
|
111 |
+
trust_remote_code=True
|
112 |
)
|
113 |
|
114 |
print("✓ Model and tokenizer loaded successfully!")
|
|
|
123 |
def load_image(image_path, processor=None):
|
124 |
"""Load an image and prepare it for the model."""
|
125 |
if isinstance(image_path, str):
|
126 |
+
if image_path.startswith('http'):
|
127 |
+
import requests
|
128 |
+
from io import BytesIO
|
129 |
+
try:
|
130 |
+
response = requests.get(image_path, timeout=10)
|
131 |
+
image = Image.open(BytesIO(response.content)).convert('RGB')
|
132 |
+
except Exception as e:
|
133 |
+
print(f"Error loading image from URL: {e}")
|
134 |
+
# Return a default image or raise an error
|
135 |
+
image = Image.new('RGB', (224, 224), color='gray')
|
136 |
+
else:
|
137 |
+
image = Image.open(image_path).convert('RGB')
|
138 |
else:
|
139 |
image = image_path
|
140 |
|
141 |
+
# No need to process, the model handles that internally
|
142 |
return image
|
143 |
|
144 |
# Function to analyze an image with text
|
145 |
def analyze_image(model, tokenizer, image, prompt, generation_config):
|
146 |
try:
|
147 |
+
# Prepare inputs
|
148 |
+
text_prompt = f"USER: <image>\n{prompt}\nASSISTANT:"
|
149 |
+
|
150 |
+
# Convert inputs for the model
|
151 |
+
inputs = tokenizer([text_prompt], return_tensors="pt")
|
152 |
+
|
153 |
+
# Move inputs to the right device
|
154 |
+
if torch.cuda.is_available():
|
155 |
+
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
156 |
+
|
157 |
+
# Add image to the inputs
|
158 |
+
inputs["images"] = [image]
|
159 |
|
160 |
# Generate a response
|
161 |
+
with torch.no_grad():
|
162 |
+
outputs = model.generate(
|
163 |
+
**inputs,
|
164 |
+
generation_config=generation_config,
|
165 |
+
max_new_tokens=512,
|
166 |
+
)
|
167 |
+
|
168 |
+
# Decode the outputs
|
169 |
+
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
170 |
+
|
171 |
+
# Extract only the assistant's response
|
172 |
+
assistant_response = generated_text.split("ASSISTANT:")[-1].strip()
|
173 |
+
|
174 |
+
return assistant_response
|
175 |
|
176 |
except Exception as e:
|
177 |
error_msg = f"Error analyzing image: {str(e)}"
|
|
|
185 |
|
186 |
if model is None:
|
187 |
# If model loading failed, create a simple error interface
|
188 |
+
with gr.Blocks(title="InternVL2 Image Analysis - Error") as demo:
|
189 |
gr.Markdown("# ❌ Error: Failed to load models")
|
190 |
gr.Markdown("Please check the console for error details.")
|
191 |
return demo
|
|
|
193 |
# Predefined prompts for analysis
|
194 |
prompts = [
|
195 |
"Describe this image in detail.",
|
196 |
+
"What can you tell me about this image?",
|
197 |
+
"Is there any text in this image? If so, can you read it?",
|
198 |
"What is the main subject of this image?",
|
199 |
+
"What emotions or feelings does this image convey?",
|
200 |
+
"Describe the composition and visual elements of this image.",
|
201 |
+
"Summarize what you see in this image in one paragraph."
|
202 |
]
|
203 |
|
204 |
# Create the full interface
|
205 |
with gr.Blocks(title="InternVL2 Image Analysis") as demo:
|
206 |
+
gr.Markdown("# 🖼️ InternVL2 Image & Text Analyzer")
|
207 |
+
gr.Markdown("### Upload an image and ask questions about it")
|
208 |
|
209 |
with gr.Row():
|
210 |
with gr.Column(scale=1):
|
|
|
221 |
with gr.Column(scale=1):
|
222 |
output = gr.Textbox(label="Analysis Results", lines=15)
|
223 |
|
224 |
+
# Example images - UPDATED with more reliable image URLs
|
225 |
gr.Examples(
|
226 |
examples=[
|
227 |
+
["https://github.com/huggingface/transformers/raw/main/docs/source/en/model_doc/blip-2_files/BobRoss.jpg", "What's in this image?"],
|
228 |
+
["https://raw.githubusercontent.com/openai/CLIP/main/CLIP.png", "Describe this diagram in detail."],
|
229 |
],
|
230 |
inputs=[input_image, custom_prompt],
|
231 |
)
|
|
|
259 |
|
260 |
# Create and launch the interface
|
261 |
demo = create_interface()
|
262 |
+
demo.launch(share=False, server_name="0.0.0.0")
|