abiyyufahri committed on
Commit
5599f5a
·
1 Parent(s): 5ef548f

Install error fix attempt 12

Browse files
Files changed (1) hide show
  1. main.py +121 -36
main.py CHANGED
@@ -22,36 +22,93 @@ model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
22
  model_loaded = False
23
 
24
  async def load_model():
25
- """Load model with proper error handling"""
26
  global model, processor, tokenizer, model_loaded
27
 
28
  try:
29
  logger.info("Starting model loading...")
30
 
31
- # Import required modules - use specific Qwen2VL classes
32
- from transformers import Qwen2VLProcessor, Qwen2VLForConditionalGeneration
33
-
34
- logger.info("Loading processor...")
35
- # Use specific Qwen2VL processor
36
- processor = Qwen2VLProcessor.from_pretrained(
37
- model_name,
38
- trust_remote_code=True
39
- )
40
- logger.info("Processor loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
42
  tokenizer = processor.tokenizer
43
-
44
- logger.info("Loading model...")
45
- # Use specific Qwen2VL model class
46
- model = Qwen2VLForConditionalGeneration.from_pretrained(
47
- model_name,
48
- torch_dtype=torch.float32,
49
- device_map=None, # CPU only
50
- trust_remote_code=True,
51
- low_cpu_mem_usage=True # For better memory management
52
- ).eval()
53
-
54
- logger.info("Model loaded successfully!")
55
  model_loaded = True
56
  return True
57
 
@@ -111,7 +168,7 @@ def extract_coordinates(text):
111
 
112
  def cpu_inference(conversation, model, tokenizer, processor):
113
  """
114
- Inference function untuk CPU
115
  """
116
  try:
117
  # Apply chat template
@@ -124,23 +181,36 @@ def cpu_inference(conversation, model, tokenizer, processor):
124
  # Get image from conversation
125
  image = conversation[1]["content"][0]["image"]
126
 
127
- # Process inputs
128
  inputs = processor(
129
  text=[text],
130
  images=[image],
131
- return_tensors="pt"
 
 
 
132
  )
133
 
134
- # Generate response
135
  with torch.no_grad():
136
- outputs = model.generate(
137
- **inputs,
138
- max_new_tokens=256,
139
- do_sample=True,
140
- temperature=0.3,
141
- top_p=0.8,
142
- pad_token_id=tokenizer.eos_token_id
143
- )
 
 
 
 
 
 
 
 
 
 
144
 
145
  # Decode response
146
  generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
@@ -168,7 +238,8 @@ async def root():
168
  return {
169
  "message": "GUI-Actor API is running",
170
  "status": "healthy",
171
- "model_loaded": model_loaded
 
172
  }
173
 
174
  @app.post("/click/base64")
@@ -248,4 +319,18 @@ async def health_check():
248
  "device": "cpu",
249
  "torch_dtype": "float32",
250
  "model_loaded": model_loaded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  }
 
22
  model_loaded = False
23
 
24
  async def load_model():
25
+ """Load model with proper error handling and fallback strategies"""
26
  global model, processor, tokenizer, model_loaded
27
 
28
  try:
29
  logger.info("Starting model loading...")
30
 
31
+ # Try specific Qwen2VL classes first
32
+ try:
33
+ logger.info("Attempting to load with Qwen2VL specific classes...")
34
+ from transformers import Qwen2VLProcessor, Qwen2VLForConditionalGeneration
35
+
36
+ processor = Qwen2VLProcessor.from_pretrained(
37
+ model_name,
38
+ trust_remote_code=True
39
+ )
40
+
41
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
42
+ model_name,
43
+ torch_dtype=torch.float32,
44
+ device_map=None, # CPU only
45
+ trust_remote_code=True,
46
+ low_cpu_mem_usage=True
47
+ ).eval()
48
+
49
+ logger.info("Successfully loaded with Qwen2VL specific classes")
50
+
51
+ except Exception as e1:
52
+ logger.warning(f"Failed with Qwen2VL classes: {e1}")
53
+ logger.info("Trying AutoProcessor and AutoModel fallback...")
54
+
55
+ try:
56
+ from transformers import AutoProcessor, AutoModel
57
+
58
+ processor = AutoProcessor.from_pretrained(
59
+ model_name,
60
+ trust_remote_code=True
61
+ )
62
+
63
+ model = AutoModel.from_pretrained(
64
+ model_name,
65
+ torch_dtype=torch.float32,
66
+ device_map=None,
67
+ trust_remote_code=True,
68
+ low_cpu_mem_usage=True
69
+ ).eval()
70
+
71
+ logger.info("Successfully loaded with Auto classes")
72
+
73
+ except Exception as e2:
74
+ logger.warning(f"Failed with Auto classes: {e2}")
75
+ logger.info("Trying generic transformers approach...")
76
+
77
+ # Last fallback - try loading as generic model
78
+ from transformers import AutoConfig, AutoTokenizer
79
+ import transformers
80
+
81
+ config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
82
+ logger.info(f"Model config type: {type(config)}")
83
+
84
+ # Try to find the right model class
85
+ if hasattr(transformers, 'Qwen2VLForConditionalGeneration'):
86
+ ModelClass = getattr(transformers, 'Qwen2VLForConditionalGeneration')
87
+ elif hasattr(transformers, 'AutoModelForVision2Seq'):
88
+ ModelClass = getattr(transformers, 'AutoModelForVision2Seq')
89
+ else:
90
+ raise Exception("No suitable model class found")
91
+
92
+ processor = AutoProcessor.from_pretrained(
93
+ model_name,
94
+ trust_remote_code=True
95
+ )
96
+
97
+ model = ModelClass.from_pretrained(
98
+ model_name,
99
+ config=config,
100
+ torch_dtype=torch.float32,
101
+ device_map=None,
102
+ trust_remote_code=True,
103
+ low_cpu_mem_usage=True
104
+ ).eval()
105
 
106
+ # Verify processor and model are loaded
107
+ if processor is None or model is None:
108
+ raise Exception("Failed to load processor or model")
109
+
110
  tokenizer = processor.tokenizer
111
+ logger.info("Model and processor loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
112
  model_loaded = True
113
  return True
114
 
 
168
 
169
  def cpu_inference(conversation, model, tokenizer, processor):
170
  """
171
+ Inference function untuk CPU with better error handling
172
  """
173
  try:
174
  # Apply chat template
 
181
  # Get image from conversation
182
  image = conversation[1]["content"][0]["image"]
183
 
184
+ # Process inputs with proper padding
185
  inputs = processor(
186
  text=[text],
187
  images=[image],
188
+ return_tensors="pt",
189
+ padding=True, # Enable padding
190
+ truncation=True, # Enable truncation for long texts
191
+ max_length=512 # Set reasonable max length
192
  )
193
 
194
+ # Generate response with proper error handling
195
  with torch.no_grad():
196
+ try:
197
+ outputs = model.generate(
198
+ **inputs,
199
+ max_new_tokens=256,
200
+ do_sample=True,
201
+ temperature=0.3,
202
+ top_p=0.8,
203
+ pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else tokenizer.pad_token_id
204
+ )
205
+ except Exception as e:
206
+ logger.error(f"Generation error: {e}")
207
+ # Try with simpler parameters
208
+ outputs = model.generate(
209
+ **inputs,
210
+ max_new_tokens=128,
211
+ do_sample=False,
212
+ pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 0
213
+ )
214
 
215
  # Decode response
216
  generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
 
238
  return {
239
  "message": "GUI-Actor API is running",
240
  "status": "healthy",
241
+ "model_loaded": model_loaded,
242
+ "model_name": model_name
243
  }
244
 
245
  @app.post("/click/base64")
 
319
  "device": "cpu",
320
  "torch_dtype": "float32",
321
  "model_loaded": model_loaded
322
+ }
323
+
324
+ @app.get("/debug")
325
+ async def debug_info():
326
+ """Debug endpoint to check model loading status"""
327
+ import transformers
328
+ available_classes = [attr for attr in dir(transformers) if 'Qwen' in attr or 'VL' in attr]
329
+
330
+ return {
331
+ "model_loaded": model_loaded,
332
+ "processor_type": type(processor).__name__ if processor else None,
333
+ "model_type": type(model).__name__ if model else None,
334
+ "available_qwen_classes": available_classes,
335
+ "transformers_version": transformers.__version__
336
  }