Spaces:

abiyyufahri
/

GUI-Agent

Sleeping

App Files Community

abiyyufahri committed on Jul 24

Commit

f1199d3

1 Parent(s): 1c943af

Install error fix attempt 9

Browse files

Files changed (3) hide show

Dockerfile +33 -16
main.py +72 -47
requirements.txt +18 -6

Dockerfile CHANGED Viewed

@@ -1,10 +1,12 @@
 FROM python:3.10-slim
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git gcc g++ libglib2.0-0 libsm6 libxext6 libxrender-dev \
     build-essential curl && \
     rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
@@ -14,35 +16,50 @@ WORKDIR /app
 # Copy requirements first for better caching
 COPY --chown=user requirements.txt ./
-# Install dependencies step by step
 RUN pip install --upgrade pip && \
-    pip install --no-cache-dir packaging ninja wheel setuptools "numpy<2.0.0"
-# Install PyTorch CPU version
-RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-# Install core dependencies
 RUN pip install --no-cache-dir \
-    transformers \
     datasets \
     Pillow \
     accelerate \
-    scipy \
-    qwen-vl-utils \
     fastapi \
     "uvicorn[standard]"
-# Install GUI-Actor dependencies manually (skip flash-attn)
 RUN pip install --no-cache-dir \
-    pre-commit \
-    liger-kernel==0.5.2 \
-    opencv-python-headless \
-    deepspeed==0.16.0
 # Copy all application files
 COPY --chown=user . .
-# Ensure main.py exists and is readable
-RUN ls -la /app/ && cat /app/main.py | head -10
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.10-slim
+# Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git gcc g++ libglib2.0-0 libsm6 libxext6 libxrender-dev \
     build-essential curl && \
     rm -rf /var/lib/apt/lists/*
+# Create user
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 # Copy requirements first for better caching
 COPY --chown=user requirements.txt ./
+# Install dependencies with proper NumPy version
 RUN pip install --upgrade pip && \
+    pip install --no-cache-dir packaging ninja wheel setuptools
+# Install NumPy 1.x to avoid compatibility issues
+RUN pip install --no-cache-dir "numpy>=1.21.0,<2.0.0"
+# Install PyTorch CPU version (compatible with NumPy 1.x)
+RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision==0.17.2+cpu torchaudio==2.2.2+cpu \
+    --index-url https://download.pytorch.org/whl/cpu
+# Install transformers and related packages
 RUN pip install --no-cache-dir \
+    "transformers>=4.37.0" \
     datasets \
     Pillow \
     accelerate \
+    scipy
+# Install FastAPI and related packages
+RUN pip install --no-cache-dir \
     fastapi \
     "uvicorn[standard]"
+# Install other dependencies (skip problematic ones)
 RUN pip install --no-cache-dir \
+    opencv-python-headless
+# Try to install qwen-vl-utils (if it fails, continue)
+RUN pip install --no-cache-dir qwen-vl-utils || echo "qwen-vl-utils installation failed, continuing..."
 # Copy all application files
 COPY --chown=user . .
+# Set environment variables for better compatibility
+ENV TRANSFORMERS_CACHE=/tmp/transformers_cache
+ENV HF_HOME=/tmp/hf_home
+ENV PYTHONUNBUFFERED=1
+# Expose port
+EXPOSE 7860
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "120"]

main.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from PIL import Image
@@ -6,55 +6,75 @@ from io import BytesIO
 import base64
 import torch
 import re
-app = FastAPI(title="GUI-Actor API", version="1.0.0")
 # Initialize global variables
 model = None
 processor = None
 tokenizer = None
 model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
-def load_model():
     """Load model with proper error handling"""
-    global model, processor, tokenizer
     try:
-        print("Loading processor...")
-        # Try different approaches to load the processor
-        try:
-            from transformers import Qwen2VLProcessor
-            processor = Qwen2VLProcessor.from_pretrained(model_name)
-            print("Successfully loaded Qwen2VLProcessor")
-        except Exception as e:
-            print(f"Failed to load Qwen2VLProcessor: {e}")
-            from transformers import AutoProcessor
-            processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-            print("Successfully loaded AutoProcessor")
-        tokenizer = processor.tokenizer
-        print("Loading model...")
-        # Use the correct model class for Qwen2VL
-        from transformers import Qwen2VLForConditionalGeneration
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
             model_name,
-            torch_dtype=torch.float32,  # float32 untuk CPU
-            device_map=None,            # CPU only
-            trust_remote_code=True,     # untuk custom model
-            attn_implementation=None    # skip flash attention
         ).eval()
-        print("Model loaded successfully!")
         return True
     except Exception as e:
-        print(f"Error loading model: {e}")
         return False
-# Load model at startup
-model_loaded = load_model()
 class Base64Request(BaseModel):
     image_base64: str
@@ -136,6 +156,7 @@ def cpu_inference(conversation, model, tokenizer, processor):
         }
     except Exception as e:
         return {
             "topk_points": [(0.5, 0.5)],
             "response": f"Error during inference: {str(e)}",
@@ -153,20 +174,26 @@ async def root():
 @app.post("/click/base64")
 async def predict_click_base64(data: Base64Request):
     if not model_loaded:
-        return JSONResponse(
-            content={
-                "error": "Model not loaded properly",
-                "success": False,
-                "x": 0.5,
-                "y": 0.5
-            },
-            status_code=503
         )
     try:
         # Decode base64 to image
-        image_data = base64.b64decode(data.image_base64.split(",")[-1])
-        pil_image = Image.open(BytesIO(image_data)).convert("RGB")
         conversation = [
             {
@@ -204,21 +231,19 @@ async def predict_click_base64(data: Base64Request):
             "success": pred["success"]
         })
     except Exception as e:
-        return JSONResponse(
-            content={
-                "error": str(e),
-                "success": False,
-                "x": 0.5,
-                "y": 0.5
-            },
-            status_code=500
         )
 @app.get("/health")
 async def health_check():
     return {
-        "status": "healthy",
         "model": model_name,
         "device": "cpu",
         "torch_dtype": "float32",

+from fastapi import FastAPI, Form, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from PIL import Image
 import base64
 import torch
 import re
+import logging
+import asyncio
+from contextlib import asynccontextmanager
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Initialize global variables
 model = None
 processor = None
 tokenizer = None
 model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
+model_loaded = False
+async def load_model():
     """Load model with proper error handling"""
+    global model, processor, tokenizer, model_loaded
     try:
+        logger.info("Starting model loading...")
+        # Import required modules
+        from transformers import AutoProcessor, AutoModelForCausalLM
+        logger.info("Loading processor...")
+        # Use AutoProcessor for better compatibility
+        processor = AutoProcessor.from_pretrained(
+            model_name,
+            trust_remote_code=True
+        )
+        logger.info("Processor loaded successfully")
+        tokenizer = processor.tokenizer
+        logger.info("Loading model...")
+        # Use AutoModelForCausalLM for better compatibility
+        model = AutoModelForCausalLM.from_pretrained(
             model_name,
+            torch_dtype=torch.float32,
+            device_map=None,  # CPU only
+            trust_remote_code=True,
+            low_cpu_mem_usage=True  # For better memory management
         ).eval()
+        logger.info("Model loaded successfully!")
+        model_loaded = True
         return True
     except Exception as e:
+        logger.error(f"Error loading model: {e}")
+        model_loaded = False
         return False
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    logger.info("Starting up GUI-Actor API...")
+    await load_model()
+    yield
+    # Shutdown
+    logger.info("Shutting down GUI-Actor API...")
+# Initialize FastAPI app with lifespan
+app = FastAPI(
+    title="GUI-Actor API",
+    version="1.0.0",
+    lifespan=lifespan
+)
 class Base64Request(BaseModel):
     image_base64: str
         }
     except Exception as e:
+        logger.error(f"Inference error: {e}")
         return {
             "topk_points": [(0.5, 0.5)],
             "response": f"Error during inference: {str(e)}",
 @app.post("/click/base64")
 async def predict_click_base64(data: Base64Request):
     if not model_loaded:
+        raise HTTPException(
+            status_code=503,
+            detail="Model not loaded properly"
         )
     try:
         # Decode base64 to image
+        try:
+            # Handle data URL format
+            if "," in data.image_base64:
+                image_data = base64.b64decode(data.image_base64.split(",")[-1])
+            else:
+                image_data = base64.b64decode(data.image_base64)
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Invalid base64 image: {e}")
+        try:
+            pil_image = Image.open(BytesIO(image_data)).convert("RGB")
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Invalid image format: {e}")
         conversation = [
             {
             "success": pred["success"]
         })
+    except HTTPException:
+        raise
     except Exception as e:
+        logger.error(f"Prediction error: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Internal server error: {str(e)}"
         )
 @app.get("/health")
 async def health_check():
     return {
+        "status": "healthy" if model_loaded else "unhealthy",
         "model": model_name,
         "device": "cpu",
         "torch_dtype": "float32",

requirements.txt CHANGED Viewed

@@ -1,16 +1,28 @@
 packaging
 ninja
 fastapi
 uvicorn[standard]
 transformers>=4.37.0
 datasets
 Pillow
-# Fix NumPy compatibility issue
-numpy<2.0.0
-torch==2.2.2+cpu
-torchvision
-torchaudio
---index-url https://download.pytorch.org/whl/cpu
 accelerate
 scipy
 qwen-vl-utils

+# Core dependencies
 packaging
 ninja
+wheel
+setuptools
+# NumPy version that's compatible with PyTorch and transformers
+numpy>=1.21.0,<2.0.0
+# PyTorch CPU version (will be installed via Dockerfile)
+# torch==2.2.2+cpu
+# torchvision==0.17.2+cpu
+# torchaudio==2.2.2+cpu
+# FastAPI and related
 fastapi
 uvicorn[standard]
+# Transformers and ML dependencies
 transformers>=4.37.0
 datasets
 Pillow
 accelerate
 scipy
+# Optional dependencies (install if available)
+opencv-python-headless
 qwen-vl-utils