RishitMishra committed on
Commit
2fa0887
·
verified ·
1 Parent(s): 93d0ee9

Upload 3 files

Browse files
Files changed (3) hide show
  1. DockerFile +11 -0
  2. app.py +47 -0
  3. requirements.txt +7 -0
DockerFile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the GUI-Actor FastAPI service.
#
# NOTE(review): this file was uploaded as `DockerFile`; Hugging Face Docker
# Spaces appear to expect the exact name `Dockerfile` -- confirm and rename.
FROM python:3.10-slim

# requirements.txt installs GUI-Actor from a `git+https://` URL, and pip needs
# a `git` executable for that; the slim base image does not ship one.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Run as a non-root user with UID 1000 and a writable home directory, so the
# model download performed when app.py is imported has somewhere to cache.
RUN useradd --create-home --uid 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /home/user/app

# Install dependencies before copying the app so this layer is cached when
# only app.py changes.
COPY --chown=user requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user app.py app.py

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io

import torch
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from gui_actor.inference import inference
from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
from PIL import Image
from transformers import Qwen2VLProcessor
8
+
# Checkpoint identifier passed to both the processor and the model loader.
MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"

app = FastAPI()

# The processor, its tokenizer and the model are created once, when this
# module is imported, and are then shared by every request handler.
processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
tokenizer = processor.tokenizer

model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float32,
)
# The service only runs inference, so switch the model to evaluation mode.
model = model.eval()
20
+
@app.get("/")
def home():
    """Return a fixed status message showing that the service is up."""
    status = {"message": "GUI-Actor Space is running"}
    return status
24
+
@app.post("/predict/")
async def predict(
    instruction: str = Form(...),
    image: UploadFile = File(...),
):
    """Run GUI-Actor on an uploaded screenshot and return its prediction.

    Args:
        instruction: Natural-language instruction, sent as a form field.
        image: Screenshot to act on, sent as a multipart file upload.

    Returns:
        A dict ``{"click_point": ...}`` wrapping whatever
        ``gui_actor.inference.inference`` returns.

    Raises:
        HTTPException: With status 400 if the upload cannot be decoded as an
            image.
    """
    img_bytes = await image.read()
    try:
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    except OSError as exc:
        # Pillow signals unreadable or truncated image data with OSError
        # (UnidentifiedImageError is a subclass); report it as a client
        # error instead of letting it surface as a 500.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc

    # thumbnail() resizes in place, keeps the aspect ratio and never
    # enlarges, so images already within the limits are left untouched.
    max_width, max_height = 480, 270
    img.thumbnail((max_width, max_height))

    # NOTE(review): inference() is called synchronously inside an async
    # handler, so the event loop is blocked while it runs. Also assumes the
    # result is JSON-serializable -- confirm against gui_actor.
    click_point = inference(
        instruction=instruction,
        image=img,
        model=model,
        processor=processor,
        tokenizer=tokenizer,
    )
    return {"click_point": click_point}
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
# Web service
fastapi
uvicorn
# FastAPI needs python-multipart to parse the Form/File parameters in app.py.
python-multipart

# Image handling
pillow

# Model runtime
torch
transformers
# transformers needs accelerate when from_pretrained() is given a device_map.
accelerate
qwen-vl-utils

# GUI-Actor is installed straight from its repository; pip needs `git` for this.
git+https://github.com/microsoft/GUI-Actor.git