Spaces:
Configuration error
Configuration error
update
Browse files
- app.py +13 -8
- assets/images/apple.png +0 -0
- assets/images/magma_game_thin.png +0 -0
app.py
CHANGED
|
@@ -16,8 +16,8 @@ import random
|
|
| 16 |
pygame.mixer.quit() # Disable sound
|
| 17 |
|
| 18 |
# Constants
|
| 19 |
-
WIDTH, HEIGHT =
|
| 20 |
-
GRID_SIZE =
|
| 21 |
WHITE = (255, 255, 255)
|
| 22 |
GREEN = (34, 139, 34) # Forest green - more like an apple
|
| 23 |
RED = (200, 50, 50)
|
|
@@ -42,9 +42,12 @@ magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_cod
|
|
| 42 |
magam_model.to("cuda")
|
| 43 |
|
| 44 |
# Load magma image
|
| 45 |
-
magma_img = pygame.image.load("./assets/images/
|
| 46 |
magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
class MagmaFindGPU:
|
| 49 |
def __init__(self):
|
| 50 |
self.reset()
|
|
@@ -106,8 +109,10 @@ class MagmaFindGPU:
|
|
| 106 |
surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
|
| 107 |
|
| 108 |
# pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
|
| 109 |
-
pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
# Draw four surrounding squares with labels
|
| 112 |
head_x, head_y = self.snake[0]
|
| 113 |
neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
|
|
@@ -137,7 +142,7 @@ def play_game():
|
|
| 137 |
pil_img = Image.fromarray(state_som)
|
| 138 |
convs = [
|
| 139 |
{"role": "system", "content": "You are an agent that can see, talk, and act."},
|
| 140 |
-
{"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green
|
| 141 |
]
|
| 142 |
prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
|
| 143 |
inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
|
|
@@ -183,10 +188,10 @@ MARKDOWN = """
|
|
| 183 |
<div align="center">
|
| 184 |
<h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
|
| 185 |
|
| 186 |
-
Game: Magma finds the apple by moving up, down, left and right.
|
| 187 |
-
|
| 188 |
\[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] \[[Project Page](https://microsoft.github.io/Magma/)\] \[[Github Repo](https://github.com/microsoft/Magma)\] \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\]
|
| 189 |
|
|
|
|
|
|
|
| 190 |
This demo is powered by [Gradio](https://gradio.app/).
|
| 191 |
</div>
|
| 192 |
"""
|
|
|
|
| 16 |
pygame.mixer.quit() # Disable sound
|
| 17 |
|
| 18 |
# Constants
|
| 19 |
+
WIDTH, HEIGHT = 640, 640
|
| 20 |
+
GRID_SIZE = 64
|
| 21 |
WHITE = (255, 255, 255)
|
| 22 |
GREEN = (34, 139, 34) # Forest green - more like an apple
|
| 23 |
RED = (200, 50, 50)
|
|
|
|
| 42 |
magam_model.to("cuda")
|
| 43 |
|
| 44 |
# Load magma image
|
| 45 |
+
magma_img = pygame.image.load("./assets/images/magma_game_thin.png")
|
| 46 |
magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
|
| 47 |
|
| 48 |
+
target_img = pygame.image.load("./assets/images/apple.png")
|
| 49 |
+
target_img = pygame.transform.scale(target_img, (GRID_SIZE, GRID_SIZE))
|
| 50 |
+
|
| 51 |
class MagmaFindGPU:
|
| 52 |
def __init__(self):
|
| 53 |
self.reset()
|
|
|
|
| 109 |
surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
|
| 110 |
|
| 111 |
# pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
|
| 112 |
+
# pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
|
| 113 |
+
# Draw green apple target
|
| 114 |
+
surface.blit(target_img, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE))
|
| 115 |
+
|
| 116 |
# Draw four surrounding squares with labels
|
| 117 |
head_x, head_y = self.snake[0]
|
| 118 |
neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
|
|
|
|
| 142 |
pil_img = Image.fromarray(state_som)
|
| 143 |
convs = [
|
| 144 |
{"role": "system", "content": "You are an agent that can see, talk, and act."},
|
| 145 |
+
{"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green apple? Answer with a single number."},
|
| 146 |
]
|
| 147 |
prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
|
| 148 |
inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
|
|
|
|
| 188 |
<div align="center">
|
| 189 |
<h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
|
| 190 |
|
|
|
|
|
|
|
| 191 |
\[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] \[[Project Page](https://microsoft.github.io/Magma/)\] \[[Github Repo](https://github.com/microsoft/Magma)\] \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\]
|
| 192 |
|
| 193 |
+
Game: Magma collects apple by moving up, down, left and right.
|
| 194 |
+
|
| 195 |
This demo is powered by [Gradio](https://gradio.app/).
|
| 196 |
</div>
|
| 197 |
"""
|
assets/images/apple.png
ADDED
|
assets/images/magma_game_thin.png
ADDED
|