Spaces:

microsoft
/

Magma-Gaming

Configuration error

App Files Community

jw2yang committed on Feb 26

Commit

2f72390

1 Parent(s): 28c8be9

update

Browse files

Files changed (3) hide show

app.py +13 -8
assets/images/apple.png +0 -0
assets/images/magma_game_thin.png +0 -0

app.py CHANGED Viewed

@@ -16,8 +16,8 @@ import random
 pygame.mixer.quit()  # Disable sound
 # Constants
-WIDTH, HEIGHT = 800, 800
-GRID_SIZE = 80
 WHITE = (255, 255, 255)
 GREEN = (34, 139, 34)  # Forest green - more like an apple
 RED = (200, 50, 50)
@@ -42,9 +42,12 @@ magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_cod
 magam_model.to("cuda")
 # Load magma image
-magma_img = pygame.image.load("./assets/images/magma_game.png")
 magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
 class MagmaFindGPU:
     def __init__(self):
         self.reset()
@@ -106,8 +109,10 @@ class MagmaFindGPU:
         surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
         # pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
-        pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
         # Draw four surrounding squares with labels
         head_x, head_y = self.snake[0]
         neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
@@ -137,7 +142,7 @@ def play_game():
     pil_img = Image.fromarray(state_som)
     convs = [
         {"role": "system", "content": "You are an agent that can see, talk, and act."},
-        {"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green block? Answer with a single number."},
     ]
     prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
     inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
@@ -183,10 +188,10 @@ MARKDOWN = """
 <div align="center">
 <h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
-Game: Magma finds the apple by moving up, down, left and right.
 \[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] &nbsp; \[[Project Page](https://microsoft.github.io/Magma/)\] &nbsp; \[[Github Repo](https://github.com/microsoft/Magma)\] &nbsp; \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\] &nbsp;
 This demo is powered by [Gradio](https://gradio.app/).
 </div>
 """

 pygame.mixer.quit()  # Disable sound
 # Constants
+WIDTH, HEIGHT = 640, 640
+GRID_SIZE = 64
 WHITE = (255, 255, 255)
 GREEN = (34, 139, 34)  # Forest green - more like an apple
 RED = (200, 50, 50)
 magam_model.to("cuda")
 # Load magma image
+magma_img = pygame.image.load("./assets/images/magma_game_thin.png")
 magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
+target_img = pygame.image.load("./assets/images/apple.png")
+target_img = pygame.transform.scale(target_img, (GRID_SIZE, GRID_SIZE))
 class MagmaFindGPU:
     def __init__(self):
         self.reset()
         surface.blit(magma_img, (head_x * GRID_SIZE, head_y * GRID_SIZE))
         # pygame.draw.rect(surface, RED, (self.snake[0][0] * GRID_SIZE, self.snake[0][1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
+        # pygame.draw.rect(surface, GREEN, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE, GRID_SIZE, GRID_SIZE))
+        # Draw green apple target
+        surface.blit(target_img, (self.target[0] * GRID_SIZE, self.target[1] * GRID_SIZE))
         # Draw four surrounding squares with labels
         head_x, head_y = self.snake[0]
         neighbors = [(head_x, head_y - 1), (head_x, head_y + 1), (head_x - 1, head_y), (head_x + 1, head_y)]
     pil_img = Image.fromarray(state_som)
     convs = [
         {"role": "system", "content": "You are an agent that can see, talk, and act."},
+        {"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green apple? Answer with a single number."},
     ]
     prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
     inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
 <div align="center">
 <h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
 \[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] &nbsp; \[[Project Page](https://microsoft.github.io/Magma/)\] &nbsp; \[[Github Repo](https://github.com/microsoft/Magma)\] &nbsp; \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\] &nbsp;
+Game: Magma collects apple by moving up, down, left and right.
 This demo is powered by [Gradio](https://gradio.app/).
 </div>
 """

assets/images/apple.png ADDED Viewed

assets/images/magma_game_thin.png ADDED Viewed