Spaces:

Ivan000
/

game

Sleeping

App Files Files Community

Ivan000 committed on Dec 12, 2024

Commit

a14aa44

verified ·

1 Parent(s): 2138c0e

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -25

app.py CHANGED Viewed

@@ -32,7 +32,6 @@ pygame.init()
 class Paddle:
     def __init__(self):
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2, SCREEN_HEIGHT - PADDLE_HEIGHT - 10, PADDLE_WIDTH, PADDLE_HEIGHT)
-        self.last_position = self.rect.x
     def move(self, direction):
         if direction == -1:
@@ -73,13 +72,9 @@ class ArkanoidEnv(gym.Env):
         self.platform_reward = platform_reward
         self.inactivity_penalty = inactivity_penalty
         self.inactivity_counter = 0
-        self.last_action = 0
         self.reset()
     def reset(self, seed=None, options=None):
-        if seed is not None:
-            random.seed(seed)
-            np.random.seed(seed)
         self.paddle = Paddle()
         self.ball = Ball()
         self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
@@ -87,11 +82,9 @@ class ArkanoidEnv(gym.Env):
         self.done = False
         self.score = 0
         self.inactivity_counter = 0
-        self.last_action = None
         return self._get_state(), {}
     def step(self, action):
-        # Apply action
         if action == 0:
             self.paddle.move(0)
         elif action == 1:
@@ -99,7 +92,6 @@ class ArkanoidEnv(gym.Env):
         elif action == 2:
             self.paddle.move(1)
-        # Update inactivity penalty
         if action == 0:
             self.inactivity_counter += 1 / FPS
         else:
@@ -109,42 +101,31 @@ class ArkanoidEnv(gym.Env):
             reward = self.inactivity_penalty
             return self._get_state(), reward, self.done, False, {}
-        # Update ball position
         self.ball.move()
-        # Collision with paddle
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
-            self.ball.velocity[0] += random.uniform(-1, 1)  # Add random offset to angle
             self.score += self.platform_reward
-        # Collision with bricks
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
-                self.ball.velocity[0] += random.uniform(-1, 1)  # Add random offset to angle
                 self.score += 1
                 if not self.bricks:
                     self.done = True
                     return self._get_state(), self.reward_size, self.done, False, {}
-        # Check if ball is out of bounds
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
-            reward = self.penalty_size
-            return self._get_state(), reward, self.done, False, {}
-        # Calculate reward for breaking bricks
-        reward = 0
-        return self._get_state(), reward, self.done, False, {}
     def _get_state(self):
-        return np.array([
-            self.ball.rect.x,
-            self.paddle.rect.x,
-            len(self.bricks)
-        ], dtype=np.float32)
     def render(self, mode='rgb_array'):
         surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
@@ -163,7 +144,32 @@ class ArkanoidEnv(gym.Env):
     def close(self):
         pygame.quit()
-# Main function remains unchanged
 def main():
     iface = gr.Interface(
         fn=train_and_play,

 class Paddle:
     def __init__(self):
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2, SCREEN_HEIGHT - PADDLE_HEIGHT - 10, PADDLE_WIDTH, PADDLE_HEIGHT)
     def move(self, direction):
         if direction == -1:
         self.platform_reward = platform_reward
         self.inactivity_penalty = inactivity_penalty
         self.inactivity_counter = 0
         self.reset()
     def reset(self, seed=None, options=None):
         self.paddle = Paddle()
         self.ball = Ball()
         self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
         self.done = False
         self.score = 0
         self.inactivity_counter = 0
         return self._get_state(), {}
     def step(self, action):
         if action == 0:
             self.paddle.move(0)
         elif action == 1:
         elif action == 2:
             self.paddle.move(1)
         if action == 0:
             self.inactivity_counter += 1 / FPS
         else:
             reward = self.inactivity_penalty
             return self._get_state(), reward, self.done, False, {}
         self.ball.move()
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
+            self.ball.velocity[0] += random.uniform(-1, 1)
             self.score += self.platform_reward
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
+                self.ball.velocity[0] += random.uniform(-1, 1)
                 self.score += 1
                 if not self.bricks:
                     self.done = True
                     return self._get_state(), self.reward_size, self.done, False, {}
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
+            return self._get_state(), self.penalty_size, self.done, False, {}
+        return self._get_state(), 0, self.done, False, {}
     def _get_state(self):
+        return np.array([self.ball.rect.x, self.paddle.rect.x, len(self.bricks)], dtype=np.float32)
     def render(self, mode='rgb_array'):
         surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
     def close(self):
         pygame.quit()
+# Training and playing function
+def train_and_play(reward_size, penalty_size, platform_reward, inactivity_penalty, iterations, max_steps=None):
+    """Train a DQN agent on ArkanoidEnv, play one episode, and save it as an MP4.
+
+    Args:
+        reward_size: Forwarded unchanged to ``ArkanoidEnv``.
+        penalty_size: Forwarded unchanged to ``ArkanoidEnv``.
+        platform_reward: Forwarded unchanged to ``ArkanoidEnv``.
+        inactivity_penalty: Forwarded unchanged to ``ArkanoidEnv``.
+        iterations: Number of training timesteps. Cast to ``int`` because a
+            UI number input may deliver a float.
+        max_steps: Upper bound on the number of recorded playback steps.
+            Defaults to ``FPS * 60`` (one minute of video at ``FPS`` frames
+            per second) so an episode that never ends cannot hang the caller
+            or grow the frame list without limit.
+
+    Returns:
+        The path of the written video file.
+    """
+    if max_steps is None:
+        max_steps = FPS * 60
+    env = ArkanoidEnv(reward_size, penalty_size, platform_reward, inactivity_penalty)
+    frames = []
+    try:
+        model = DQN("MlpPolicy", env, verbose=0)
+        model.learn(total_timesteps=int(iterations))
+        obs, _ = env.reset()
+        for _step in range(max_steps):
+            action, _states = model.predict(obs)
+            obs, _, terminated, truncated, _ = env.step(action)
+            frames.append(env.render(mode="rgb_array"))
+            # An episode is over when it is terminated OR truncated.
+            if terminated or truncated:
+                break
+    finally:
+        # Release the environment even if training or playback raises.
+        env.close()
+    video_path = "/tmp/arkanoid.mp4"
+    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
+    try:
+        for frame in frames:
+            # Frames are converted from RGB to BGR before being written.
+            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
+    finally:
+        # Always finalize the file handle, even when encoding a frame fails.
+        out.release()
+    return video_path
+# Gradio interface
 def main():
     iface = gr.Interface(
         fn=train_and_play,