Ivan000 committed on
Commit
2138c0e
·
verified ·
1 Parent(s): 68621da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -57
app.py CHANGED
@@ -32,6 +32,7 @@ pygame.init()
32
  class Paddle:
33
  def __init__(self):
34
  self.rect = pygame.Rect(SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2, SCREEN_HEIGHT - PADDLE_HEIGHT - 10, PADDLE_WIDTH, PADDLE_HEIGHT)
 
35
 
36
  def move(self, direction):
37
  if direction == -1:
@@ -63,13 +64,16 @@ class Brick:
63
  self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)
64
 
65
  class ArkanoidEnv(gym.Env):
66
- def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
67
  super(ArkanoidEnv, self).__init__()
68
  self.action_space = gym.spaces.Discrete(3) # 0: stay, 1: move left, 2: move right
69
- self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(5 + BRICK_ROWS * BRICK_COLS * 2,), dtype=np.float32)
70
  self.reward_size = reward_size
71
  self.penalty_size = penalty_size
72
  self.platform_reward = platform_reward
 
 
 
73
  self.reset()
74
 
75
  def reset(self, seed=None, options=None):
@@ -82,9 +86,12 @@ class ArkanoidEnv(gym.Env):
82
  for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
83
  self.done = False
84
  self.score = 0
 
 
85
  return self._get_state(), {}
86
 
87
  def step(self, action):
 
88
  if action == 0:
89
  self.paddle.move(0)
90
  elif action == 1:
@@ -92,46 +99,52 @@ class ArkanoidEnv(gym.Env):
92
  elif action == 2:
93
  self.paddle.move(1)
94
 
 
 
 
 
 
 
 
 
 
 
 
95
  self.ball.move()
96
 
 
97
  if self.ball.rect.colliderect(self.paddle.rect):
98
  self.ball.velocity[1] = -self.ball.velocity[1]
 
99
  self.score += self.platform_reward
100
 
 
101
  for brick in self.bricks[:]:
102
  if self.ball.rect.colliderect(brick.rect):
103
  self.bricks.remove(brick)
104
  self.ball.velocity[1] = -self.ball.velocity[1]
 
105
  self.score += 1
106
- reward = self.reward_size
107
  if not self.bricks:
108
- reward += self.reward_size * 10 # Bonus reward for breaking all bricks
109
  self.done = True
110
- truncated = False
111
- return self._get_state(), reward, self.done, truncated, {}
112
 
 
113
  if self.ball.rect.bottom >= SCREEN_HEIGHT:
114
  self.done = True
115
  reward = self.penalty_size
116
- truncated = False
117
- else:
118
- reward = 0
119
- truncated = False
120
 
121
- return self._get_state(), reward, self.done, truncated, {}
 
 
122
 
123
  def _get_state(self):
124
- state = [
125
- self.paddle.rect.x,
126
  self.ball.rect.x,
127
- self.ball.rect.y,
128
- self.ball.velocity[0],
129
- self.ball.velocity[1]
130
- ]
131
- for brick in self.bricks:
132
- state.extend([brick.rect.x, brick.rect.y])
133
- state.extend([0, 0] * (BRICK_ROWS * BRICK_COLS - len(self.bricks))) # Padding for missing bricks
134
- return np.array(state, dtype=np.float32)
135
 
136
  def render(self, mode='rgb_array'):
137
  surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
@@ -150,40 +163,7 @@ class ArkanoidEnv(gym.Env):
150
  def close(self):
151
  pygame.quit()
152
 
153
- # Training and playing with custom parameters
154
- def train_and_play(reward_size, penalty_size, platform_reward, iterations):
155
- env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
156
- model = DQN('MlpPolicy', env, verbose=1)
157
- timesteps_per_update = min(1000, iterations)
158
- video_frames = []
159
-
160
- completed_iterations = 0
161
- while completed_iterations < iterations:
162
- steps = min(timesteps_per_update, iterations - completed_iterations)
163
- model.learn(total_timesteps=steps)
164
- completed_iterations += steps
165
-
166
- obs, _ = env.reset()
167
- done = False
168
- while not done:
169
- action, _states = model.predict(obs, deterministic=True)
170
- obs, reward, done, truncated, _ = env.step(action)
171
-
172
- frame = env.render(mode='rgb_array')
173
- frame = np.rot90(frame)
174
- frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
175
- video_frames.append(frame)
176
-
177
- video_path = "arkanoid_training.mp4"
178
- video_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
179
- for frame in video_frames:
180
- video_writer.write(frame)
181
- video_writer.release()
182
-
183
- env.close()
184
- return video_path
185
-
186
- # Main function with Gradio interface
187
  def main():
188
  iface = gr.Interface(
189
  fn=train_and_play,
@@ -191,10 +171,10 @@ def main():
191
  gr.Number(label="Reward Size", value=1),
192
  gr.Number(label="Penalty Size", value=-1),
193
  gr.Number(label="Platform Reward", value=5),
 
194
  gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
195
  ],
196
- outputs="video",
197
- live=False # Disable auto-generation on slider changes
198
  )
199
  iface.launch()
200
 
 
32
  class Paddle:
33
  def __init__(self):
34
  self.rect = pygame.Rect(SCREEN_WIDTH // 2 - PADDLE_WIDTH // 2, SCREEN_HEIGHT - PADDLE_HEIGHT - 10, PADDLE_WIDTH, PADDLE_HEIGHT)
35
+ self.last_position = self.rect.x
36
 
37
  def move(self, direction):
38
  if direction == -1:
 
64
  self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)
65
 
66
  class ArkanoidEnv(gym.Env):
67
+ def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5, inactivity_penalty=-0.5):
68
  super(ArkanoidEnv, self).__init__()
69
  self.action_space = gym.spaces.Discrete(3) # 0: stay, 1: move left, 2: move right
70
+ self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(3,), dtype=np.float32)
71
  self.reward_size = reward_size
72
  self.penalty_size = penalty_size
73
  self.platform_reward = platform_reward
74
+ self.inactivity_penalty = inactivity_penalty
75
+ self.inactivity_counter = 0
76
+ self.last_action = 0
77
  self.reset()
78
 
79
  def reset(self, seed=None, options=None):
 
86
  for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
87
  self.done = False
88
  self.score = 0
89
+ self.inactivity_counter = 0
90
+ self.last_action = None
91
  return self._get_state(), {}
92
 
93
  def step(self, action):
94
+ # Apply action
95
  if action == 0:
96
  self.paddle.move(0)
97
  elif action == 1:
 
99
  elif action == 2:
100
  self.paddle.move(1)
101
 
102
+ # Update inactivity penalty
103
+ if action == 0:
104
+ self.inactivity_counter += 1 / FPS
105
+ else:
106
+ self.inactivity_counter = 0
107
+
108
+ if self.inactivity_counter >= 1:
109
+ reward = self.inactivity_penalty
110
+ return self._get_state(), reward, self.done, False, {}
111
+
112
+ # Update ball position
113
  self.ball.move()
114
 
115
+ # Collision with paddle
116
  if self.ball.rect.colliderect(self.paddle.rect):
117
  self.ball.velocity[1] = -self.ball.velocity[1]
118
+ self.ball.velocity[0] += random.uniform(-1, 1) # Add random offset to angle
119
  self.score += self.platform_reward
120
 
121
+ # Collision with bricks
122
  for brick in self.bricks[:]:
123
  if self.ball.rect.colliderect(brick.rect):
124
  self.bricks.remove(brick)
125
  self.ball.velocity[1] = -self.ball.velocity[1]
126
+ self.ball.velocity[0] += random.uniform(-1, 1) # Add random offset to angle
127
  self.score += 1
 
128
  if not self.bricks:
 
129
  self.done = True
130
+ return self._get_state(), self.reward_size, self.done, False, {}
 
131
 
132
+ # Check if ball is out of bounds
133
  if self.ball.rect.bottom >= SCREEN_HEIGHT:
134
  self.done = True
135
  reward = self.penalty_size
136
+ return self._get_state(), reward, self.done, False, {}
 
 
 
137
 
138
+ # Calculate reward for breaking bricks
139
+ reward = 0
140
+ return self._get_state(), reward, self.done, False, {}
141
 
142
  def _get_state(self):
143
+ return np.array([
 
144
  self.ball.rect.x,
145
+ self.paddle.rect.x,
146
+ len(self.bricks)
147
+ ], dtype=np.float32)
 
 
 
 
 
148
 
149
  def render(self, mode='rgb_array'):
150
  surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
 
163
  def close(self):
164
  pygame.quit()
165
 
166
+ # Main function remains unchanged
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  def main():
168
  iface = gr.Interface(
169
  fn=train_and_play,
 
171
  gr.Number(label="Reward Size", value=1),
172
  gr.Number(label="Penalty Size", value=-1),
173
  gr.Number(label="Platform Reward", value=5),
174
+ gr.Number(label="Inactivity Penalty", value=-0.5),
175
  gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
176
  ],
177
+ outputs="video"
 
178
  )
179
  iface.launch()
180