Spaces:

Ivan000
/

game

Sleeping

App Files Files Community

Ivan000 committed on Dec 11, 2024

Commit

68621da

verified ·

1 Parent(s): a99511a

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -42

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import pygame
 import random
 import gymnasium as gym
 from stable_baselines3 import DQN
 import gradio as gr
 import cv2
@@ -26,8 +27,6 @@ RED = (255, 0, 0)
 # Initialize Pygame
 pygame.init()
-screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
-pygame.display.set_caption("Arkanoid")
 # Game classes
 class Paddle:
@@ -41,7 +40,6 @@ class Paddle:
             self.rect.x += 10
         self.rect.clamp_ip(pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT))
 class Ball:
     def __init__(self):
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS, SCREEN_HEIGHT // 2 - BALL_RADIUS, BALL_RADIUS * 2, BALL_RADIUS * 2)
@@ -60,14 +58,12 @@ class Ball:
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS, SCREEN_HEIGHT // 2 - BALL_RADIUS, BALL_RADIUS * 2, BALL_RADIUS * 2)
         self.velocity = [random.choice([-5, 5]), -5]
 # A brick: nothing more than a rectangle the ball can be collision-tested against.
 class Brick:
     def __init__(self, x, y):
         # (x, y) positions the rect; its size is 5 px less than the
         # BRICK_WIDTH x BRICK_HEIGHT step that reset() uses to lay bricks out,
         # so neighbouring bricks do not touch.
         self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)
 class ArkanoidEnv(gym.Env):
-    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=2):
         super(ArkanoidEnv, self).__init__()
         self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
         self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(5 + BRICK_ROWS * BRICK_COLS * 2,), dtype=np.float32)
@@ -77,9 +73,12 @@ class ArkanoidEnv(gym.Env):
         self.reset()
     def reset(self, seed=None, options=None):
         self.paddle = Paddle()
         self.ball = Ball()
-        self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
                        for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
         self.done = False
         self.score = 0
@@ -95,28 +94,31 @@ class ArkanoidEnv(gym.Env):
         self.ball.move()
-        reward = 0
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
-            reward += self.platform_reward
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
                 self.score += 1
-                reward += self.reward_size
                 if not self.bricks:
-                    reward += self.reward_size * 10
                     self.done = True
-                    return self._get_state(), reward, self.done, False, {}
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
-            reward += self.penalty_size
-        return self._get_state(), reward, self.done, False, {}
     def _get_state(self):
         state = [
@@ -128,26 +130,26 @@ class ArkanoidEnv(gym.Env):
         ]
         for brick in self.bricks:
             state.extend([brick.rect.x, brick.rect.y])
-        state.extend([0, 0] * (BRICK_ROWS * BRICK_COLS - len(self.bricks)))
         return np.array(state, dtype=np.float32)
     # Pre-commit render(): paints the frame directly onto the module-level
     # `screen` window. The `mode` argument is accepted but never read.
-    def render(self, mode='human'):
         # Drain the event queue; on a window-close request shut pygame down
         # and return without drawing.
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                pygame.quit()
-                return
         # Clear to black, then draw paddle, ball and every remaining brick.
-        screen.fill(BLACK)
-        pygame.draw.rect(screen, WHITE, self.paddle.rect)
-        pygame.draw.ellipse(screen, WHITE, self.ball.rect)
         for brick in self.bricks:
-            pygame.draw.rect(screen, RED, brick.rect)
-        pygame.display.flip()
         # NOTE(review): a brand-new Clock is built on every call, so tick(FPS)
         # has no earlier tick to measure against - presumably this does not
         # actually cap the frame rate; confirm against the pygame.time.Clock docs.
-        pygame.time.Clock().tick(FPS)
     def close(self):
         # Uninitialise pygame - the counterpart of the module-level pygame.init().
         pygame.quit()
 # Training and playing with custom parameters
 def train_and_play(reward_size, penalty_size, platform_reward, iterations):
     env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
@@ -163,18 +165,11 @@ def train_and_play(reward_size, penalty_size, platform_reward, iterations):
         obs, _ = env.reset()
         done = False
-        truncated = False
-        while not done and not truncated:
             action, _states = model.predict(obs, deterministic=True)
             obs, reward, done, truncated, _ = env.step(action)
-            try:
-                env.render()
-            except pygame.error:
-                print("Pygame display was closed. Exiting render loop.")
-                return "Training interrupted."
-            frame = pygame.surfarray.array3d(pygame.display.get_surface())
             frame = np.rot90(frame)
             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
             video_frames.append(frame)
@@ -188,22 +183,20 @@ def train_and_play(reward_size, penalty_size, platform_reward, iterations):
     env.close()
     return video_path
-# Main function
 # Builds the Gradio form around train_and_play and launches it.
 def main():
     iface = gr.Interface(
         fn=train_and_play,
         # Listed in the same order as train_and_play's parameters:
         # reward_size, penalty_size, platform_reward, iterations.
         inputs=[
             gr.Number(label="Reward Size", value=1),
             gr.Number(label="Penalty Size", value=-1),
-            gr.Number(label="Platform Reward", value=2),
             gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
         ],
         # train_and_play returns a video path, shown in a video component.
         outputs="video",
-        live=False  # Changed: generate only after the button is pressed
     )
     iface.launch()
 # Start the app only when the file is executed as a script.
 if __name__ == "__main__":
     main()

 import random
 import gymnasium as gym
 from stable_baselines3 import DQN
+from stable_baselines3.common.evaluation import evaluate_policy
 import gradio as gr
 import cv2
 # Initialize Pygame
 pygame.init()
 # Game classes
 class Paddle:
             self.rect.x += 10
         self.rect.clamp_ip(pygame.Rect(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT))
 class Ball:
     def __init__(self):
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS, SCREEN_HEIGHT // 2 - BALL_RADIUS, BALL_RADIUS * 2, BALL_RADIUS * 2)
         self.rect = pygame.Rect(SCREEN_WIDTH // 2 - BALL_RADIUS, SCREEN_HEIGHT // 2 - BALL_RADIUS, BALL_RADIUS * 2, BALL_RADIUS * 2)
         self.velocity = [random.choice([-5, 5]), -5]
 class Brick:
     """A rectangular brick positioned at (x, y) that the ball can collide with."""
     def __init__(self, x, y):
         # Shave 5 px off the layout step in each dimension so that bricks
         # placed one step apart by reset() stay visibly separated.
         width, height = BRICK_WIDTH - 5, BRICK_HEIGHT - 5
         self.rect = pygame.Rect(x, y, width, height)
 class ArkanoidEnv(gym.Env):
+    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
         super(ArkanoidEnv, self).__init__()
         self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
         self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(5 + BRICK_ROWS * BRICK_COLS * 2,), dtype=np.float32)
         self.reset()
     def reset(self, seed=None, options=None):
+        if seed is not None:
+            random.seed(seed)
+            np.random.seed(seed)
         self.paddle = Paddle()
         self.ball = Ball()
+        self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
                        for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
         self.done = False
         self.score = 0
         self.ball.move()
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
+            self.score += self.platform_reward
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
                 self.score += 1
+                reward = self.reward_size
                 if not self.bricks:
+                    reward += self.reward_size * 10  # Bonus reward for breaking all bricks
                     self.done = True
+                    truncated = False
+                    return self._get_state(), reward, self.done, truncated, {}
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
+            reward = self.penalty_size
+            truncated = False
+        else:
+            reward = 0
+            truncated = False
+        return self._get_state(), reward, self.done, truncated, {}
     def _get_state(self):
         state = [
         ]
         for brick in self.bricks:
             state.extend([brick.rect.x, brick.rect.y])
+        state.extend([0, 0] * (BRICK_ROWS * BRICK_COLS - len(self.bricks)))  # Padding for missing bricks
         return np.array(state, dtype=np.float32)
+    def render(self, mode='rgb_array'):
+        surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
+        surface.fill(BLACK)
+        pygame.draw.rect(surface, WHITE, self.paddle.rect)
+        pygame.draw.ellipse(surface, WHITE, self.ball.rect)
         for brick in self.bricks:
+            pygame.draw.rect(surface, RED, brick.rect)
+        if mode == 'rgb_array':
+            return pygame.surfarray.array3d(surface)
+        elif mode == 'human':
+            pygame.display.get_surface().blit(surface, (0, 0))
+            pygame.display.flip()
     def close(self):
         # Uninitialise pygame - the counterpart of the module-level pygame.init().
         pygame.quit()
 # Training and playing with custom parameters
 def train_and_play(reward_size, penalty_size, platform_reward, iterations):
     env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
         obs, _ = env.reset()
         done = False
+        while not done:
             action, _states = model.predict(obs, deterministic=True)
             obs, reward, done, truncated, _ = env.step(action)
+            frame = env.render(mode='rgb_array')
             frame = np.rot90(frame)
             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
             video_frames.append(frame)
     env.close()
     return video_path
+# Main function with Gradio interface
 def main():
     iface = gr.Interface(
         fn=train_and_play,
         inputs=[
             gr.Number(label="Reward Size", value=1),
             gr.Number(label="Penalty Size", value=-1),
+            gr.Number(label="Platform Reward", value=5),
             gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
         ],
         outputs="video",
+        live=False  # Disable auto-generation on slider changes
     )
     iface.launch()
 if __name__ == "__main__":
     main()