Spaces:
Sleeping
Sleeping
Fix styles and delete unused code in wordle_env
Browse files
- wordle_env/__init__.py +0 -3
- wordle_env/state.py +2 -4
- wordle_env/wordle.py +17 -39
wordle_env/__init__.py
CHANGED
|
@@ -7,9 +7,6 @@ from gym.envs.registration import (
|
|
| 7 |
)
|
| 8 |
import os
|
| 9 |
from . import wordle
|
| 10 |
-
# Classic
|
| 11 |
-
# ----------------------------------------
|
| 12 |
-
current_dir = os.path.realpath(os.path.dirname(__file__))
|
| 13 |
|
| 14 |
|
| 15 |
register(
|
|
|
|
| 7 |
)
|
| 8 |
import os
|
| 9 |
from . import wordle
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
register(
|
wordle_env/state.py
CHANGED
|
@@ -2,13 +2,11 @@
|
|
| 2 |
Keep the state in a 1D int array
|
| 3 |
|
| 4 |
index[0] = remaining steps
|
| 5 |
-
Rest of data is laid out as binary array
|
| 6 |
-
|
| 7 |
-
[1..27] = whether char has been guessed or not
|
| 8 |
|
| 9 |
[[status, status, status, status, status]
|
| 10 |
for _ in "ABCD..."]
|
| 11 |
where status has codes
|
|
|
|
| 12 |
[1, 0, 0] - char is definitely not in this spot
|
| 13 |
[0, 1, 0] - char is maybe in this spot
|
| 14 |
[0, 0, 1] - char is definitely in this spot
|
|
@@ -126,6 +124,7 @@ def get_mask(word: str, goal_word: str) -> List[int]:
|
|
| 126 |
|
| 127 |
return mask
|
| 128 |
|
|
|
|
| 129 |
def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
| 130 |
"""
|
| 131 |
return a copy of state that has been updated to new state
|
|
@@ -171,4 +170,3 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
| 171 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|
| 172 |
processed_letters.append(c)
|
| 173 |
return state, reward
|
| 174 |
-
|
|
|
|
| 2 |
Keep the state in a 1D int array
|
| 3 |
|
| 4 |
index[0] = remaining steps
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
[[status, status, status, status, status]
|
| 7 |
for _ in "ABCD..."]
|
| 8 |
where status has codes
|
| 9 |
+
[0, 0, 0] - no information about the char
|
| 10 |
[1, 0, 0] - char is definitely not in this spot
|
| 11 |
[0, 1, 0] - char is maybe in this spot
|
| 12 |
[0, 0, 1] - char is definitely in this spot
|
|
|
|
| 124 |
|
| 125 |
return mask
|
| 126 |
|
| 127 |
+
|
| 128 |
def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
| 129 |
"""
|
| 130 |
return a copy of state that has been updated to new state
|
|
|
|
| 170 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|
| 171 |
processed_letters.append(c)
|
| 172 |
return state, reward
|
|
|
wordle_env/wordle.py
CHANGED
|
@@ -12,9 +12,11 @@ from .words import complete_vocabulary, target_vocabulary
|
|
| 12 |
|
| 13 |
import random
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
|
| 19 |
class WordleEnvBase(gym.Env):
|
| 20 |
"""
|
|
@@ -23,21 +25,21 @@ class WordleEnvBase(gym.Env):
|
|
| 23 |
* 13k for full vocab
|
| 24 |
State space is defined as:
|
| 25 |
* 6 possibilities for turns (WORDLE_TURNS)
|
| 26 |
-
* Each VALID_CHAR has a state of 0/1 for whether it's been guessed before
|
| 27 |
* For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
|
| 28 |
for full game, this is (3^5)^26
|
| 29 |
Each state has 1 + 5*26 possibilities
|
| 30 |
Reward:
|
| 31 |
Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
|
|
|
|
| 32 |
Starting State:
|
| 33 |
Random goal word
|
| 34 |
-
Initial state with turn 0, all chars Unvisited
|
| 35 |
"""
|
|
|
|
| 36 |
def __init__(self, words: List[str],
|
| 37 |
-
max_turns: int=6,
|
| 38 |
-
allowable_words: Optional[int]=None,
|
| 39 |
-
|
| 40 |
-
mask_based_state_updates: bool=False):
|
| 41 |
assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
|
| 42 |
self.words = words
|
| 43 |
self.max_turns = max_turns
|
|
@@ -46,16 +48,11 @@ class WordleEnvBase(gym.Env):
|
|
| 46 |
if not self.allowable_words:
|
| 47 |
self.allowable_words = len(self.words)
|
| 48 |
|
| 49 |
-
self.frequencies = None
|
| 50 |
-
if frequencies:
|
| 51 |
-
assert len(words) == len(frequencies), f'{len(words), len(frequencies)}'
|
| 52 |
-
self.frequencies = np.array(frequencies, dtype=np.float32) / sum(frequencies)
|
| 53 |
-
|
| 54 |
self.action_space = spaces.Discrete(self.words_as_action_space())
|
| 55 |
self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
|
| 56 |
|
| 57 |
self.done = True
|
| 58 |
-
self.goal_word:
|
| 59 |
|
| 60 |
self.state: state.WordleState = None
|
| 61 |
self.state_updater = state.update
|
|
@@ -74,17 +71,17 @@ class WordleEnvBase(gym.Env):
|
|
| 74 |
goal_word = self.words[self.goal_word]
|
| 75 |
# assert word in self.words, f'{word} not in words list'
|
| 76 |
self.state, r = self.state_updater(state=self.state,
|
| 77 |
-
|
| 78 |
-
|
| 79 |
|
| 80 |
reward = r
|
| 81 |
if action == self.goal_word:
|
| 82 |
self.done = True
|
| 83 |
-
#reward = REWARD
|
| 84 |
if state.remaining_steps(self.state) == self.max_turns-1:
|
| 85 |
-
reward = 0
|
| 86 |
else:
|
| 87 |
-
#reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
|
| 88 |
reward = REWARD
|
| 89 |
elif state.remaining_steps(self.state) == 0:
|
| 90 |
self.done = True
|
|
@@ -108,25 +105,6 @@ class WordleEnvBase(gym.Env):
|
|
| 108 |
def words_as_action_space(self):
|
| 109 |
return len(self.words)
|
| 110 |
|
| 111 |
-
def encode_word(self, word):
|
| 112 |
-
encoded_word = np.array(
|
| 113 |
-
[[0] * WORDLE_N] * len(WORDLE_CHARS),
|
| 114 |
-
dtype=np.int32
|
| 115 |
-
)
|
| 116 |
-
for index, letter in enumerate(word):
|
| 117 |
-
cint = WORDLE_CHARS.index(letter)
|
| 118 |
-
encoded_word[cint][index] = 1
|
| 119 |
-
return encoded_word
|
| 120 |
-
|
| 121 |
-
def decode_word(self, action):
|
| 122 |
-
word = [''] * WORDLE_N
|
| 123 |
-
for index, letter_vec in enumerate(action):
|
| 124 |
-
if 1 in letter_vec:
|
| 125 |
-
for i, j in enumerate(letter_vec):
|
| 126 |
-
if j == 1:
|
| 127 |
-
word[i] = WORDLE_CHARS[index]
|
| 128 |
-
return ''.join(word)
|
| 129 |
-
|
| 130 |
|
| 131 |
class WordleEnv100OneAction(WordleEnvBase):
|
| 132 |
def __init__(self):
|
|
|
|
| 12 |
|
| 13 |
import random
|
| 14 |
|
| 15 |
+
|
| 16 |
+
def _load_words(limit: Optional[int] = None, complete: Optional[bool] = False) -> List[str]:
|
| 17 |
+
words = complete_vocabulary if complete else target_vocabulary
|
| 18 |
+
return words if not limit else words[:limit]
|
| 19 |
+
|
| 20 |
|
| 21 |
class WordleEnvBase(gym.Env):
|
| 22 |
"""
|
|
|
|
| 25 |
* 13k for full vocab
|
| 26 |
State space is defined as:
|
| 27 |
* 6 possibilities for turns (WORDLE_TURNS)
|
|
|
|
| 28 |
* For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
|
| 29 |
for full game, this is (3^5)^26
|
| 30 |
Each state has 1 + 5*26 possibilities
|
| 31 |
Reward:
|
| 32 |
Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
|
| 33 |
+
1 for every letter correctly guessed on each try
|
| 34 |
Starting State:
|
| 35 |
Random goal word
|
| 36 |
+
Initial state with turn 0, all chars Unvisited
|
| 37 |
"""
|
| 38 |
+
|
| 39 |
def __init__(self, words: List[str],
|
| 40 |
+
max_turns: int = 6,
|
| 41 |
+
allowable_words: Optional[int] = None,
|
| 42 |
+
mask_based_state_updates: bool = False):
|
|
|
|
| 43 |
assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
|
| 44 |
self.words = words
|
| 45 |
self.max_turns = max_turns
|
|
|
|
| 48 |
if not self.allowable_words:
|
| 49 |
self.allowable_words = len(self.words)
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
self.action_space = spaces.Discrete(self.words_as_action_space())
|
| 52 |
self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
|
| 53 |
|
| 54 |
self.done = True
|
| 55 |
+
self.goal_word: int = -1
|
| 56 |
|
| 57 |
self.state: state.WordleState = None
|
| 58 |
self.state_updater = state.update
|
|
|
|
| 71 |
goal_word = self.words[self.goal_word]
|
| 72 |
# assert word in self.words, f'{word} not in words list'
|
| 73 |
self.state, r = self.state_updater(state=self.state,
|
| 74 |
+
word=word,
|
| 75 |
+
goal_word=goal_word)
|
| 76 |
|
| 77 |
reward = r
|
| 78 |
if action == self.goal_word:
|
| 79 |
self.done = True
|
| 80 |
+
# reward = REWARD
|
| 81 |
if state.remaining_steps(self.state) == self.max_turns-1:
|
| 82 |
+
reward = 0 # -10*REWARD # No reward for guessing off the bat
|
| 83 |
else:
|
| 84 |
+
# reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
|
| 85 |
reward = REWARD
|
| 86 |
elif state.remaining_steps(self.state) == 0:
|
| 87 |
self.done = True
|
|
|
|
| 105 |
def words_as_action_space(self):
|
| 106 |
return len(self.words)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
class WordleEnv100OneAction(WordleEnvBase):
|
| 110 |
def __init__(self):
|