Spaces: Sleeping
Add correct char reward as a constant
Browse files
- wordle_env/const.py +1 -0
- wordle_env/state.py +4 -4
wordle_env/const.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
| 2 |
WORDLE_N = 5
|
| 3 |
REWARD = 10
|
|
|
|
|
|
| 1 |
WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
| 2 |
WORDLE_N = 5
|
| 3 |
REWARD = 10
|
| 4 |
+
CHAR_REWARD = 0.1
|
wordle_env/state.py
CHANGED
|
@@ -12,10 +12,10 @@ where status has codes
|
|
| 12 |
[0, 0, 1] - char is definitely in this spot
|
| 13 |
"""
|
| 14 |
import collections
|
| 15 |
-
from typing import List
|
| 16 |
import numpy as np
|
| 17 |
|
| 18 |
-
from .const import WORDLE_CHARS, WORDLE_N
|
| 19 |
|
| 20 |
|
| 21 |
WordleState = np.ndarray
|
|
@@ -138,7 +138,7 @@ def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
| 138 |
return update_from_mask(state, word, mask)
|
| 139 |
|
| 140 |
|
| 141 |
-
def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
| 142 |
state = state.copy()
|
| 143 |
reward = 0
|
| 144 |
state[0] -= 1
|
|
@@ -149,7 +149,7 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
| 149 |
if goal_word[i] == c:
|
| 150 |
# char at position i = yes, all other chars at position i == no
|
| 151 |
if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
|
| 152 |
-
reward +=
|
| 153 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
| 154 |
for ocint in range(len(WORDLE_CHARS)):
|
| 155 |
if ocint != cint:
|
|
|
|
| 12 |
[0, 0, 1] - char is definitely in this spot
|
| 13 |
"""
|
| 14 |
import collections
|
| 15 |
+
from typing import List, Tuple
|
| 16 |
import numpy as np
|
| 17 |
|
| 18 |
+
from .const import CHAR_REWARD, WORDLE_CHARS, WORDLE_N
|
| 19 |
|
| 20 |
|
| 21 |
WordleState = np.ndarray
|
|
|
|
| 138 |
return update_from_mask(state, word, mask)
|
| 139 |
|
| 140 |
|
| 141 |
+
def update(state: WordleState, word: str, goal_word: str) -> Tuple(WordleState, float):
|
| 142 |
state = state.copy()
|
| 143 |
reward = 0
|
| 144 |
state[0] -= 1
|
|
|
|
| 149 |
if goal_word[i] == c:
|
| 150 |
# char at position i = yes, all other chars at position i == no
|
| 151 |
if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
|
| 152 |
+
reward += CHAR_REWARD
|
| 153 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
| 154 |
for ocint in range(len(WORDLE_CHARS)):
|
| 155 |
if ocint != cint:
|