Spaces:

jrahn
/

RookWorld

Sleeping

jrahn committed on Sep 23, 2024

Commit

d28b192

verified ·

1 Parent(s): 0a2b7af

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ DEBUG = False
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     "text-generation",
-    model="jrahn/rookworld_7m_3e_gpt2_124M_hf",
     torch_dtype=torch.bfloat16,
     device=device
 )
@@ -34,6 +34,7 @@ def generate_action(state):
     try:
         action = generation[0]['generated_text'].split("B: ")[-1].strip()
         gr.Info(f"Policy generated move: {action}", duration=3)
     except:
         gr.Info(f"Policy generation invalid: {generation}", duration=None)
         action = "0000"
@@ -48,7 +49,8 @@ def generate_state(state, action, history):
     generation = pipe(prompt, **sampling_args)
     if DEBUG: print(generation)
     try:
-        new_state, reward, terminated, truncated = generation[0]['generated_text'].split("+")
         #gr.Info(f"Environment generated state: {new_state}", duration=3)
     except:
         new_state, reward, terminated, truncated = START_POSITION, "0", "0", "1"

 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     "text-generation",
+    model="jrahn/RookWorld-LM-124M",
     torch_dtype=torch.bfloat16,
     device=device
 )
     try:
         action = generation[0]['generated_text'].split("B: ")[-1].strip()
         gr.Info(f"Policy generated move: {action}", duration=3)
+        # TODO: display generated CoT
     except:
         gr.Info(f"Policy generation invalid: {generation}", duration=None)
         action = "0000"
     generation = pipe(prompt, **sampling_args)
     if DEBUG: print(generation)
     try:
+        parts = generation[0]['generated_text'].split("+")
+        new_state, reward, terminated, truncated = parts[-4], parts[-3], parts[-2], parts[-1]
         #gr.Info(f"Environment generated state: {new_state}", duration=3)
     except:
         new_state, reward, terminated, truncated = START_POSITION, "0", "0", "1"