HemanM committed on
Commit
e7e30db
·
verified ·
1 Parent(s): 646f772

Update watchdog.py

Browse files
Files changed (1) hide show
  1. watchdog.py +54 -59
watchdog.py CHANGED
@@ -1,61 +1,56 @@
1
- # watchdog.py
2
-
3
  import torch
4
- from transformers import AutoTokenizer
5
- from evo_model import EvoTransformerForClassification
6
- from firebase_admin import firestore
7
- import pandas as pd
8
-
9
- # Load tokenizer
10
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
11
-
12
- def load_feedback_data():
13
- db = firestore.client()
14
- docs = db.collection("evo_feedback_logs").stream()
15
- data = []
 
 
 
 
16
  for doc in docs:
17
- d = doc.to_dict()
18
- if all(k in d for k in ["goal", "solution_1", "solution_2", "correct_answer"]):
19
- data.append((
20
- d["goal"],
21
- d["solution_1"],
22
- d["solution_2"],
23
- 0 if d["correct_answer"] == "Solution 1" else 1
24
- ))
25
- return pd.DataFrame(data, columns=["goal", "sol1", "sol2", "label"])
26
-
27
- def encode(goal, sol1, sol2):
28
- prompt = f"Goal: {goal} Option 1: {sol1} Option 2: {sol2}"
29
- return tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
30
-
31
- def manual_retrain():
32
- try:
33
- data = load_feedback_data()
34
- if data.empty:
35
- print("[Retrain Error] No training data found.")
36
- return False
37
-
38
- model = EvoTransformerForClassification.from_pretrained("trained_model")
39
- optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
40
-
41
- model.train()
42
- for _, row in data.sample(frac=1).iterrows():
43
- encoded = encode(row["goal"], row["sol1"], row["sol2"])
44
- labels = torch.tensor([row["label"]])
45
- outputs = model(input_ids=encoded["input_ids"], attention_mask=encoded["attention_mask"], labels=labels)
46
-
47
- if isinstance(outputs, tuple):
48
- loss = outputs[0]
49
- else:
50
- loss = outputs
51
-
52
- optimizer.zero_grad()
53
- loss.backward()
54
- optimizer.step()
55
-
56
- model.save_pretrained("trained_model")
57
- print("✅ Evo retrained and saved.")
58
- return True
59
- except Exception as e:
60
- print(f"[Retrain Error] {e}")
61
- return False
 
1
+ import os
 
2
  import torch
3
+ import firebase_admin
4
+ from firebase_admin import credentials, firestore
5
+ from model import SimpleEvoModel
6
+
7
+ # Initialize Firebase if not already initialized
8
+ if not firebase_admin._apps:
9
+ cred = credentials.Certificate("firebase_key.json")
10
+ firebase_admin.initialize_app(cred)
11
+
12
+ db = firestore.client()
13
+
14
+ def fetch_training_data():
15
+ logs_ref = db.collection("evo_feedback")
16
+ docs = logs_ref.stream()
17
+
18
+ inputs, labels = [], []
19
  for doc in docs:
20
+ data = doc.to_dict()
21
+ goal = data.get("prompt", "")
22
+ winner = data.get("winner", "")
23
+ if winner:
24
+ # Simulated encoding
25
+ vector = [float(ord(c) % 256) / 255.0 for c in (goal + winner)]
26
+ vector = vector[:768] + [0.0] * max(0, 768 - len(vector)) # pad/truncate
27
+ label = 0 if "1" in winner else 1
28
+ inputs.append(vector)
29
+ labels.append(label)
30
+
31
+ return torch.tensor(inputs, dtype=torch.float32), torch.tensor(labels, dtype=torch.long)
32
+
33
+ def retrain_and_save():
34
+ X, y = fetch_training_data()
35
+ if len(X) < 2:
36
+ print("⚠️ Not enough training data.")
37
+ return
38
+
39
+ model = SimpleEvoModel()
40
+ loss_fn = torch.nn.CrossEntropyLoss()
41
+ optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
42
+
43
+ for epoch in range(5):
44
+ optimizer.zero_grad()
45
+ output = model(X)
46
+ loss = loss_fn(output, y)
47
+ loss.backward()
48
+ optimizer.step()
49
+
50
+ # Save retrained model to trained_model/
51
+ os.makedirs("trained_model", exist_ok=True)
52
+ torch.save(model.state_dict(), "trained_model/pytorch_model.bin")
53
+ print("✅ EvoTransformer retrained and saved to trained_model/")
54
+
55
+ if __name__ == "__main__":
56
+ retrain_and_save()