HemanM committed
Commit da42a90 · verified · 1 Parent(s): 6604c50

Update watchdog.py

Files changed (1)
  1. watchdog.py +62 -45
watchdog.py CHANGED
@@ -1,71 +1,88 @@
 # watchdog.py
 
-import torch
-from evo_model import EvoTransformerForClassification, EvoTransformerConfig
-from transformers import BertTokenizer
 import firebase_admin
 from firebase_admin import credentials, firestore
-import os
-from datetime import datetime
-
-# ✅ Load tokenizer
-tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from transformers import BertTokenizer
+from torch.utils.data import DataLoader, Dataset
+from evo_model import EvoTransformerForClassification, EvoTransformerConfig
 
-# ✅ Init Firebase
+# Initialize Firebase
 if not firebase_admin._apps:
     cred = credentials.Certificate("firebase_key.json")
     firebase_admin.initialize_app(cred)
 
 db = firestore.client()
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 
+# Dataset for training
+class FeedbackDataset(Dataset):
+    def __init__(self, records, tokenizer, max_length=64):
+        self.records = records
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+        self.label_map = {"Solution 1": 0, "Solution 2": 1}
+
+    def __len__(self):
+        return len(self.records)
+
+    def __getitem__(self, idx):
+        row = self.records[idx]
+        combined = f"Goal: {row['goal']} Option 1: {row['solution_1']} Option 2: {row['solution_2']}"
+        inputs = self.tokenizer(combined, padding="max_length", truncation=True,
+                                max_length=self.max_length, return_tensors="pt")
+        label = self.label_map[row["correct_answer"]]
+        return {
+            "input_ids": inputs["input_ids"].squeeze(0),
+            "attention_mask": inputs["attention_mask"].squeeze(0),
+            "labels": torch.tensor(label)
+        }
+
+# Manual retrain trigger
 def manual_retrain():
     try:
-        # 🔍 Fetch feedback logs
+        # Step 1: Fetch feedback data from Firestore
         docs = db.collection("evo_feedback_logs").stream()
-        data = []
-        for doc in docs:
-            d = doc.to_dict()
-            if all(k in d for k in ["goal", "solution_1", "solution_2", "correct_answer"]):
-                label = 0 if d["correct_answer"] == "Solution 1" else 1
-                combined = f"{d['goal']} [SEP] {d['solution_1']} [SEP] {d['solution_2']}"
-                data.append((combined, label))
+        feedback_data = [doc.to_dict() for doc in docs if "goal" in doc.to_dict()]
 
-        if not data:
-            print("❌ No valid training data found.")
+        if len(feedback_data) < 5:
+            print("[Retrain Skipped] Not enough feedback.")
             return False
 
-        # ✅ Tokenize
-        inputs = tokenizer([x[0] for x in data], padding=True, truncation=True, return_tensors="pt")
-        labels = torch.tensor([x[1] for x in data])
+        # Step 2: Load tokenizer and dataset
+        dataset = FeedbackDataset(feedback_data, tokenizer)
+        loader = DataLoader(dataset, batch_size=4, shuffle=True)
 
-        # ✅ Load config + model
-        config = {
-            "vocab_size": tokenizer.vocab_size,
-            "d_model": 256,
-            "nhead": 4,
-            "dim_feedforward": 512,
-            "num_hidden_layers": 4
-        }
-        model_config = EvoTransformerConfig(**config)
-        model = EvoTransformerForClassification(model_config)
+        # Step 3: Load model
+        config = EvoTransformerConfig()
+        model = EvoTransformerForClassification(config)
+        model.train()
 
-        # ✅ Loss + optimizer
-        criterion = torch.nn.CrossEntropyLoss()
-        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+        # Step 4: Define optimizer and loss
+        optimizer = optim.Adam(model.parameters(), lr=2e-5)
+        loss_fn = nn.CrossEntropyLoss()
 
-        # ✅ Train (simple 3-epoch fine-tune)
-        model.train()
+        # Step 5: Train
         for epoch in range(3):
-            optimizer.zero_grad()
-            outputs = model(inputs["input_ids"])
-            loss = criterion(outputs, labels)
-            loss.backward()
-            optimizer.step()
-            print(f"[Epoch {epoch+1}] Loss: {loss.item():.4f}")
+            total_loss = 0
+            for batch in loader:
+                optimizer.zero_grad()
+                input_ids = batch["input_ids"]
+                attention_mask = batch["attention_mask"]
+                labels = batch["labels"]
+
+                logits = model(input_ids)
+                loss = loss_fn(logits, labels)
+                loss.backward()
+                optimizer.step()
+                total_loss += loss.item()
+            print(f"[Retrain] Epoch {epoch + 1} Loss: {total_loss:.4f}")
 
-        # ✅ Save model
+        # Step 6: Save updated model
         torch.save(model.state_dict(), "trained_model.pt")
-        print("✅ Evo updated via retrain from feedback!")
+        print("✅ Evo updated with latest feedback.")
         return True
 
     except Exception as e:
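
For reference, the retraining path assumes each document in the "evo_feedback_logs" collection carries the four fields read above. A minimal sketch of logging one such record (the collection name, field names, and accepted labels come from the diff; the call site and example values are illustrative):

    # Hypothetical feedback writer; the "evo_feedback_logs" collection and
    # field names match the diff, the example values are made up.
    db.collection("evo_feedback_logs").add({
        "goal": "Start a campfire in the rain",
        "solution_1": "Use a ferro rod with tinder kept dry in a tin",
        "solution_2": "Pour water on the wood first",
        "correct_answer": "Solution 1",  # label_map accepts "Solution 1" or "Solution 2"
    })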
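
manual_retrain() returns True on success and False when fewer than five feedback rows exist, so a caller can trigger it on demand. A sketch of one possible trigger, assuming this file is importable as watchdog (the import path and scheduling policy are assumptions, not part of this commit):

    # Hypothetical trigger script; "watchdog" as the module name is an
    # assumption based on the filename.
    from watchdog import manual_retrain

    if __name__ == "__main__":
        if manual_retrain():
            print("Retrain finished; trained_model.pt refreshed.")
        else:
            print("Retrain skipped or failed.")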
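
Since only a state_dict is saved, a consumer would rebuild the model with the same default EvoTransformerConfig() before loading the checkpoint. A sketch of inference over trained_model.pt, assuming the forward pass returns two-class logits as the training loop implies (the prompt template mirrors FeedbackDataset; the argmax readout is an assumption):

    import torch
    from transformers import BertTokenizer
    from evo_model import EvoTransformerConfig, EvoTransformerForClassification

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = EvoTransformerForClassification(EvoTransformerConfig())
    model.load_state_dict(torch.load("trained_model.pt"))
    model.eval()

    # Same template FeedbackDataset builds for training examples.
    text = "Goal: boil water Option 1: use a kettle Option 2: use a freezer"
    enc = tokenizer(text, padding="max_length", truncation=True,
                    max_length=64, return_tensors="pt")
    with torch.no_grad():
        logits = model(enc["input_ids"])  # attention mask unused, as in training
    winner = "Solution 1" if logits.argmax(dim=-1).item() == 0 else "Solution 2"
    print(winner)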