broadfield-dev committed on
Commit
081e43b
·
verified ·
1 Parent(s): f4d5ccb

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +83 -0
utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import re
4
+ import logging
5
+
6
+ from memory_logic import add_rule_entry, add_memory_entry
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
def format_insights_for_prompt(retrieved_insights_list: list[str]) -> tuple[str, list[dict]]:
    """Render retrieved insight strings into a grouped, score-sorted prompt section.

    Each insight may be tagged as ``[TYPE|score] text`` (TYPE one of CORE_RULE,
    RESPONSE_PRINCIPLE, BEHAVIORAL_ADJUSTMENT, GENERAL_LEARNING, case-insensitive).
    Untagged entries are treated as GENERAL_LEARNING with a default score of 0.5.

    Returns:
        A ``(formatted_text, parsed_items)`` tuple; ``parsed_items`` is the
        score-descending list of dicts with "type", "score", "text", "original".
    """
    if not retrieved_insights_list:
        return "No specific guiding principles or learned insights retrieved.", []

    tag_pattern = re.compile(
        r"\[(CORE_RULE|RESPONSE_PRINCIPLE|BEHAVIORAL_ADJUSTMENT|GENERAL_LEARNING)\|([\d\.]+?)\](.*)",
        re.DOTALL | re.IGNORECASE,
    )

    parsed_items = []
    for raw in retrieved_insights_list:
        stripped = raw.strip()
        m = tag_pattern.match(stripped)
        if m:
            parsed_items.append({
                "type": m.group(1).upper().replace(" ", "_"),
                "score": m.group(2),
                "text": m.group(3).strip(),
                "original": stripped,
            })
        else:
            parsed_items.append({
                "type": "GENERAL_LEARNING",
                "score": "0.5",
                "text": stripped,
                "original": stripped,
            })

    def _score_key(item):
        # Non-numeric scores sort below every valid score instead of raising.
        score = item["score"]
        return float(score) if score.replace('.', '', 1).isdigit() else -1.0

    parsed_items.sort(key=_score_key, reverse=True)

    grouped = {"CORE_RULE": [], "RESPONSE_PRINCIPLE": [], "BEHAVIORAL_ADJUSTMENT": [], "GENERAL_LEARNING": []}
    for item in parsed_items:
        # Unknown types fall back to the GENERAL_LEARNING bucket.
        bucket = grouped.get(item["type"], grouped["GENERAL_LEARNING"])
        bucket.append(f"- (Score: {item['score']}) {item['text']}")

    sections = [
        f"{kind.replace('_', ' ').title()}:\n" + "\n".join(entries)
        for kind, entries in grouped.items() if entries
    ]
    return ("\n\n".join(sections) if sections else "No guiding principles retrieved."), parsed_items
22
+
23
+ def load_rules_from_file(filepath: str | None, progress_callback=None):
24
+ if not filepath or not os.path.exists(filepath): return 0, 0, 0
25
+ added, skipped, errors = 0, 0, 0
26
+ with open(filepath, 'r', encoding='utf-8') as f: content = f.read()
27
+ if not content.strip(): return 0, 0, 0
28
+
29
+ potential_rules = []
30
+ if filepath.lower().endswith(".txt"):
31
+ potential_rules = content.split("\n\n---\n\n")
32
+ if len(potential_rules) == 1 and "\n" in content: potential_rules = content.splitlines()
33
+ elif filepath.lower().endswith(".jsonl"):
34
+ for line in content.splitlines():
35
+ if line.strip():
36
+ try: potential_rules.append(json.loads(line))
37
+ except json.JSONDecodeError: errors += 1
38
+
39
+ valid_rules = [r.strip() for r in potential_rules if isinstance(r, str) and r.strip()]
40
+ total = len(valid_rules)
41
+ if not total: return 0, 0, errors
42
+
43
+ for idx, rule_text in enumerate(valid_rules):
44
+ success, status_msg = add_rule_entry(rule_text)
45
+ if success: added += 1
46
+ elif status_msg == "duplicate": skipped += 1
47
+ else: errors += 1
48
+ if progress_callback: progress_callback((idx + 1) / total, f"Processed {idx+1}/{total} rules...")
49
+
50
+ logger.info(f"Loaded rules from {filepath}: Added {added}, Skipped {skipped}, Errors {errors}.")
51
+ return added, skipped, errors
52
+
53
+ def load_memories_from_file(filepath: str | None, progress_callback=None):
54
+ if not filepath or not os.path.exists(filepath): return 0, 0, 0
55
+ added, format_err, save_err = 0, 0, 0
56
+ with open(filepath, 'r', encoding='utf-8') as f: content = f.read()
57
+ if not content.strip(): return 0, 0, 0
58
+
59
+ mem_objects = []
60
+ if filepath.lower().endswith(".json"):
61
+ try:
62
+ data = json.loads(content)
63
+ mem_objects = data if isinstance(data, list) else [data]
64
+ except json.JSONDecodeError: format_err = 1
65
+ elif filepath.lower().endswith(".jsonl"):
66
+ for line in content.splitlines():
67
+ if line.strip():
68
+ try: mem_objects.append(json.loads(line))
69
+ except json.JSONDecodeError: format_err += 1
70
+
71
+ total = len(mem_objects)
72
+ if not total: return 0, format_err, 0
73
+
74
+ for idx, mem in enumerate(mem_objects):
75
+ if isinstance(mem, dict) and all(k in mem for k in ["user_input", "bot_response", "metrics"]):
76
+ success, _ = add_memory_entry(mem["user_input"], mem["metrics"], mem["bot_response"])
77
+ if success: added += 1
78
+ else: save_err += 1
79
+ else: format_err += 1
80
+ if progress_callback: progress_callback((idx + 1) / total, f"Processed {idx+1}/{total} memories...")
81
+
82
+ logger.info(f"Loaded memories from {filepath}: Added {added}, Format Errors {format_err}, Save Errors {save_err}.")
83
+ return added, format_err, save_err