stillerman committed on
Commit
4432909
·
1 Parent(s): 2d4d9a8

db refactor

Browse files
Files changed (5) hide show
  1. agent.py +2 -2
  2. cli.py +4 -4
  3. wiki_db_json.py +40 -0
  4. wiki_run_engine.py +10 -16
  5. wiki_solver.py +11 -10
agent.py CHANGED
@@ -84,7 +84,7 @@ class WikiRunAgent:
84
 
85
  for i, link in enumerate(links):
86
  # Check if link is available
87
- is_available = link in self.env.wiki_data
88
  status = "[green]✓[/green]" if is_available else "[red]✗[/red]"
89
  color = "green" if is_available else "red"
90
  table.add_row(
@@ -97,7 +97,7 @@ class WikiRunAgent:
97
 
98
  def _get_available_links(self, links):
99
  """Filter links to only those available in the wiki data"""
100
- return [link for link in links if link in self.env.wiki_data]
101
 
102
  @traceable(name="LLM Decision")
103
  def _get_llm_choice(self, state):
 
84
 
85
  for i, link in enumerate(links):
86
  # Check if link is available
87
+ is_available = self.env.article_exists(link)
88
  status = "[green]✓[/green]" if is_available else "[red]✗[/red]"
89
  color = "green" if is_available else "red"
90
  table.add_row(
 
97
 
98
  def _get_available_links(self, links):
99
  """Filter links to only those available in the wiki data"""
100
+ return [link for link in links if self.env.article_exists(link)]
101
 
102
  @traceable(name="LLM Decision")
103
  def _get_llm_choice(self, state):
cli.py CHANGED
@@ -47,7 +47,7 @@ class WikiRunCLI:
47
  row = []
48
  for i, link in enumerate(links):
49
  # Check if link is available in the current article
50
- is_available = link in self.env.wiki_data
51
  color = "green" if is_available else "red"
52
  row.append(f"[{color}]{i+1}. {link}[/{color}]")
53
  if len(row) == 3:
@@ -80,7 +80,7 @@ class WikiRunCLI:
80
 
81
  if choice == "p":
82
  # Solver mode
83
- available_articles = list(self.env.wiki_data.keys())
84
  console.print(f"Available articles: {len(available_articles)}")
85
 
86
  # Show a sample of available articles
@@ -94,7 +94,7 @@ class WikiRunCLI:
94
  target = Prompt.ask("Target article")
95
 
96
  # Initialize solver and find path
97
- solver = WikiSolver(self.env.wiki_data)
98
  path, error = solver.find_path(start, target)
99
 
100
  if error:
@@ -111,7 +111,7 @@ class WikiRunCLI:
111
  state = self.env.reset()
112
  else:
113
  # Get start and target articles
114
- available_articles = list(self.env.wiki_data.keys())
115
  console.print(f"Available articles: {len(available_articles)}")
116
 
117
  # Show a sample of available articles
 
47
  row = []
48
  for i, link in enumerate(links):
49
  # Check if link is available in the current article
50
+ is_available = self.env.article_exists(link)
51
  color = "green" if is_available else "red"
52
  row.append(f"[{color}]{i+1}. {link}[/{color}]")
53
  if len(row) == 3:
 
80
 
81
  if choice == "p":
82
  # Solver mode
83
+ available_articles = self.env.db.get_all_article_titles()
84
  console.print(f"Available articles: {len(available_articles)}")
85
 
86
  # Show a sample of available articles
 
94
  target = Prompt.ask("Target article")
95
 
96
  # Initialize solver and find path
97
+ solver = WikiSolver(self.env)
98
  path, error = solver.find_path(start, target)
99
 
100
  if error:
 
111
  state = self.env.reset()
112
  else:
113
  # Get start and target articles
114
+ available_articles = self.env.db.get_all_article_titles()
115
  console.print(f"Available articles: {len(available_articles)}")
116
 
117
  # Show a sample of available articles
wiki_db_json.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ class WikiDBJson:
4
+ def __init__(self, wiki_data_path):
5
+ """Initialize the database with path to Wikipedia data"""
6
+ self.wiki_data = self._load_wiki_data(wiki_data_path)
7
+
8
+ def _load_wiki_data(self, path):
9
+ """Load Wikipedia data from JSON file"""
10
+ print(f"Loading wiki data from {path}...")
11
+ with open(path, 'r', encoding='utf-8') as f:
12
+ wiki_data = json.load(f)
13
+ print(f"Loaded {len(wiki_data)} articles")
14
+ return wiki_data
15
+
16
+ def get_article_count(self):
17
+ """Return the number of articles in the database"""
18
+ return len(self.wiki_data)
19
+
20
+ def get_all_article_titles(self):
21
+ """Return a list of all article titles"""
22
+ return list(self.wiki_data.keys())
23
+
24
+ def get_article(self, title):
25
+ """Get article data by title"""
26
+ return self.wiki_data.get(title, {})
27
+
28
+ def article_exists(self, title):
29
+ """Check if an article exists in the database"""
30
+ return title in self.wiki_data
31
+
32
+ def get_article_text(self, title):
33
+ """Get the text of an article"""
34
+ article = self.get_article(title)
35
+ return article.get('text', '')
36
+
37
+ def get_article_links(self, title):
38
+ """Get the links of an article"""
39
+ article = self.get_article(title)
40
+ return article.get('links', [])
wiki_run_engine.py CHANGED
@@ -3,31 +3,23 @@
3
  # 2. Game state management
4
  # 3. Navigation between articles
5
 
6
- import json
7
  import random
 
8
 
9
  class WikiRunEnvironment:
10
  def __init__(self, wiki_data_path):
11
  """Initialize with path to Wikipedia data"""
12
- self.wiki_data = self._load_wiki_data(wiki_data_path)
13
  self.current_article = None
14
  self.target_article = None
15
  self.path_taken = []
16
  self.steps = 0
17
 
18
- def _load_wiki_data(self, path):
19
- """Load Wikipedia data from JSON file"""
20
- print(f"Loading wiki data from {path}...")
21
- with open(path, 'r', encoding='utf-8') as f:
22
- wiki_data = json.load(f)
23
- print(f"Loaded {len(wiki_data)} articles")
24
- return wiki_data
25
-
26
  def reset(self, start_article=None, target_article=None):
27
  """Reset the environment with new start/target articles"""
28
  if start_article is None or target_article is None:
29
  # Choose random articles if not specified
30
- available_articles = list(self.wiki_data.keys())
31
 
32
  if start_article is None:
33
  start_article = random.choice(available_articles)
@@ -49,12 +41,11 @@ class WikiRunEnvironment:
49
  if self.current_article is None:
50
  return None
51
 
52
- current = self.wiki_data.get(self.current_article, {})
53
  return {
54
  'current_article': self.current_article,
55
  'target_article': self.target_article,
56
- 'article_text': current.get('text', ''),
57
- 'available_links': current.get('links', []),
58
  'steps_taken': self.steps,
59
  'path_taken': self.path_taken,
60
  'is_complete': self.current_article == self.target_article
@@ -65,8 +56,7 @@ class WikiRunEnvironment:
65
  if self.current_article is None:
66
  return None, "Game not initialized. Call reset() first."
67
 
68
- current = self.wiki_data.get(self.current_article, {})
69
- available_links = current.get('links', [])
70
 
71
  if next_article not in available_links:
72
  return self.get_current_state(), f"Invalid link: {next_article} not in available links"
@@ -80,3 +70,7 @@ class WikiRunEnvironment:
80
  is_complete = self.current_article == self.target_article
81
 
82
  return self.get_current_state(), "Target reached!" if is_complete else ""
 
 
 
 
 
3
  # 2. Game state management
4
  # 3. Navigation between articles
5
 
 
6
  import random
7
+ from wiki_db_json import WikiDBJson
8
 
9
  class WikiRunEnvironment:
10
  def __init__(self, wiki_data_path):
11
  """Initialize with path to Wikipedia data"""
12
+ self.db = WikiDBJson(wiki_data_path)
13
  self.current_article = None
14
  self.target_article = None
15
  self.path_taken = []
16
  self.steps = 0
17
 
 
 
 
 
 
 
 
 
18
  def reset(self, start_article=None, target_article=None):
19
  """Reset the environment with new start/target articles"""
20
  if start_article is None or target_article is None:
21
  # Choose random articles if not specified
22
+ available_articles = self.db.get_all_article_titles()
23
 
24
  if start_article is None:
25
  start_article = random.choice(available_articles)
 
41
  if self.current_article is None:
42
  return None
43
 
 
44
  return {
45
  'current_article': self.current_article,
46
  'target_article': self.target_article,
47
+ 'article_text': self.db.get_article_text(self.current_article),
48
+ 'available_links': self.db.get_article_links(self.current_article),
49
  'steps_taken': self.steps,
50
  'path_taken': self.path_taken,
51
  'is_complete': self.current_article == self.target_article
 
56
  if self.current_article is None:
57
  return None, "Game not initialized. Call reset() first."
58
 
59
+ available_links = self.db.get_article_links(self.current_article)
 
60
 
61
  if next_article not in available_links:
62
  return self.get_current_state(), f"Invalid link: {next_article} not in available links"
 
70
  is_complete = self.current_article == self.target_article
71
 
72
  return self.get_current_state(), "Target reached!" if is_complete else ""
73
+
74
+ def article_exists(self, article_title):
75
+ """Check if an article exists in the database"""
76
+ return self.db.article_exists(article_title)
wiki_solver.py CHANGED
@@ -7,15 +7,15 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
7
  console = Console()
8
 
9
  class WikiSolver:
10
- def __init__(self, wiki_data):
11
- """Initialize the solver with wiki data"""
12
- self.wiki_data = wiki_data
13
 
14
  def find_path(self, start_article, target_article):
15
  """Find the shortest path using BFS"""
16
- if start_article not in self.wiki_data:
17
  return None, "Start article not found in wiki data"
18
- if target_article not in self.wiki_data:
19
  return None, "Target article not found in wiki data"
20
 
21
  # Initialize BFS
@@ -36,11 +36,12 @@ class WikiSolver:
36
  return path, None
37
 
38
  # Get all links from current article
39
- if current in self.wiki_data:
40
- for next_article in self.wiki_data[current].get('links', []):
41
- if next_article not in visited and next_article in self.wiki_data:
42
- visited.add(next_article)
43
- queue.append((next_article, path + [next_article]))
 
44
 
45
  return None, "No path found"
46
 
 
7
  console = Console()
8
 
9
  class WikiSolver:
10
+ def __init__(self, wiki_run_engine):
11
+ """Initialize the solver with a WikiRunEnvironment instance"""
12
+ self.engine = wiki_run_engine
13
 
14
  def find_path(self, start_article, target_article):
15
  """Find the shortest path using BFS"""
16
+ if not self.engine.article_exists(start_article):
17
  return None, "Start article not found in wiki data"
18
+ if not self.engine.article_exists(target_article):
19
  return None, "Target article not found in wiki data"
20
 
21
  # Initialize BFS
 
36
  return path, None
37
 
38
  # Get all links from current article
39
+ # Need to set current article to get links
40
+ state = self.engine.reset(current, target_article)
41
+ for next_article in state['available_links']:
42
+ if next_article not in visited and self.engine.article_exists(next_article):
43
+ visited.add(next_article)
44
+ queue.append((next_article, path + [next_article]))
45
 
46
  return None, "No path found"
47