Spaces:
Runtime error
Runtime error
Commit
·
fa70ae5
0
Parent(s):
init
Browse files- .gitignore +2 -0
- README.md +1 -0
- agent.py +136 -0
- cli.py +163 -0
- wiki_parser.py +131 -0
- wiki_run_engine.py +82 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
*.bz2
|
2 |
+
data
|
README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
wget https://dumps.wikimedia.org/simplewiki/20250420/simplewiki-20250420-pages-articles-multistream.xml.bz2
|
agent.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LLM agent player using HuggingFace's smolagent
|
2 |
+
from wiki_run_engine import WikiRunEnvironment
|
3 |
+
from rich.console import Console
|
4 |
+
|
5 |
+
console = Console()
|
6 |
+
|
7 |
+
try:
|
8 |
+
from smolagent import Agent, AgentConfig
|
9 |
+
except ImportError:
|
10 |
+
console.print("[red]smolagent package not found. Please install with 'uv pip install smolagent'[/red]")
|
11 |
+
raise
|
12 |
+
|
13 |
+
class AgentPlayer:
    """LLM-driven player for the Wiki Run game.

    Owns a ``WikiRunEnvironment`` (``self.env``) and an LLM agent
    (``self.agent``). On every step the agent is shown the current article's
    links and asked to pick one through the ``choose_next_article`` tool.
    """

    def __init__(self, wiki_data_path, model_name="HuggingFaceH4/zephyr-7b-beta"):
        """Initialize agent player.

        Args:
            wiki_data_path: Path passed straight to ``WikiRunEnvironment``.
            model_name: Model identifier passed to ``AgentConfig``.
        """
        self.env = WikiRunEnvironment(wiki_data_path)

        # Initialize LLM agent
        config = AgentConfig(
            model=model_name,
            output_parser="json",
        )
        self.agent = Agent(config)

    @staticmethod
    def _tool_spec():
        """Return a fresh description of the ``choose_next_article`` tool."""
        return {
            "name": "choose_next_article",
            "description": "Choose the next Wikipedia article to navigate to",
            "parameters": {
                "type": "object",
                "properties": {
                    "article": {
                        "type": "string",
                        "description": "The title of the next article to navigate to",
                    },
                    "reasoning": {
                        "type": "string",
                        "description": "Explanation of why this article was chosen",
                    },
                },
                "required": ["article", "reasoning"],
            },
        }

    def play(self, start_article=None, target_article=None, max_steps=20):
        """Play a game of Wiki Run using the LLM agent.

        Args:
            start_article: Title to start from; forwarded to ``env.reset``.
            target_article: Title to reach; forwarded to ``env.reset``.
            max_steps: Maximum number of moves before giving up.

        Returns:
            The final state dict returned by the environment.
        """
        import random

        from rich.markup import escape

        # Reset environment
        state = self.env.reset(start_article, target_article)

        console.print("[bold]Agent Wiki Run[/bold]")
        console.print(f"Starting article: [cyan]{state['current_article']}[/cyan]")
        console.print(f"Target article: [red]{state['target_article']}[/red]")
        console.print()

        steps = 0
        dead_end = False
        while not state['is_complete'] and steps < max_steps:
            console.print(f"[bold]Step {steps + 1}:[/bold]")
            console.print(f"Current article: [cyan]{state['current_article']}[/cyan]")

            links = state['available_links']
            if not links:
                # Nothing to choose from: neither the agent nor the random
                # fallback can move, so stop instead of crashing in
                # random.choice([]).
                console.print("[red]Dead end: the current article has no available links.[/red]")
                dead_end = True
                break

            # Get agent's decision
            tool_result = self.agent.run(
                self._create_agent_prompt(state),
                tools=[self._tool_spec()],
            )

            # Extract agent's choice, tolerating a missing or malformed result
            choice = tool_result.get("choose_next_article", {}) if isinstance(tool_result, dict) else {}
            if not isinstance(choice, dict):
                choice = {}
            next_article = choice.get("article", "")
            reasoning = choice.get("reasoning", "")

            # Model output is free text; escape it so stray "[...]" sequences
            # are printed literally instead of being parsed as Rich markup.
            console.print(f"[yellow]Agent reasoning: {escape(str(reasoning))}[/yellow]")
            console.print(f"Agent chooses: [green]{escape(str(next_article))}[/green]")

            # Verify the choice is valid
            if next_article in links:
                state, message = self.env.step(next_article)
                if message:
                    console.print(f"[bold]{message}[/bold]")
            else:
                console.print("[red]Invalid choice! Agent selected an article that's not in the available links.[/red]")
                # Choose a random valid link as fallback
                next_article = random.choice(links)
                console.print(f"[yellow]Falling back to random choice: {next_article}[/yellow]")
                state, _ = self.env.step(next_article)

            steps += 1
            console.print()

        # Game complete
        if state['is_complete']:
            console.print("[bold green]Success! Agent reached the target article![/bold green]")
        elif not dead_end:
            console.print("[bold red]Failed to reach target within step limit.[/bold red]")

        console.print(f"Steps taken: [bold]{state['steps_taken']}[/bold]")
        console.print(f"Path: [italic]{' → '.join(state['path_taken'])}[/italic]")

        return state

    def _create_agent_prompt(self, state):
        """Create prompt for the agent.

        Args:
            state: State dict; reads ``current_article``, ``target_article``
                and ``available_links``.

        Returns:
            The prompt text listing the current article, the target and the
            comma-separated available links.
        """
        current = state['current_article']
        target = state['target_article']
        links = state['available_links']

        # Assembled line by line so the prompt text does not depend on how
        # this method happens to be indented.
        lines = [
            "You are playing the Wiki Run game. Your goal is to navigate from the current Wikipedia article to the target article using only the available links.",
            "",
            f"Current article: {current}",
            f"Target article: {target}",
            "",
            "Available links (choose one):",
            ', '.join(links),
            "",
            "Choose the link that you think will get you closest to the target article. Consider:",
            "1. Direct connections to the target",
            "2. Articles that might be in the same category as the target",
            "3. General articles that might have many links to other topics",
            "",
            "Use the choose_next_article tool to make your selection.",
        ]
        return "\n".join(lines)
|
125 |
+
|
126 |
+
if __name__ == "__main__":
    import sys

    # The script takes exactly one required argument: the wiki data path.
    cli_args = sys.argv[1:]
    if not cli_args:
        console.print("[red]Please provide the path to Wikipedia data[/red]")
        console.print("Usage: python agent.py <wiki_data_path>")
        sys.exit(1)

    # Build the player from the given data path and run one game with defaults.
    AgentPlayer(cli_args[0]).play()
|
cli.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Rich CLI interface for the Wiki Run game
|
2 |
+
from rich.console import Console
|
3 |
+
from rich.panel import Panel
|
4 |
+
from rich.markdown import Markdown
|
5 |
+
from rich.prompt import Prompt
|
6 |
+
from rich.text import Text
|
7 |
+
from rich.table import Table
|
8 |
+
|
9 |
+
from wiki_run_engine import WikiRunEnvironment
|
10 |
+
|
11 |
+
console = Console()
|
12 |
+
|
13 |
+
class WikiRunCLI:
    """Interactive terminal front end for the Wiki Run game.

    Holds the game environment in ``self.env`` and renders everything through
    ``self.console``.
    """

    def __init__(self, wiki_data_path):
        """Initialize the CLI interface.

        Args:
            wiki_data_path: Path passed straight to ``WikiRunEnvironment``.
        """
        self.env = WikiRunEnvironment(wiki_data_path)
        self.console = Console()

    def _display_article(self, state):
        """Display the current article followed by its numbered links."""
        title = state['current_article']
        text = state['article_text']
        links = state['available_links']

        # Create a panel with article title
        self.console.print(Panel(f"[bold cyan]{title}[/bold cyan]",
                                 expand=False,
                                 border_style="cyan"))

        # The article text is rendered as-is; links are listed separately
        # below rather than highlighted inline.
        self.console.print(Markdown(text))

        # Display available links
        self.console.print("\n[bold green]Available Links:[/bold green]")

        # Create a table for links (3 columns)
        table = Table(show_header=False, box=None)
        table.add_column("Link 1", style="green")
        table.add_column("Link 2", style="green")
        table.add_column("Link 3", style="green")

        # Add links to table (3 per row), padding the last row with blanks
        numbered = [f"{i}. {link}" for i, link in enumerate(links, start=1)]
        for offset in range(0, len(numbered), 3):
            row = numbered[offset:offset + 3]
            table.add_row(*row, *[''] * (3 - len(row)))

        self.console.print(table)

    def _display_game_info(self, state):
        """Display the target, the step count and the path so far."""
        self.console.print("\n[bold yellow]Game Info:[/bold yellow]")
        self.console.print(f"Target Article: [bold red]{state['target_article']}[/bold red]")
        self.console.print(f"Steps Taken: [bold]{state['steps_taken']}[/bold]")
        self.console.print(f"Path: [italic]{' → '.join(state['path_taken'])}[/italic]")

    @staticmethod
    def _find_matches(query, links):
        """Return the links containing ``query`` (case-insensitive).

        Blank input matches nothing; without this guard an empty string would
        be a substring of every link.
        """
        needle = query.strip().lower()
        if not needle:
            return []
        return [link for link in links if needle in link.lower()]

    def _ask_for_article(self, label):
        """Prompt until the user enters a title present in the loaded data."""
        while True:
            title = Prompt.ask(label).strip()
            if title in self.env.wiki_data:
                return title
            self.console.print("[red]No article with that exact title. Try again.[/red]")

    def _start_game(self):
        """Ask how to pick the start/target articles and reset the environment."""
        # Let user choose between random articles or specific ones
        choice = Prompt.ask("Start with [bold]r[/bold]andom articles or [bold]s[/bold]pecific ones?",
                            choices=["r", "s"],
                            default="r")
        if choice == "r":
            return self.env.reset()

        import random

        # Get start and target articles
        available_articles = list(self.env.wiki_data.keys())
        self.console.print(f"Available articles: {len(available_articles)}")

        # Show a sample of available articles
        sample = random.sample(available_articles, min(10, len(available_articles)))
        self.console.print("Sample articles:")
        for article in sample:
            self.console.print(f"- {article}")

        # Unknown titles would start the game on an article with no text and
        # no links, so only titles present in the data are accepted.
        start = self._ask_for_article("Start article")
        target = self._ask_for_article("Target article")
        return self.env.reset(start, target)

    def _resolve_choice(self, choice, links):
        """Turn raw user input into one of ``links``, or ``None`` if invalid.

        A number selects by 1-based position; any other text selects by
        substring, asking the user to disambiguate when several links match.
        """
        text = choice.strip()
        if not text:
            return None

        try:
            idx = int(text) - 1
        except ValueError:
            pass
        else:
            return links[idx] if 0 <= idx < len(links) else None

        # Input is text, find matching links
        matches = self._find_matches(text, links)
        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]

        self.console.print("[yellow]Multiple matches found:[/yellow]")
        for i, match in enumerate(matches):
            self.console.print(f"{i+1}. {match}")
        sub_choice = Prompt.ask("Choose a match (number)", default="1")
        try:
            idx = int(sub_choice) - 1
        except ValueError:
            return None
        return matches[idx] if 0 <= idx < len(matches) else None

    def _play_round(self):
        """Run a single game from the welcome screen to the result screen."""
        self.console.clear()
        self.console.print("[bold]Welcome to Wiki Run![/bold]")
        self.console.print("Navigate from one Wikipedia article to another using only links.\n")

        state = self._start_game()

        # Main game loop
        while not state['is_complete']:
            self.console.clear()

            self._display_article(state)
            self._display_game_info(state)

            links = state['available_links']
            if not links:
                # No move is possible from here; end the round instead of
                # asking for a choice that can never be valid.
                self.console.print("\n[bold red]Dead end: this article has no links, so the target cannot be reached.[/bold red]")
                self.console.input("\nPress Enter to continue...")
                break

            # Get user input
            choice = Prompt.ask("\nChoose a link (number) or type part of a link name",
                                default="")

            next_article = self._resolve_choice(choice, links)
            if next_article is None:
                self.console.print("[red]Invalid choice. Try again.[/red]")
                self.console.input("\nPress Enter to continue...")
                continue

            state, message = self.env.step(next_article)
            if message and not state['is_complete']:
                # The next iteration clears the screen, so pause to let the
                # message be read.
                self.console.print(f"[bold]{message}[/bold]")
                self.console.input("\nPress Enter to continue...")

        # Game complete
        self.console.clear()
        if state['is_complete']:
            self.console.print("[bold green]Congratulations! You've reached the target article![/bold green]")
        else:
            self.console.print("[bold red]Game over: you got stuck before reaching the target article.[/bold red]")
        self.console.print(f"You took [bold]{state['steps_taken']}[/bold] steps.")
        self.console.print(f"Your path: [italic]{' → '.join(state['path_taken'])}[/italic]")

    def play(self):
        """Main game loop: play rounds until the user declines another one."""
        # A loop rather than recursion, so repeated games do not deepen the
        # call stack.
        while True:
            self._play_round()
            play_again = Prompt.ask("Play again? (y/n)", choices=["y", "n"], default="y")
            if play_again != "y":
                break
|
152 |
+
|
153 |
+
if __name__ == "__main__":
    import sys

    # The script takes exactly one required argument: the wiki data path.
    cli_args = sys.argv[1:]
    if not cli_args:
        console.print("[red]Please provide the path to Wikipedia data[/red]")
        console.print("Usage: python cli.py <wiki_data_path>")
        sys.exit(1)

    # Build the interface from the given data path and start the game loop.
    WikiRunCLI(cli_args[0]).play()
|
wiki_parser.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import bz2
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
import xml.etree.ElementTree as ET
|
5 |
+
from collections import defaultdict
|
6 |
+
from pathlib import Path
|
7 |
+
import os
|
8 |
+
|
9 |
+
def _page_record(page_elem):
    """Return ``(title, wikitext)`` for a main-namespace page, else ``None``."""
    # Reuse the namespace prefix found on the <page> tag itself, so every
    # export schema version (export-0.10, export-0.11, ...) is handled.
    uri, brace, _ = page_elem.tag.rpartition('}')
    prefix = uri + brace

    title = page_elem.findtext(f'.//{prefix}title')
    if not title:
        return None

    # Skip non-main namespace
    page_ns = page_elem.findtext(f'.//{prefix}ns')
    if page_ns is not None and page_ns != '0':
        return None

    content = page_elem.findtext(f'.//{prefix}text')
    if not content:
        return None

    return title, content


def parse_wiki_dump(dump_path, output_path, max_articles=None):
    """
    Parse the Wikipedia XML dump and extract articles with their links.

    Pages are streamed one at a time; only pages whose ``<ns>`` is ``0`` (or
    absent) and that have non-empty text are kept. Each kept page is stored
    under its title as ``{'title', 'text', 'links'}``.

    Args:
        dump_path: Path to the bz2 Wikipedia dump
        output_path: Directory to save the extracted data (created if missing)
        max_articles: Maximum number of articles to extract (None for all)

    Returns:
        The path to the saved JSON file
    """
    print(f"Parsing Wikipedia dump: {dump_path}")

    # Dictionary to store articles and their links
    wiki_data = {}
    article_count = 0
    root = None

    # Binary mode lets the XML parser handle decoding itself; the context
    # manager closes the dump even when the loop exits early.
    with bz2.open(dump_path, 'rb') as dump_file:
        for event, elem in ET.iterparse(dump_file, events=('start', 'end')):
            if event == 'start':
                # The first element opened is the document root.
                if root is None:
                    root = elem
                continue

            if elem.tag.rpartition('}')[2] != 'page':
                continue

            record = _page_record(elem)
            if record is not None:
                title, content = record

                # Add to wiki data
                wiki_data[title] = {
                    'title': title,
                    'text': content,
                    'links': extract_links(content),
                }
                article_count += 1

                # Print progress
                if article_count % 1000 == 0:
                    print(f"Processed {article_count} articles...")

            # Clear the page and detach finished children from the root so
            # memory does not grow with the number of pages read.
            elem.clear()
            root.clear()

            # Check if we've reached the maximum number of articles
            if max_articles and article_count >= max_articles:
                break

    print(f"Extracted {article_count} articles with their links.")

    # Save data to JSON file
    os.makedirs(output_path, exist_ok=True)
    output_file = os.path.join(output_path, 'wiki_data.json')
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(wiki_data, f)

    print(f"Data saved to {output_file}")
    return output_file
|
91 |
+
|
92 |
+
# Matches [[Link]], [[Link|Text]] and the pipe-trick form [[Link|]];
# group 1 is the link target.
_WIKILINK_RE = re.compile(r'\[\[([^|\]]+)(?:\|[^\]]*)?\]\]')


def extract_links(text):
    """Extract links from article wikitext.

    Args:
        text: Raw wikitext of one article.

    Returns:
        Link targets as a list without duplicates, in order of first
        appearance. Section anchors are dropped, underscores become spaces,
        whitespace is collapsed and the first letter is upper-cased so that
        ``[[apple]]`` and ``[[Apple#History]]`` both yield ``"Apple"``.
    """
    # A dict is used as an ordered set so the result order is the same on
    # every run.
    targets = {}
    for raw in _WIKILINK_RE.findall(text):
        # Skip non-article links
        # NOTE(review): "Category:" links are deliberately kept here, as in
        # the original; confirm they are wanted as navigation targets.
        if ':' in raw and not raw.startswith('Category:'):
            continue

        # Remove any section links (with #) and normalise spacing
        target = ' '.join(raw.split('#')[0].replace('_', ' ').split())

        # Skip empty links
        if not target:
            continue

        # Assumes the wiki treats the first letter of a title as
        # case-insensitive (the MediaWiki default).
        target = target[0].upper() + target[1:]
        targets.setdefault(target, None)

    return list(targets)
|
115 |
+
|
116 |
+
if __name__ == "__main__":
    import argparse

    # Command line: two required paths plus an optional article cap.
    arg_parser = argparse.ArgumentParser(description='Parse Wikipedia XML dump')
    arg_parser.add_argument('dump_path', help='Path to the Wikipedia XML dump (bz2 file)')
    arg_parser.add_argument('output_path', help='Path to save the extracted data')
    arg_parser.add_argument(
        '--max-articles',
        type=int,
        default=None,
        help='Maximum number of articles to extract (default: all)',
    )
    options = arg_parser.parse_args()

    # Make sure the destination directory exists before parsing starts.
    os.makedirs(options.output_path, exist_ok=True)

    # Run the extraction with the parsed options.
    parse_wiki_dump(options.dump_path, options.output_path, options.max_articles)
|
wiki_run_engine.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core game engine that manages:
|
2 |
+
# 1. Loading/parsing Wikipedia data
|
3 |
+
# 2. Game state management
|
4 |
+
# 3. Navigation between articles
|
5 |
+
|
6 |
+
import json
|
7 |
+
import random
|
8 |
+
|
9 |
+
class WikiRunEnvironment:
    """Game state for Wiki Run: navigate from a start article to a target.

    ``wiki_data`` maps an article title to a dict whose ``'text'`` and
    ``'links'`` entries are read by this class.
    """

    def __init__(self, wiki_data_path):
        """Initialize with path to Wikipedia data (a JSON file)."""
        self.wiki_data = self._load_wiki_data(wiki_data_path)
        self.current_article = None
        self.target_article = None
        self.path_taken = []
        self.steps = 0

    def _load_wiki_data(self, path):
        """Load Wikipedia data from JSON file and return the parsed object."""
        print(f"Loading wiki data from {path}...")
        with open(path, 'r', encoding='utf-8') as f:
            wiki_data = json.load(f)
        print(f"Loaded {len(wiki_data)} articles")
        return wiki_data

    def reset(self, start_article=None, target_article=None):
        """Reset the environment with new start/target articles.

        Args:
            start_article: Title to start from, or None to pick one at random.
            target_article: Title to reach, or None to pick a random article
                different from the start.

        Returns:
            The initial state dict (see ``get_current_state``).

        Raises:
            ValueError: If a random article is needed but the dataset has no
                suitable article to choose from.
        """
        if start_article is None or target_article is None:
            # Choose random articles if not specified
            available_articles = list(self.wiki_data)

            if start_article is None:
                if not available_articles:
                    raise ValueError("Cannot pick a random start article: no articles loaded")
                start_article = random.choice(available_articles)

            if target_article is None:
                # Ensure target is different from start
                remaining = [a for a in available_articles if a != start_article]
                if not remaining:
                    raise ValueError(
                        "Cannot pick a random target article: no article other than the start is available"
                    )
                target_article = random.choice(remaining)

        self.current_article = start_article
        self.target_article = target_article
        self.path_taken = [start_article]
        self.steps = 0

        return self.get_current_state()

    def get_current_state(self):
        """Get the current state of the environment.

        Returns:
            None before the first ``reset()``; otherwise a dict with the keys
            ``current_article``, ``target_article``, ``article_text``,
            ``available_links``, ``steps_taken``, ``path_taken`` and
            ``is_complete``. The two lists are copies, so a state returned
            earlier is not changed by later steps or by the caller.
        """
        if self.current_article is None:
            return None

        # An article missing from the data yields empty text and no links.
        current = self.wiki_data.get(self.current_article, {})
        return {
            'current_article': self.current_article,
            'target_article': self.target_article,
            'article_text': current.get('text', ''),
            'available_links': list(current.get('links', [])),
            'steps_taken': self.steps,
            'path_taken': list(self.path_taken),
            'is_complete': self.current_article == self.target_article,
        }

    def step(self, next_article):
        """Take a step to the next article.

        Args:
            next_article: Title to move to; must be one of the current
                article's links.

        Returns:
            A ``(state, message)`` tuple. ``state`` is None when the game has
            not been reset yet. ``message`` is an error description for an
            invalid move, ``"Target reached!"`` on success, or ``""``.
        """
        if self.current_article is None:
            return None, "Game not initialized. Call reset() first."

        current = self.wiki_data.get(self.current_article, {})
        available_links = current.get('links', [])

        if next_article not in available_links:
            return self.get_current_state(), f"Invalid link: {next_article} not in available links"

        # Update state
        self.current_article = next_article
        self.path_taken.append(next_article)
        self.steps += 1

        # Check if we've reached the target
        is_complete = self.current_article == self.target_article
        message = "Target reached!" if is_complete else ""

        return self.get_current_state(), message
|