Final_Assignment_AWorld

Sleeping

App Files Files Community

Duibonduil committed on Jun 30

Commit

d79f338

verified ·

1 Parent(s): 2814685

Upload 6 files

Browse files

Files changed (6) hide show

aworld/runners/__init__.py +2 -0
aworld/runners/call_driven_runner.py +810 -0
aworld/runners/event_runner.py +275 -0
aworld/runners/state_manager.py +332 -0
aworld/runners/task_runner.py +128 -0
aworld/runners/utils.py +111 -0

aworld/runners/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # coding: utf-8
2	+ # Copyright (c) 2025 inclusionAI.

aworld/runners/call_driven_runner.py ADDED Viewed

	@@ -0,0 +1,810 @@

+# coding: utf-8
+# Copyright (c) 2025 inclusionAI.
+import json
+import time
+import traceback
+import aworld.trace as trace
+from typing import List, Dict, Any, Tuple
+from aworld.config.conf import ToolConfig
+from aworld.core.agent.base import is_agent
+from aworld.agents.llm_agent import Agent
+from aworld.core.common import Observation, ActionModel, ActionResult
+from aworld.core.context.base import Context
+from aworld.core.event.base import Message
+from aworld.core.tool.base import ToolFactory, Tool, AsyncTool
+from aworld.core.tool.tool_desc import is_tool_by_name
+from aworld.core.task import Task, TaskResponse
+from aworld.logs.util import logger, color_log, Color, trace_logger
+from aworld.models.model_response import ToolCall
+from aworld.output.base import StepOutput, ToolResultOutput
+from aworld.runners.task_runner import TaskRunner
+from aworld.runners.utils import endless_detect
+from aworld.sandbox import Sandbox
+from aworld.tools.utils import build_observation
+from aworld.utils.common import override_in_subclass
+from aworld.utils.json_encoder import NumpyEncoder
+def action_result_transform(message: Message, sandbox: Sandbox) -> Tuple[Observation, float, bool, bool, dict]:
+    action_results = message.payload
+    result: ActionResult = action_results[-1]
+    # ignore image, dom_tree attribute, need to process them from action_results in the agent.
+    return build_observation(container_id=sandbox.sandbox_id,
+                             observer=result.tool_name,
+                             ability=result.action_name,
+                             content=result.content,
+                             action_result=action_results), 1.0, result.is_done, result.is_done, {}
+class WorkflowRunner(TaskRunner):
+    def __init__(self, task: Task, *args, **kwargs):
+        super().__init__(task=task, *args, **kwargs)
+    async def do_run(self, context: Context = None) -> TaskResponse:
+        self.max_steps = self.conf.get("max_steps", 100)
+        resp = await self._do_run(context)
+        self._task_response = resp
+        return resp
+    async def _do_run(self, context: Context = None) -> TaskResponse:
+        """Multi-agent sequence general process workflow.
+        NOTE: Use the agent's finished state(no tool calls) to control the inner loop.
+        Args:
+            observation: Observation based on env
+            info: Extend info by env
+        """
+        observation = self.observation
+        if not observation:
+            raise RuntimeError("no observation, check run process")
+        start = time.time()
+        msg = None
+        response = None
+        # Use trace.span to record the entire task execution process
+        with trace.span(f"task_execution_{self.task.id}", attributes={
+            "task_id": self.task.id,
+            "task_name": self.task.name,
+            "start_time": start
+        }) as task_span:
+            try:
+                response = await self._common_process(task_span)
+            except Exception as err:
+                logger.error(f"Runner run failed, err is {traceback.format_exc()}")
+            finally:
+                await self.outputs.mark_completed()
+                color_log(f"task token usage: {self.context.token_usage}",
+                          color=Color.pink,
+                          logger_=trace_logger)
+                for _, tool in self.tools.items():
+                    if isinstance(tool, AsyncTool):
+                        await tool.close()
+                    else:
+                        tool.close()
+                task_span.set_attributes({
+                    "end_time": time.time(),
+                    "duration": time.time() - start,
+                    "error": msg
+                })
+                # todo sandbox cleanup
+                if self.swarm and hasattr(self.swarm, 'agents') and self.swarm.agents:
+                    for agent_name, agent in self.swarm.agents.items():
+                        try:
+                            if hasattr(agent, 'sandbox') and agent.sandbox:
+                                await agent.sandbox.cleanup()
+                        except Exception as e:
+                            logger.warning(f"call_driven_runner Failed to cleanup sandbox for agent {agent_name}: {e}")
+            return response
+    async def _common_process(self, task_span):
+        start = time.time()
+        step = 1
+        pre_agent_name = None
+        observation = self.observation
+        for idx, agent in enumerate(self.swarm.ordered_agents):
+            observation.from_agent_name = agent.id()
+            observations = [observation]
+            policy = None
+            cur_agent = agent
+            while step <= self.max_steps:
+                await self.outputs.add_output(
+                    StepOutput.build_start_output(name=f"Step{step}", step_num=step))
+                terminated = False
+                observation = self.swarm.action_to_observation(policy, observations)
+                observation.from_agent_name = observation.from_agent_name or cur_agent.id()
+                if observation.to_agent_name and observation.to_agent_name != cur_agent.id():
+                    cur_agent = self.swarm.agents.get(observation.to_agent_name)
+                exp_id = self._get_step_span_id(step, cur_agent.id())
+                with trace.span(f"step_execution_{exp_id}") as step_span:
+                    try:
+                        step_span.set_attributes({
+                            "exp_id": exp_id,
+                            "task_id": self.task.id,
+                            "task_name": self.task.name,
+                            "trace_id": trace.get_current_span().get_trace_id(),
+                            "step": step,
+                            "agent_id": cur_agent.id(),
+                            "pre_agent": pre_agent_name,
+                            "observation": json.dumps(observation.model_dump(exclude_none=True),
+                                                      ensure_ascii=False,
+                                                      cls=NumpyEncoder)
+                        })
+                    except:
+                        pass
+                    pre_agent_name = cur_agent.id()
+                    if not override_in_subclass('async_policy', cur_agent.__class__, Agent):
+                        message = cur_agent.run(observation,
+                                                step=step,
+                                                outputs=self.outputs,
+                                                stream=self.conf.get("stream", False),
+                                                exp_id=exp_id)
+                    else:
+                        message = await cur_agent.async_run(observation,
+                                                            step=step,
+                                                            outputs=self.outputs,
+                                                            stream=self.conf.get("stream",
+                                                                                 False),
+                                                            exp_id=exp_id)
+                    policy = message.payload
+                    step_span.set_attribute("actions",
+                                            json.dumps([action.model_dump() for action in policy],
+                                                       ensure_ascii=False))
+                    observation.content = None
+                    color_log(f"{cur_agent.id()} policy: {policy}")
+                    if not policy:
+                        logger.warning(f"current agent {cur_agent.id()} no policy to use.")
+                        await self.outputs.add_output(
+                            StepOutput.build_failed_output(name=f"Step{step}",
+                                                           step_num=step,
+                                                           data=f"current agent {cur_agent.id()} no policy to use.")
+                        )
+                        await self.outputs.mark_completed()
+                        task_span.set_attributes({
+                            "end_time": time.time(),
+                            "duration": time.time() - start,
+                            "status": "failed",
+                            "error": f"current agent {cur_agent.id()} no policy to use."
+                        })
+                        return TaskResponse(msg=f"current agent {cur_agent.id()} no policy to use.",
+                                            answer="",
+                                            success=False,
+                                            id=self.task.id,
+                                            time_cost=(time.time() - start),
+                                            usage=self.context.token_usage)
+                    if is_agent(policy[0]):
+                        status, info = await self._agent(agent, observation, policy, step)
+                        if status == 'normal':
+                            if info:
+                                observations.append(observation)
+                        elif status == 'break':
+                            observation = self.swarm.action_to_observation(policy, observations)
+                            if idx == len(self.swarm.ordered_agents) - 1:
+                                return TaskResponse(
+                                    answer=observation.content,
+                                    success=True,
+                                    id=self.task.id,
+                                    time_cost=(time.time() - start),
+                                    usage=self.context.token_usage
+                                )
+                            break
+                        elif status == 'return':
+                            await self.outputs.add_output(
+                                StepOutput.build_finished_output(name=f"Step{step}", step_num=step)
+                            )
+                            info.time_cost = (time.time() - start)
+                            task_span.set_attributes({
+                                "end_time": time.time(),
+                                "duration": info.time_cost,
+                                "status": "success"
+                            })
+                            return info
+                    elif is_tool_by_name(policy[0].tool_name):
+                        # todo sandbox
+                        msg, reward, terminated = await self._tool_call(policy, observations, step,
+                                                                        cur_agent)
+                        step_span.set_attribute("reward", reward)
+                    else:
+                        logger.warning(f"Unrecognized policy: {policy[0]}")
+                        await self.outputs.add_output(
+                            StepOutput.build_failed_output(name=f"Step{step}",
+                                                           step_num=step,
+                                                           data=f"Unrecognized policy: {policy[0]}, need to check prompt or agent / tool.")
+                        )
+                        await self.outputs.mark_completed()
+                        task_span.set_attributes({
+                            "end_time": time.time(),
+                            "duration": time.time() - start,
+                            "status": "failed",
+                            "error": f"Unrecognized policy: {policy[0]}, need to check prompt or agent / tool."
+                        })
+                        return TaskResponse(
+                            msg=f"Unrecognized policy: {policy[0]}, need to check prompt or agent / tool.",
+                            answer="",
+                            success=False,
+                            id=self.task.id,
+                            time_cost=(time.time() - start),
+                            usage=self.context.token_usage
+                        )
+                    await self.outputs.add_output(
+                        StepOutput.build_finished_output(name=f"Step{step}",
+                                                         step_num=step, )
+                    )
+                    step += 1
+                    if terminated and agent.finished:
+                        logger.info(f"{agent.id()} finished")
+                        if idx == len(self.swarm.ordered_agents) - 1:
+                            return TaskResponse(
+                                answer=observations[-1].content,
+                                success=True,
+                                id=self.task.id,
+                                time_cost=(time.time() - start),
+                                usage=self.context.token_usage
+                            )
+                        break
+    async def _agent(self, agent: Agent, observation: Observation, policy: List[ActionModel], step: int):
+        # only one agent, and get agent from policy
+        policy_for_agent = policy[0]
+        agent_name = policy_for_agent.tool_name
+        if not agent_name:
+            agent_name = policy_for_agent.agent_name
+        cur_agent: Agent = self.swarm.agents.get(agent_name)
+        if not cur_agent:
+            raise RuntimeError(f"Can not find {agent_name} agent in swarm.")
+        status = "normal"
+        if cur_agent.id() == agent.id():
+            # Current agent is entrance agent, means need to exit to the outer loop
+            logger.info(f"{cur_agent.id()} exit the loop")
+            status = "break"
+            return status, None
+        if agent.handoffs and agent_name not in agent.handoffs:
+            # Unable to hand off, exit to the outer loop
+            status = "return"
+            return status, TaskResponse(msg=f"Can not handoffs {agent_name} agent ",
+                                        answer=observation.content,
+                                        success=False,
+                                        id=self.task.id,
+                                        usage=self.context.token_usage)
+        # Check if current agent done
+        if cur_agent.finished:
+            cur_agent._finished = False
+            logger.info(f"{cur_agent.id()} agent be be handed off, so finished state reset to False.")
+        con = policy_for_agent.policy_info
+        if policy_for_agent.params and 'content' in policy_for_agent.params:
+            con = policy_for_agent.params['content']
+        if observation:
+            observation.content = con
+        else:
+            observation = Observation(content=con)
+            return status, observation
+        return status, None
+    # todo sandbox
+    async def _tool_call(self, policy: List[ActionModel], observations: List[Observation], step: int, agent: Agent):
+        msg = None
+        terminated = False
+        # group action by tool name
+        tool_mapping = dict()
+        reward = 0.0
+        # Directly use or use tools after creation.
+        for act in policy:
+            if not self.tools or (self.tools and act.tool_name not in self.tools):
+                # dynamic only use default config in module.
+                conf = self.tools_conf.get(act.tool_name)
+                tool = ToolFactory(act.tool_name, conf=conf, asyn=conf.use_async if conf else False)
+                if isinstance(tool, Tool):
+                    tool.reset()
+                elif isinstance(tool, AsyncTool):
+                    await tool.reset()
+                tool_mapping[act.tool_name] = []
+                self.tools[act.tool_name] = tool
+            if act.tool_name not in tool_mapping:
+                tool_mapping[act.tool_name] = []
+            tool_mapping[act.tool_name].append(act)
+        for tool_name, action in tool_mapping.items():
+            # Execute action using browser tool and unpack all return values
+            if isinstance(self.tools[tool_name], Tool):
+                message = self.tools[tool_name].step(action)
+            elif isinstance(self.tools[tool_name], AsyncTool):
+                # todo sandbox
+                message = await self.tools[tool_name].step(action, agent=agent)
+            else:
+                logger.warning(f"Unsupported tool type: {self.tools[tool_name]}")
+                continue
+            observation, reward, terminated, _, info = message.payload
+            # observation, reward, terminated, _, info = action_result_transform(message, sandbox=None)
+            observations.append(observation)
+            for i, item in enumerate(action):
+                tool_output = ToolResultOutput(
+                    tool_type=tool_name,
+                    tool_name=item.tool_name,
+                    data=observation.content,
+                    origin_tool_call=ToolCall.from_dict({
+                        "function": {
+                            "name": item.action_name,
+                            "arguments": item.params,
+                        }
+                    })
+                )
+                await self.outputs.add_output(tool_output)
+            # Check if there's an exception in info
+            if info.get("exception"):
+                color_log(f"Step {step} failed with exception: {info['exception']}", color=Color.red)
+                msg = f"Step {step} failed with exception: {info['exception']}"
+            logger.info(f"step: {step} finished by tool action: {action}.")
+            log_ob = Observation(content='' if observation.content is None else observation.content,
+                                 action_result=observation.action_result)
+            trace_logger.info(f"{tool_name} observation: {log_ob}", color=Color.green)
+        return msg, reward, terminated
+    def _get_step_span_id(self, step, cur_agent_name):
+        key = (step, cur_agent_name)
+        if key not in self.step_agent_counter:
+            self.step_agent_counter[key] = 0
+        else:
+            self.step_agent_counter[key] += 1
+        exp_index = self.step_agent_counter[key]
+        return f"{self.task.id}_{step}_{cur_agent_name}_{exp_index}"
+class LoopWorkflowRunner(WorkflowRunner):
+    async def _do_run(self, context: Context = None) -> TaskResponse:
+        observation = self.observation
+        if not observation:
+            raise RuntimeError("no observation, check run process")
+        start = time.time()
+        step = 1
+        msg = None
+        # Use trace.span to record the entire task execution process
+        with trace.span(f"task_execution_{self.task.id}", attributes={
+            "task_id": self.task.id,
+            "task_name": self.task.name,
+            "start_time": start
+        }) as task_span:
+            try:
+                for i in range(self.max_steps):
+                    await self._common_process(task_span)
+                    step += 1
+            except Exception as err:
+                logger.error(f"Runner run failed, err is {traceback.format_exc()}")
+            finally:
+                await self.outputs.mark_completed()
+                color_log(f"task token usage: {self.context.token_usage}",
+                          color=Color.pink,
+                          logger_=trace_logger)
+                for _, tool in self.tools.items():
+                    if isinstance(tool, AsyncTool):
+                        await tool.close()
+                    else:
+                        tool.close()
+                task_span.set_attributes({
+                    "end_time": time.time(),
+                    "duration": time.time() - start,
+                    "error": msg
+                })
+            return TaskResponse(msg=msg,
+                                answer=observation.content,
+                                success=True if not msg else False,
+                                id=self.task.id,
+                                time_cost=(time.time() - start),
+                                usage=self.context.token_usage)
+class HandoffRunner(TaskRunner):
+    def __init__(self, task: Task, *args, **kwargs):
+        super().__init__(task=task, *args, **kwargs)
+    async def do_run(self, context: Context = None) -> TaskResponse:
+        resp = await self._do_run(context)
+        self._task_response = resp
+        return resp
+    async def _do_run(self, context: Context = None) -> TaskResponse:
+        """Multi-agent general process based on handoff.
+        NOTE: Use the agent's finished state to control the loop, so the agent must carefully set finished state.
+        Args:
+            context: Context of runner.
+        """
+        start = time.time()
+        observation = self.observation
+        info = dict()
+        step = 0
+        max_steps = self.conf.get("max_steps", 100)
+        results = []
+        swarm_resp = None
+        self.loop_detect = []
+        # Use trace.span to record the entire task execution process
+        with trace.span(f"task_execution_{self.task.id}", attributes={
+            "task_id": self.task.id,
+            "task_name": self.task.name,
+            "start_time": start
+        }) as task_span:
+            try:
+                while step < max_steps:
+                    # Loose protocol
+                    result_dict = await self._process(observation=observation, info=info)
+                    results.append(result_dict)
+                    swarm_resp = result_dict.get("response")
+                    logger.info(f"Step: {step} response:\n {result_dict}")
+                    step += 1
+                    if self.swarm.finished or endless_detect(self.loop_detect,
+                                                             self.endless_threshold,
+                                                             self.swarm.communicate_agent.id()):
+                        logger.info("task done!")
+                        break
+                    if not swarm_resp:
+                        logger.warning(f"Step: {step} swarm no valid response")
+                        break
+                    observation = result_dict.get("observation")
+                    if not observation:
+                        observation = Observation(content=swarm_resp)
+                    else:
+                        observation.content = swarm_resp
+                time_cost = time.time() - start
+                if not results:
+                    logger.warning("task no result!")
+                    task_span.set_attributes({
+                        "status": "failed",
+                        "error": f"task no result!"
+                    })
+                    return TaskResponse(msg=traceback.format_exc(),
+                                        answer='',
+                                        success=False,
+                                        id=self.task.id,
+                                        time_cost=time_cost,
+                                        usage=self.context.token_usage)
+                answer = results[-1].get('observation').content if results[-1].get('observation') else swarm_resp
+                return TaskResponse(answer=answer,
+                                    success=True,
+                                    id=self.task.id,
+                                    time_cost=(time.time() - start),
+                                    usage=self.context.token_usage)
+            except Exception as e:
+                logger.error(f"Task execution failed with error: {str(e)}\n{traceback.format_exc()}")
+                task_span.set_attributes({
+                    "status": "failed",
+                    "error": f"Task execution failed with error: {str(e)}\n{traceback.format_exc()}"
+                })
+                return TaskResponse(msg=traceback.format_exc(),
+                                    answer='',
+                                    success=False,
+                                    id=self.task.id,
+                                    time_cost=(time.time() - start),
+                                    usage=self.context.token_usage)
+            finally:
+                color_log(f"task token usage: {self.context.token_usage}",
+                          color=Color.pink,
+                          logger_=trace_logger)
+                for _, tool in self.tools.items():
+                    if isinstance(tool, AsyncTool):
+                        await tool.close()
+                    else:
+                        tool.close()
+                task_span.set_attributes({
+                    "end_time": time.time(),
+                    "duration": time.time() - start,
+                })
+    async def _process(self, observation, info) -> Dict[str, Any]:
+        if not self.swarm.initialized:
+            raise RuntimeError("swarm needs to use `reset` to init first.")
+        start = time.time()
+        step = 0
+        max_steps = self.conf.get("max_steps", 100)
+        self.swarm.cur_agent = self.swarm.communicate_agent
+        pre_agent_name = None
+        # use communicate agent every time
+        if override_in_subclass('async_policy', self.swarm.cur_agent.__class__, Agent):
+            message = self.swarm.cur_agent.run(observation,
+                                               step=step,
+                                               outputs=self.outputs,
+                                               stream=self.conf.get("stream", False))
+        else:
+            message = await self.swarm.cur_agent.async_run(observation,
+                                                           step=step,
+                                                           outputs=self.outputs,
+                                                           stream=self.conf.get("stream", False))
+        self.loop_detect.append(self.swarm.cur_agent.id())
+        policy = message.payload
+        if not policy:
+            logger.warning(f"current agent {self.swarm.cur_agent.id()} no policy to use.")
+            exp_id = self._get_step_span_id(step, self.swarm.cur_agent.id())
+            with trace.span(f"step_execution_{exp_id}") as step_span:
+                step_span.set_attributes({
+                    "exp_id": exp_id,
+                    "task_id": self.task.id,
+                    "task_name": self.task.name,
+                    "trace_id": trace.get_current_span().get_trace_id(),
+                    "step": step,
+                    "agent_id": self.swarm.cur_agent.id(),
+                    "pre_agent": pre_agent_name,
+                    "observation": json.dumps(observation.model_dump(exclude_none=True),
+                                              ensure_ascii=False,
+                                              cls=NumpyEncoder),
+                    "actions": json.dumps([action.model_dump() for action in policy], ensure_ascii=False)
+                })
+            return {"msg": f"current agent {self.swarm.cur_agent.id()} no policy to use.",
+                    "steps": step,
+                    "success": False,
+                    "time_cost": (time.time() - start)}
+        color_log(f"{self.swarm.cur_agent.id()} policy: {policy}")
+        msg = None
+        response = None
+        return_entry = False
+        cur_agent = None
+        cur_observation = observation
+        finished = False
+        try:
+            while step < max_steps:
+                terminated = False
+                exp_id = self._get_step_span_id(step, self.swarm.cur_agent.id())
+                with trace.span(f"step_execution_{exp_id}") as step_span:
+                    try:
+                        step_span.set_attributes({
+                            "exp_id": exp_id,
+                            "task_id": self.task.id,
+                            "task_name": self.task.name,
+                            "trace_id": trace.get_current_span().get_trace_id(),
+                            "step": step,
+                            "agent_id": self.swarm.cur_agent.id(),
+                            "pre_agent": pre_agent_name,
+                            "observation": json.dumps(cur_observation.model_dump(exclude_none=True),
+                                                      ensure_ascii=False,
+                                                      cls=NumpyEncoder),
+                            "actions": json.dumps([action.model_dump() for action in policy], ensure_ascii=False)
+                        })
+                    except:
+                        pass
+                    if is_agent(policy[0]):
+                        status, info, ob = await self._social_agent(policy, step)
+                        if status == 'normal':
+                            self.swarm.cur_agent = self.swarm.agents.get(policy[0].agent_name)
+                            policy = info
+                        cur_observation = ob
+                        # clear observation
+                        observation = None
+                    elif is_tool_by_name(policy[0].tool_name):
+                        status, terminated, info = await self._social_tool_call(policy, step)
+                        if status == 'normal':
+                            observation = info
+                            cur_observation = observation
+                    else:
+                        logger.warning(f"Unrecognized policy: {policy[0]}")
+                        return {"msg": f"Unrecognized policy: {policy[0]}, need to check prompt or agent / tool.",
+                                "response": "",
+                                "steps": step,
+                                "success": False}
+                    if status == 'break':
+                        return_entry = info
+                        break
+                    elif status == 'return':
+                        return info
+                step += 1
+                pre_agent_name = self.swarm.cur_agent.id()
+                if terminated and self.swarm.cur_agent.finished:
+                    logger.info(f"{self.swarm.cur_agent.id()} finished")
+                    break
+                if observation:
+                    if cur_agent is None:
+                        cur_agent = self.swarm.cur_agent
+                    if not override_in_subclass('async_policy', cur_agent.__class__, Agent):
+                        message = cur_agent.run(observation,
+                                                step=step,
+                                                outputs=self.outputs,
+                                                stream=self.conf.get("stream", False))
+                    else:
+                        message = await cur_agent.async_run(observation,
+                                                            step=step,
+                                                            outputs=self.outputs,
+                                                            stream=self.conf.get("stream", False))
+                    policy = message.payload
+                    color_log(f"{cur_agent.id()} policy: {policy}")
+            if policy:
+                response = policy[0].policy_info if policy[0].policy_info else policy[0].action_name
+                # All agents or tools have completed their tasks
+            if all(agent.finished for _, agent in self.swarm.agents.items()) or (all(
+                    tool.finished for _, tool in self.tools.items()) and len(self.swarm.agents) == 1):
+                logger.info("entry agent finished, swarm process finished.")
+                finished = True
+            if return_entry and not finished:
+                # Return to the entrance, reset current agent finished state
+                self.swarm.cur_agent._finished = False
+            return {"steps": step,
+                    "response": response,
+                    "observation": observation,
+                    "msg": msg,
+                    "success": True if not msg else False}
+        except Exception as e:
+            logger.error(f"Task execution failed with error: {str(e)}\n{traceback.format_exc()}")
+            return {
+                "msg": str(e),
+                "response": "",
+                "traceback": traceback.format_exc(),
+                "steps": step,
+                "success": False
+            }
+    async def _social_agent(self, policy: List[ActionModel], step):
+        # only one agent, and get agent from policy
+        policy_for_agent = policy[0]
+        agent_name = policy_for_agent.tool_name
+        if not agent_name:
+            agent_name = policy_for_agent.agent_name
+        cur_agent: Agent = self.swarm.agents.get(agent_name)
+        if not cur_agent:
+            raise RuntimeError(f"Can not find {agent_name} agent in swarm.")
+        if cur_agent.id() == self.swarm.communicate_agent.id() or cur_agent.id() == self.swarm.cur_agent.id():
+            # Current agent is entrance agent, means need to exit to the outer loop
+            logger.info(f"{cur_agent.id()} exit to the outer loop")
+            return 'break', True, None
+        if self.swarm.cur_agent.handoffs and agent_name not in self.swarm.cur_agent.handoffs:
+            # Unable to hand off, exit to the outer loop
+            return "return", {"msg": f"Can not handoffs {agent_name} agent "
+                                     f"by {cur_agent.id()} agent.",
+                              "response": policy[0].policy_info if policy else "",
+                              "steps": step,
+                              "success": False}, None
+        # Check if current agent done
+        if cur_agent.finished:
+            cur_agent._finished = False
+            logger.info(f"{cur_agent.id()} agent be be handed off, so finished state reset to False.")
+        observation = Observation(content=policy_for_agent.policy_info)
+        self.loop_detect.append(cur_agent.id())
+        if cur_agent.step_reset:
+            cur_agent.reset({"task": observation.content,
+                             "tool_names": cur_agent.tool_names,
+                             "agent_names": cur_agent.handoffs,
+                             "mcp_servers": cur_agent.mcp_servers})
+        if not override_in_subclass('async_policy', cur_agent.__class__, Agent):
+            message = cur_agent.run(observation,
+                                    step=step,
+                                    outputs=self.outputs,
+                                    stream=self.conf.get("stream", False))
+        else:
+            message = await cur_agent.async_run(observation,
+                                                step=step,
+                                                outputs=self.outputs,
+                                                stream=self.conf.get("stream", False))
+        agent_policy = message.payload
+        if not agent_policy:
+            logger.warning(
+                f"{observation} can not get the valid policy in {policy_for_agent.agent_name}, exit task!")
+            return "return", {"msg": f"{policy_for_agent.agent_name} invalid policy",
+                              "response": "",
+                              "steps": step,
+                              "success": False}, None
+        color_log(f"{cur_agent.id()} policy: {agent_policy}")
+        return 'normal', agent_policy, observation
+    async def _social_tool_call(self, policy: List[ActionModel], step: int):
+        observation = None
+        terminated = False
+        # group action by tool name
+        tool_mapping = dict()
+        # Directly use or use tools after creation.
+        for act in policy:
+            if not self.tools or (self.tools and act.tool_name not in self.tools):
+                # dynamic only use default config in module.
+                conf: ToolConfig = self.tools_conf.get(act.tool_name)
+                tool = ToolFactory(act.tool_name, conf=conf, asyn=conf.use_async if conf else False)
+                if isinstance(tool, Tool):
+                    tool.reset()
+                elif isinstance(tool, AsyncTool):
+                    await tool.reset()
+                tool_mapping[act.tool_name] = []
+                self.tools[act.tool_name] = tool
+            if act.tool_name not in tool_mapping:
+                tool_mapping[act.tool_name] = []
+            tool_mapping[act.tool_name].append(act)
+        for tool_name, action in tool_mapping.items():
+            # Execute action using browser tool and unpack all return values
+            if isinstance(self.tools[tool_name], Tool):
+                message = self.tools[tool_name].step(action)
+            elif isinstance(self.tools[tool_name], AsyncTool):
+                message = await self.tools[tool_name].step(action)
+            else:
+                logger.warning(f"Unsupported tool type: {self.tools[tool_name]}")
+                continue
+            observation, reward, terminated, _, info = message.payload
+            for i, item in enumerate(action):
+                tool_output = ToolResultOutput(data=observation.content, origin_tool_call=ToolCall.from_dict({
+                    "function": {
+                        "name": item.action_name,
+                        "arguments": item.params,
+                    }
+                }))
+                await self.outputs.add_output(tool_output)
+            # Check if there's an exception in info
+            if info.get("exception"):
+                color_log(f"Step {step} failed with exception: {info['exception']}", color=Color.red)
+            logger.info(f"step: {step} finished by tool action {action}.")
+            log_ob = Observation(content='' if observation.content is None else observation.content,
+                                 action_result=observation.action_result)
+            color_log(f"{tool_name} observation: {log_ob}", color=Color.green)
+        # The tool results give itself, exit; give to other agents, continue
+        tmp_name = policy[0].agent_name
+        if self.swarm.cur_agent.id() == self.swarm.communicate_agent.id() and (
+                len(self.swarm.agents) == 1 or tmp_name is None or self.swarm.cur_agent.id() == tmp_name):
+            return "break", terminated, True
+        elif policy[0].agent_name:
+            policy_for_agent = policy[0]
+            agent_name = policy_for_agent.agent_name
+            if not agent_name:
+                agent_name = policy_for_agent.tool_name
+            cur_agent: Agent = self.swarm.agents.get(agent_name)
+            if not cur_agent:
+                raise RuntimeError(f"Can not find {agent_name} agent in swarm.")
+            if self.swarm.cur_agent.handoffs and agent_name not in self.swarm.cur_agent.handoffs:
+                # Unable to hand off, exit to the outer loop
+                return "return", {"msg": f"Can not handoffs {agent_name} agent "
+                                         f"by {cur_agent.id()} agent.",
+                                  "response": policy[0].policy_info if policy else "",
+                                  "steps": step,
+                                  "success": False}
+            # Check if current agent done
+            if cur_agent.finished:
+                cur_agent._finished = False
+                logger.info(f"{cur_agent.id()} agent be be handed off, so finished state reset to False.")
+        return "normal", terminated, observation
+    def _get_step_span_id(self, step, cur_agent_name):
+        key = (step, cur_agent_name)
+        if key not in self.step_agent_counter:
+            self.step_agent_counter[key] = 0
+        else:
+            self.step_agent_counter[key] += 1
+        exp_index = self.step_agent_counter[key]
+        return f"{self.task.id}_{step}_{cur_agent_name}_{exp_index}"

aworld/runners/event_runner.py ADDED Viewed

	@@ -0,0 +1,275 @@

+# coding: utf-8
+# Copyright (c) 2025 inclusionAI.
+import asyncio
+import time
+import traceback
+import aworld.trace as trace
+from typing import List, Callable, Any
+from aworld.core.common import TaskItem
+from aworld.core.context.base import Context
+from aworld.agents.llm_agent import Agent
+from aworld.core.event.base import Message, Constants, TopicType, ToolMessage, AgentMessage
+from aworld.core.task import Task, TaskResponse
+from aworld.events.manager import EventManager
+from aworld.logs.util import logger
+from aworld.runners.handler.agent import DefaultAgentHandler, AgentHandler
+from aworld.runners.handler.base import DefaultHandler
+from aworld.runners.handler.output import DefaultOutputHandler
+from aworld.runners.handler.task import DefaultTaskHandler, TaskHandler
+from aworld.runners.handler.tool import DefaultToolHandler, ToolHandler
+from aworld.runners.task_runner import TaskRunner
+from aworld.utils.common import override_in_subclass, new_instance
+from aworld.runners.state_manager import EventRuntimeStateManager
+class TaskEventRunner(TaskRunner):
+    """Event driven task runner."""
+    def __init__(self, task: Task, *args, **kwargs):
+        super().__init__(task, *args, **kwargs)
+        self._task_response = None
+        self.event_mng = EventManager(self.context)
+        self.hooks = {}
+        self.background_tasks = set()
+        self.state_manager = EventRuntimeStateManager.instance()
+    async def pre_run(self):
+        await super().pre_run()
+        if self.swarm and not self.swarm.max_steps:
+            self.swarm.max_steps = self.task.conf.get('max_steps', 10)
+        observation = self.observation
+        if not observation:
+            raise RuntimeError("no observation, check run process")
+        self._build_first_message()
+        if self.swarm:
+            # register agent handler
+            for _, agent in self.swarm.agents.items():
+                agent.set_tools_instances(self.tools, self.tools_conf)
+                if agent.handler:
+                    await self.event_mng.register(Constants.AGENT, agent.id(), agent.handler)
+                else:
+                    if override_in_subclass('async_policy', agent.__class__, Agent):
+                        await self.event_mng.register(Constants.AGENT, agent.id(), agent.async_run)
+                    else:
+                        await self.event_mng.register(Constants.AGENT, agent.id(), agent.run)
+        # register tool handler
+        for key, tool in self.tools.items():
+            if tool.handler:
+                await self.event_mng.register(Constants.TOOL, tool.name(), tool.handler)
+            else:
+                await self.event_mng.register(Constants.TOOL, tool.name(), tool.step)
+            handlers = self.event_mng.event_bus.get_topic_handlers(
+                Constants.TOOL, tool.name())
+            if not handlers:
+                await self.event_mng.register(Constants.TOOL, Constants.TOOL, tool.step)
+        self._stopped = asyncio.Event()
+        # handler of process in framework
+        handler_list = self.conf.get("handlers")
+        if handler_list:
+            handlers = []
+            for hand in handler_list:
+                handlers.append(new_instance(hand, self))
+            has_task_handler = False
+            has_tool_handler = False
+            has_agent_handler = False
+            for hand in handlers:
+                if isinstance(hand, TaskHandler):
+                    has_task_handler = True
+                elif isinstance(hand, ToolHandler):
+                    has_tool_handler = True
+                elif isinstance(hand, AgentHandler):
+                    has_agent_handler = True
+            if not has_agent_handler:
+                self.handlers.append(DefaultAgentHandler(runner=self))
+            if not has_tool_handler:
+                self.handlers.append(DefaultToolHandler(runner=self))
+            if not has_task_handler:
+                self.handlers.append(DefaultTaskHandler(runner=self))
+            self.handlers = handlers
+        else:
+            self.handlers = [DefaultAgentHandler(runner=self),
+                             DefaultToolHandler(runner=self),
+                             DefaultTaskHandler(runner=self),
+                             DefaultOutputHandler(runner=self)]
+    def _build_first_message(self):
+        # build the first message
+        if self.agent_oriented:
+            self.init_message = AgentMessage(payload=self.observation,
+                                             sender='runner',
+                                             receiver=self.swarm.communicate_agent.id(),
+                                             session_id=self.context.session_id,
+                                             headers={'context': self.context})
+        else:
+            actions = self.observation.content
+            receiver = actions[0].tool_name
+            self.init_message = ToolMessage(payload=self.observation.content,
+                                            sender='runner',
+                                            receiver=receiver,
+                                            session_id=self.context.session_id,
+                                            headers={'context': self.context})
+    async def _common_process(self, message: Message) -> List[Message]:
+        event_bus = self.event_mng.event_bus
+        key = message.category
+        transformer = event_bus.get_transform_handlers(key)
+        if transformer:
+            message = await event_bus.transform(message, handler=transformer)
+        results = []
+        handlers = event_bus.get_handlers(key)
+        async with trace.message_span(message=message):
+            self.state_manager.start_message_node(message)
+            if handlers:
+                if message.topic:
+                    handlers = {message.topic: handlers.get(message.topic, [])}
+                elif message.receiver:
+                    handlers = {message.receiver: handlers.get(
+                        message.receiver, [])}
+                for topic, handler_list in handlers.items():
+                    if not handler_list:
+                        logger.warning(f"{topic} no handler, ignore.")
+                        continue
+                    for handler in handler_list:
+                        t = asyncio.create_task(
+                            self._handle_task(message, handler))
+                        self.background_tasks.add(t)
+                        t.add_done_callback(self.background_tasks.discard)
+            else:
+                # not handler, return raw message
+                results.append(message)
+                t = asyncio.create_task(self._raw_task(results))
+                self.background_tasks.add(t)
+                t.add_done_callback(self.background_tasks.discard)
+                # wait until it is complete
+                await t
+            self.state_manager.end_message_node(message)
+            return results
+    async def _handle_task(self, message: Message, handler: Callable[..., Any]):
+        con = message
+        async with trace.span(handler.__name__):
+            try:
+                logger.info(
+                    f"event_runner _handle_task start, message: {message.id}")
+                if asyncio.iscoroutinefunction(handler):
+                    con = await handler(con)
+                else:
+                    con = handler(con)
+                logger.info(f"event_runner _handle_task message= {message.id}")
+                if isinstance(con, Message):
+                    # process in framework
+                    self.state_manager.save_message_handle_result(name=handler.__name__,
+                                                                  message=message,
+                                                                  result=con)
+                    async for event in self._inner_handler_process(
+                            results=[con],
+                            handlers=self.handlers
+                    ):
+                        await self.event_mng.emit_message(event)
+                else:
+                    self.state_manager.save_message_handle_result(name=handler.__name__,
+                                                                  message=message)
+            except Exception as e:
+                logger.warning(
+                    f"{handler} process fail. {traceback.format_exc()}")
+                error_msg = Message(
+                    category=Constants.TASK,
+                    payload=TaskItem(msg=str(e), data=message),
+                    sender=self.name,
+                    session_id=Context.instance().session_id,
+                    topic=TopicType.ERROR
+                )
+                self.state_manager.save_message_handle_result(name=handler.__name__,
+                                                              message=message,
+                                                              result=error_msg)
+                await self.event_mng.event_bus.publish(error_msg)
+    async def _raw_task(self, messages: List[Message]):
+        # process in framework
+        async for event in self._inner_handler_process(
+                results=messages,
+                handlers=self.handlers
+        ):
+            await self.event_mng.emit_message(event)
+    async def _inner_handler_process(self, results: List[Message], handlers: List[DefaultHandler]):
+        # can use runtime backend to parallel
+        for handler in handlers:
+            for result in results:
+                async for event in handler.handle(result):
+                    yield event
+    async def _do_run(self):
+        """Task execution process in real."""
+        start = time.time()
+        msg = None
+        answer = None
+        try:
+            while True:
+                if await self.is_stopped():
+                    await self.event_mng.done()
+                    logger.info("stop task...")
+                    if self._task_response is None:
+                        # send msg to output
+                        self._task_response = TaskResponse(msg=msg,
+                                                           answer=answer,
+                                                           success=True if not msg else False,
+                                                           id=self.task.id,
+                                                           time_cost=(
+                                                                   time.time() - start),
+                                                           usage=self.context.token_usage)
+                    break
+                # consume message
+                message: Message = await self.event_mng.consume()
+                # use registered handler to process message
+                await self._common_process(message)
+        except Exception as e:
+            logger.error(f"consume message fail. {traceback.format_exc()}")
+        finally:
+            if await self.is_stopped():
+                await self.task.outputs.mark_completed()
+                # todo sandbox cleanup
+                if self.swarm and hasattr(self.swarm, 'agents') and self.swarm.agents:
+                    for agent_name, agent in self.swarm.agents.items():
+                        try:
+                            if hasattr(agent, 'sandbox') and agent.sandbox:
+                                await agent.sandbox.cleanup()
+                        except Exception as e:
+                            logger.warning(
+                                f"event_runner Failed to cleanup sandbox for agent {agent_name}: {e}")
+    async def do_run(self, context: Context = None):
+        if self.swarm and not self.swarm.initialized:
+            raise RuntimeError("swarm needs to use `reset` to init first.")
+        async with trace.span("Task_" + self.init_message.session_id):
+            await self.event_mng.emit_message(self.init_message)
+            await self._do_run()
+            return self._task_response
+    async def stop(self):
+        self._stopped.set()
+    async def is_stopped(self):
+        return self._stopped.is_set()
+    def response(self):
+        return self._task_response

aworld/runners/state_manager.py ADDED Viewed

	@@ -0,0 +1,332 @@

+import time
+from pydantic import BaseModel
+from typing import Optional, List
+from aworld.core.event.base import Message
+from enum import Enum
+from abc import ABC, abstractmethod, ABCMeta
+from aworld.core.agent.base import is_agent_by_name
+from aworld.core.tool.tool_desc import is_tool_by_name
+from aworld.core.singleton import InheritanceSingleton, SingletonMeta
+from aworld.core.event.base import Constants
+from aworld.logs.util import logger
+class RunNodeBusiType(Enum):
+    AGENT = 'AGENT'
+    TOOL = 'TOOL'
+    TASK = 'TASK'
+    @staticmethod
+    def from_message_category(category: str) -> 'RunNodeBusiType':
+        if category == Constants.AGENT:
+            return RunNodeBusiType.AGENT
+        if category == Constants.TOOL:
+            return RunNodeBusiType.TOOL
+        if category == Constants.TASK:
+            return RunNodeBusiType.TASK
+        return None
+class RunNodeStatus(Enum):
+    '''
+    Status values recorded on a run node and on individual handle results.
+    '''
+    # set when the node is created
+    INIT = 'INIT'
+    # set when the node starts executing
+    RUNNING = 'RUNNING'
+    BREAKED = 'BREAKED'
+    SUCCESS = 'SUCCESS'
+    FAILED = 'FAILED'
+    # NOTE(review): presumably meant "TIMEOUT"; the member name is part of the API, so it is kept
+    TIMEOUNT = 'TIMEOUNT'
+class HandleResult(BaseModel):
+    '''
+    Outcome of one handler that processed a message.
+    '''
+    # name of the handler that produced this result
+    name: str = None
+    # SUCCESS or FAILED, as decided when the result is saved
+    status: RunNodeStatus = None
+    result_msg: Optional[str] = None
+    # message returned by the handler, if it returned one
+    result: Optional[Message] = None
+class RunNode(BaseModel):
+    '''
+    State record of one unit of work tracked by the runtime state manager.
+    '''
+    # node identifier; `RuntimeStateManager.create_node` falls back to the message id when none is given
+    # NOTE(review): an earlier note described it as "{busi_id}_{busi_type}" -- confirm the intended format
+    node_id: Optional[str] = None
+    # business type of the node, see RunNodeBusiType
+    busi_type: str = None
+    # id of the business object; the event state manager stores the message receiver here
+    busi_id: str = None
+    session_id: str = None
+    msg_id: Optional[str] = None  # input message id
+    # busi_id of node that send the input message
+    msg_from: Optional[str] = None
+    parent_node_id: Optional[str] = None
+    status: RunNodeStatus = None
+    result_msg: Optional[str] = None
+    # one entry per handler result saved for this node
+    results: Optional[List[HandleResult]] = None
+    # timestamps from time.time(): creation, start of execution, end of execution
+    create_time: Optional[float] = None
+    execute_time: Optional[float] = None
+    end_time: Optional[float] = None
+class StateStorage(ABC):
+    @abstractmethod
+    def get(self, node_id: str) -> RunNode:
+        pass
+    @abstractmethod
+    def insert(self, node: RunNode):
+        pass
+    @abstractmethod
+    def update(self, node: RunNode):
+        pass
+    @abstractmethod
+    def query(self, session_id: str) -> List[RunNode]:
+        pass
+class StateStorageMeta(SingletonMeta, ABCMeta):
+    pass
+class InMemoryStateStorage(StateStorage, InheritanceSingleton, metaclass=StateStorageMeta):
+    '''
+    In memory state storage
+    '''
+    def __init__(self, max_session=1000):
+        self._max_session = max_session
+        self._nodes = {}  # {node_id: RunNode}
+        self._ordered_session_ids = []
+        self._session_nodes = {}  # {session_id: [RunNode, RunNode]}
+    def get(self, node_id: str) -> RunNode:
+        return self._nodes.get(node_id)
+    def insert(self, node: RunNode):
+        if node.session_id not in self._ordered_session_ids:
+            self._ordered_session_ids.append(node.session_id)
+            self._session_nodes.update({node.session_id: []})
+        if node.node_id not in self._nodes:
+            self._nodes.update({node.node_id: node})
+            self._session_nodes[node.session_id].append(node)
+        if len(self._ordered_session_ids) > self._max_session:
+            oldest_session_id = self._ordered_session_ids.pop(0)
+            session_nodes = self._session_nodes.pop(oldest_session_id)
+            for node in session_nodes:
+                self._nodes.pop(node.node_id)
+        # logger.info(f"storage nodes: {self._nodes}")
+    def update(self, node: RunNode):
+        self._nodes[node.node_id] = node
+    def query(self, session_id: str, msg_id: str = None) -> List[RunNode]:
+        session_nodes = self._session_nodes.get(session_id, [])
+        if msg_id:
+            return [node for node in session_nodes if node.msg_id == msg_id]
+        return session_nodes
+class RuntimeStateManager(InheritanceSingleton):
+    '''
+    Runtime state manager
+    '''
+    def __init__(self, storage: StateStorage = InMemoryStateStorage.instance()):
+        self.storage = storage
+    def create_node(self,
+                    busi_type: RunNodeBusiType,
+                    busi_id: str,
+                    session_id: str,
+                    node_id: str = None,
+                    parent_node_id: str = None,
+                    msg_id: str = None,
+                    msg_from: str = None) -> RunNode:
+        '''
+            create node and insert to storage
+        '''
+        node_id = node_id or msg_id
+        node = self._find_node(node_id)
+        if node:
+            # raise Exception(f"node already exist, node_id: {node_id}")
+            return
+        if parent_node_id:
+            parent_node = self._find_node(parent_node_id)
+            if not parent_node:
+                logger.warning(
+                    f"parent node not exist, parent_node_id: {parent_node_id}")
+        node = RunNode(node_id=node_id,
+                       busi_type=busi_type,
+                       busi_id=busi_id,
+                       session_id=session_id,
+                       msg_id=msg_id,
+                       msg_from=msg_from,
+                       parent_node_id=parent_node_id,
+                       status=RunNodeStatus.INIT,
+                       create_time=time.time())
+        self.storage.insert(node)
+        return node
+    def run_node(self, node_id: str):
+        '''
+            set node status to RUNNING and update to storage
+        '''
+        node = self._node_exist(node_id)
+        node.status = RunNodeStatus.RUNNING
+        node.execute_time = time.time()
+        self.storage.update(node)
+    def save_result(self,
+                    node_id: str,
+                    result: HandleResult):
+        '''
+            save node execute result and update to storage
+        '''
+        node = self._node_exist(node_id)
+        if not node.results:
+            node.results = []
+        node.results.append(result)
+        self.storage.update(node)
+    def break_node(self, node_id):
+        '''
+            set node status to BREAKED and update to storage
+        '''
+        node = self._node_exist(node_id)
+        node.status = RunNodeStatus.BREAKED
+        self.storage.update(node)
+    def run_succeed(self,
+                    node_id,
+                    result_msg=None,
+                    results: List[HandleResult] = None):
+        '''
+            set node status to SUCCESS and update to storage
+        '''
+        node = self._node_exist(node_id)
+        node.status = RunNodeStatus.SUCCESS
+        node.result_msg = result_msg
+        node.end_time = time.time()
+        if results:
+            if not node.results:
+                node.results = []
+            node.results.extend(results)
+        self.storage.update(node)
+    def run_failed(self,
+                   node_id,
+                   result_msg=None,
+                   results: List[HandleResult] = None):
+        '''
+            set node status to FAILED and update to storage
+        '''
+        node = self._node_exist(node_id)
+        node.status = RunNodeStatus.FAILED
+        node.result_msg = result_msg
+        node.end_time = time.time()
+        if results:
+            if not node.results:
+                node.results = []
+            node.results.extend(results)
+        self.storage.update(node)
+    def run_timeout(self,
+                    node_id,
+                    result_msg=None):
+        '''
+            set node status to TIMEOUNT and update to storage
+        '''
+        node = self._node_exist(node_id)
+        node.status = RunNodeStatus.TIMEOUNT
+        node.result_msg = result_msg
+        self.storage.update(node)
+    def get_node(self, node_id: str) -> RunNode:
+        '''
+            get node from storage
+        '''
+        return self._find_node(node_id)
+    def get_nodes(self, session_id: str) -> List[RunNode]:
+        '''
+            get nodes from storage
+        '''
+        return self.storage.query(session_id)
+    def _node_exist(self, node_id: str):
+        node = self._find_node(node_id)
+        if not node:
+            raise Exception(f"node not found, node_id: {node_id}")
+        return node
+    def _find_node(self, node_id: str):
+        return self.storage.get(node_id)
+    def _judge_msg_from_busi_type(self, msg_from: str) -> RunNodeBusiType:
+        '''
+        judge msg_from busi_type
+        '''
+        if is_agent_by_name(msg_from):
+            return RunNodeBusiType.AGENT
+        if is_tool_by_name(msg_from):
+            return RunNodeBusiType.TOOL
+        return RunNodeBusiType.TASK
+class EventRuntimeStateManager(RuntimeStateManager):
+    def __init__(self, storage: StateStorage = InMemoryStateStorage.instance()):
+        super().__init__(storage)
+    def start_message_node(self, message: Message):
+        '''
+        create and start node while message handle started.
+        '''
+        run_node_busi_type = RunNodeBusiType.from_message_category(
+            message.category)
+        logger.info(
+            f"start message node: {message.receiver}, busi_type={run_node_busi_type}, node_id={message.id}")
+        if run_node_busi_type:
+            self.create_node(
+                node_id=message.id,
+                busi_type=run_node_busi_type,
+                busi_id=message.receiver,
+                session_id=message.session_id,
+                msg_id=message.id,
+                msg_from=message.sender)
+            self.run_node(message.id)
+    def save_message_handle_result(self, name: str, message: Message, result: Message = None):
+        '''
+        save message handle result
+        '''
+        run_node_busi_type = RunNodeBusiType.from_message_category(
+            message.category)
+        if run_node_busi_type:
+            if result and result.is_error():
+                handle_result = HandleResult(
+                    name=name,
+                    status=RunNodeStatus.FAILED,
+                    result=result)
+            else:
+                handle_result = HandleResult(
+                    name=name,
+                    status=RunNodeStatus.SUCCESS,
+                    result=result)
+            self.save_result(node_id=message.id, result=handle_result)
+    def end_message_node(self, message: Message):
+        '''
+        end node while message handle finished.
+        '''
+        run_node_busi_type = RunNodeBusiType.from_message_category(
+            message.category)
+        if run_node_busi_type:
+            node = self._node_exist(node_id=message.id)
+            status = RunNodeStatus.SUCCESS
+            if node.results:
+                for result in node.results:
+                    if result.status == RunNodeStatus.FAILED:
+                        status = RunNodeStatus.FAILED
+                        break
+            if status == RunNodeStatus.FAILED:
+                self.run_failed(node_id=message.id)
+            else:
+                self.run_succeed(node_id=message.id)

aworld/runners/task_runner.py ADDED Viewed

	@@ -0,0 +1,128 @@

+# coding: utf-8
+# Copyright (c) 2025 inclusionAI.
+import abc
+import time
+import uuid
+from typing import Callable, Any
+from pydantic import BaseModel
+import aworld.tools
+from aworld.config import ConfigDict
+from aworld.config.conf import ToolConfig
+from aworld.core.agent.swarm import Swarm
+from aworld.core.common import Observation
+from aworld.core.context.base import Context
+from aworld.core.context.session import Session
+from aworld.core.tool.base import Tool, AsyncTool
+from aworld.core.task import Task, TaskResponse, Runner
+from aworld.logs.util import logger
+from aworld import trace
+class TaskRunner(Runner):
+    """Task based runner api class."""
+    __metaclass__ = abc.ABCMeta
+    def __init__(self,
+                 task: Task,
+                 *,
+                 agent_oriented: bool = True,
+                 daemon_target: Callable[..., Any] = None):
+        """Task runner initialize.
+        Args:
+            task: Task entity to be executed.
+            agent_oriented: Is it an agent oriented task, default is True.
+        """
+        if task.tools is None:
+            task.tools = []
+        if task.tool_names is None:
+            task.tool_names = []
+        if agent_oriented:
+            if not task.agent and not task.swarm:
+                raise ValueError("agent and swarm all is None.")
+            if task.agent and task.swarm:
+                raise ValueError("agent and swarm choose one only.")
+            if task.agent:
+                # uniform agent
+                task.swarm = Swarm(task.agent)
+        if task.conf is None:
+            task.conf = dict()
+        if isinstance(task.conf, BaseModel):
+            task.conf = task.conf.model_dump()
+        check_input = task.conf.get("check_input", False)
+        if check_input and not task.input:
+            raise ValueError("task no input")
+        self.context = task.context if task.context else Context.instance()
+        self.task = task
+        self.context.set_task(task)
+        self.agent_oriented = agent_oriented
+        self.daemon_target = daemon_target
+        self._use_demon = False if not task.conf else task.conf.get('use_demon', False)
+        self._exception = None
+        self.start_time = time.time()
+        self.step_agent_counter = {}
+    async def pre_run(self):
+        task = self.task
+        self.swarm = task.swarm
+        self.input = task.input
+        self.outputs = task.outputs
+        self.name = task.name
+        self.conf = task.conf if task.conf else ConfigDict()
+        self.tools = {tool.name(): tool for tool in task.tools} if task.tools else {}
+        task.tool_names.extend(self.tools.keys())
+        # lazy load
+        self.tool_names = task.tool_names
+        self.tools_conf = task.tools_conf
+        if self.tools_conf is None:
+            self.tools_conf = {}
+        # mcp performs special process, use async only in the runn
+        self.tools_conf['mcp'] = ToolConfig(use_async=True, name='mcp')
+        self.endless_threshold = task.endless_threshold
+        # build context
+        if task.session_id:
+            session = Session(session_id=task.session_id)
+        else:
+            session = Session(session_id=uuid.uuid1().hex)
+        trace_id = uuid.uuid1().hex if trace.get_current_span() is None else trace.get_current_span().get_trace_id()
+        self.context.task_id = self.name
+        self.context.trace_id = trace_id
+        self.context.session = session
+        self.context.swarm = self.swarm
+        # init tool state by reset(), and ignore them observation
+        observation = None
+        if self.tools:
+            for _, tool in self.tools.items():
+                # use the observation and info of the last one
+                if isinstance(tool, Tool):
+                    tool.context = self.context
+                    observation, info = tool.reset()
+                elif isinstance(tool, AsyncTool):
+                    observation, info = await tool.reset()
+                else:
+                    logger.warning(f"Unsupported tool type: {tool}, will ignored.")
+        if observation:
+            if not observation.content:
+                observation.content = self.input
+        else:
+            observation = Observation(content=self.input)
+        self.observation = observation
+        if self.swarm:
+            self.swarm.event_driven = task.event_driven
+            self.swarm.reset(observation.content, context=self.context, tools=self.tool_names)
+    async def post_run(self):
+        self.context.reset()
+    @abc.abstractmethod
+    async def do_run(self, context: Context = None) -> TaskResponse:
+        """Task do run."""

aworld/runners/utils.py ADDED Viewed

	@@ -0,0 +1,111 @@

+# coding: utf-8
+# Copyright (c) 2025 inclusionAI.
+from typing import List, Dict
+from aworld.config import RunConfig
+from aworld.core.agent.swarm import GraphBuildType
+from aworld.core.common import Config
+from aworld.core.task import Task, TaskResponse, Runner
+from aworld.logs.util import logger
+from aworld.utils.common import new_instance, snake_to_camel
+async def choose_runners(tasks: List[Task]) -> List[Runner]:
+    """Choose the correct runner to run the task.
+    Args:
+        task: A task that contains agents, tools and datas.
+    Returns:
+        Runner instance or exception.
+    """
+    runners = []
+    for task in tasks:
+        # user custom runner class
+        runner_cls = task.runner_cls
+        if runner_cls:
+            return new_instance(runner_cls, task)
+        else:
+            # user runner class in the framework
+            if task.swarm:
+                task.swarm.event_driven = task.event_driven
+                execute_type = task.swarm.build_type
+            else:
+                execute_type = GraphBuildType.WORKFLOW.value
+            if task.event_driven:
+                runner = new_instance("aworld.runners.event_runner.TaskEventRunner", task)
+            else:
+                runner = new_instance(
+                    f"aworld.runners.call_driven_runner.{snake_to_camel(execute_type)}Runner",
+                    task
+                )
+        runners.append(runner)
+    return runners
+async def execute_runner(runners: List[Runner], run_conf: RunConfig) -> Dict[str, TaskResponse]:
+    """Execute runner in the runtime engine.
+    Args:
+        runners: The task processing flow.
+        run_conf: Runtime config, can choose the special computing engine to execute the runner.
+    """
+    if not run_conf:
+        run_conf = RunConfig()
+    name = run_conf.name
+    if run_conf.cls:
+        runtime_backend = new_instance(run_conf.cls, run_conf)
+    else:
+        runtime_backend = new_instance(
+            f"aworld.core.runtime_engine.{snake_to_camel(name)}Runtime", run_conf)
+    runtime_engine = runtime_backend.build_engine()
+    return await runtime_engine.execute([runner.run for runner in runners])
+def endless_detect(records: List[str], endless_threshold: int, root_agent_name: str):
+    """A very simple implementation of endless loop detection.
+    Args:
+        records: Call sequence of agent.
+        endless_threshold: Threshold for the number of repetitions.
+        root_agent_name: Name of the entrance agent.
+    """
+    if not records:
+        return False
+    threshold = endless_threshold
+    last_agent_name = root_agent_name
+    count = 1
+    for i in range(len(records) - 2, -1, -1):
+        if last_agent_name == records[i]:
+            count += 1
+        else:
+            last_agent_name = records[i]
+            count = 1
+        if count >= threshold:
+            logger.warning("detect loop, will exit the loop.")
+            return True
+    if len(records) > 6:
+        last_agent_name = None
+        # latest
+        for j in range(1, 3):
+            for i in range(len(records) - j, 0, -2):
+                if last_agent_name and last_agent_name == (records[i], records[i - 1]):
+                    count += 1
+                elif last_agent_name is None:
+                    last_agent_name = (records[i], records[i - 1])
+                    count = 1
+                else:
+                    last_agent_name = None
+                    break
+                if count >= threshold:
+                    logger.warning(f"detect loop: {last_agent_name}, will exit the loop.")
+                    return True
+    return False