# env variable needed: HF_TOKEN
from llama_index.core import PromptTemplate
from llama_index.core.workflow import Context
from llama_index.core.agent.workflow import ReActAgent, AgentStream, ToolCallResult
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.tools.wikipedia import WikipediaToolSpec
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec

from .prompt import custom_react_system_header_str
from .custom_tools import query_image_tool, automatic_speech_recognition_tool


class LLamaIndexAgent:
    def __init__(self,
                 model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
                 provider="hf-inference",
                 show_tools_desc=True,
                 show_prompt=True):
        # LLM definition
        llm = HuggingFaceInferenceAPI(model_name=model_name,  # needs HF_TOKEN in env
                                      provider=provider)
        print(f'LLamaIndexAgent initialized with model "{model_name}"')

        # tools definition
        tool_spec_list = []
        tool_spec_list += WikipediaToolSpec().to_tool_list()
        tool_spec_list += DuckDuckGoSearchToolSpec().to_tool_list()
        tool_spec_list += CodeInterpreterToolSpec().to_tool_list()
        tool_spec_list += [query_image_tool, automatic_speech_recognition_tool]

        # agent definition
        self.agent = ReActAgent(llm=llm, tools=tool_spec_list)

        # replace the default ReAct system header with a custom one
        custom_react_system_header = PromptTemplate(custom_react_system_header_str)
        self.agent.update_prompts({"react_header": custom_react_system_header})

        # context definition (carries the agent's state across run() calls)
        self.ctx = Context(self.agent)

        if show_tools_desc:
            for i, tool in enumerate(tool_spec_list):
                print("\n" + "=" * 30 + f" Tool {i + 1} " + "=" * 30)
                print(tool.metadata.description)

        if show_prompt:
            prompt_dict = self.agent.get_prompts()
            for k, v in prompt_dict.items():
                print("\n" + "=" * 30 + f" Prompt: {k} " + "=" * 30)
                print(v.template)

    async def __call__(self, question: str) -> str:
        print("\n\n" + "*" * 50)
        print(f"Agent received question: {question}")
        print("*" * 50)

        handler = self.agent.run(question, ctx=self.ctx)
        async for ev in handler.stream_events():
            # if isinstance(ev, ToolCallResult):
            #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
            if isinstance(ev, AgentStream):
                print(f"{ev.delta}", end="", flush=True)
        response = await handler

        # post-process the response: cast AgentOutput to str and keep only what
        # follows "FINAL ANSWER:" so the result supports exact-match scoring
        response = str(response)
        if "FINAL ANSWER:" in response:
            response = response.split("FINAL ANSWER:")[-1].strip()
        else:
            print('Could not find "FINAL ANSWER:" in the response')

        print("\n\n" + "-" * 50)
        print(f"Agent returning with answer: {response}")

        # reset the context so the next question starts from a clean state
        self.ctx = Context(self.agent)
        return response
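
# Usage sketch (an illustrative assumption, not part of the original module):
# because of the relative imports above, run this as part of its package, e.g.
# `python -m <your_package>.<this_module>`, with HF_TOKEN set in the environment.
# The question string below is a hypothetical example.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        # instantiating with defaults prints the tool descriptions and prompts
        agent = LLamaIndexAgent()
        # the agent is awaitable via __call__ and returns the final answer string
        answer = await agent("How many continents are there on Earth?")
        print(f"\nFinal answer: {answer}")

    asyncio.run(_demo())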