import os

import gradio as gr
from huggingface_hub import InferenceClient

token = os.environ["HF_TOKEN"]

client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=token)

SYSTEM_PROMPT = """Answer the following questions as best you can. You have access to the following tools:

get_weather: Get the current weather in a given location

The way you use the tools is by specifying a json blob.
Specifically, this json should have an `action` key (with the name of the tool to use) and an `action_input` key (with the input to the tool going here).

The only values that should be in the "action" field are:
get_weather: Get the current weather in a given location, args: {"location": {"type": "string"}}
example use :

```
{{
  "action": "get_weather",
  "action_input": {"location": "New York"}
}}
```

ALWAYS use the following format:

Question: the input question you must answer
Thought: you should always think about one action to take. Only one action at a time in this format:
Action:

```
$JSON_BLOB
```

Observation: the result of the action. This Observation is unique, complete, and the source of truth.
... (this Thought/Action/Observation can repeat N times, you should take several steps when needed. The $JSON_BLOB must be formatted as markdown and only use a SINGLE action at a time.)

You must always end your output with the following format:

Thought: I now know the final answer
Final Answer: the final answer to the original input question

Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer.
"""

# Build the prompt with the Llama 3.2 chat template special tokens.
prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{SYSTEM_PROMPT}
<|eot_id|><|start_header_id|>user<|end_header_id|>
What's the weather in London?
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Stop before the model hallucinates an "Observation:"; the real observation
# is supplied by calling the tool ourselves below.
output = client.text_generation(
    prompt,
    max_new_tokens=200,
    stop=["Observation:"],
)

# Dummy tool: a real agent would call a weather API here.
def get_weather(location):
    return f"the weather in {location} is sunny with low temperatures. \n"

# Resume generation with the tool result appended as the observation.
new_prompt = prompt + output + get_weather("London")
final_output = client.text_generation(
    new_prompt,
    max_new_tokens=200,
)

def llm():
    return final_output

app = gr.Interface(
    title="Simple Agent",
    description="A simple agent that completes the text.",
    fn=llm,
    inputs=None,
    outputs="text",
)
app.launch()
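
# ---------------------------------------------------------------------------
# Hedged sketch (illustrative, not part of the running app): the script above
# hard-codes the call to get_weather("London"). A more general agent loop
# would parse the JSON action blob the model emits, dispatch to a registry of
# tools, and feed the observation back into the prompt. The names introduced
# below (TOOLS, parse_action, run_agent) are assumptions for illustration;
# only client, prompt, and get_weather come from the code above.
# ---------------------------------------------------------------------------
import json
import re

TOOLS = {"get_weather": get_weather}  # hypothetical tool registry

def parse_action(text):
    # Grab the first {...} span in the model output.
    match = re.search(r"\{.*\}", text, re.DOTALL)
    if match is None:
        return None
    raw = match.group(0)
    try:
        blob = json.loads(raw)
    except json.JSONDecodeError:
        # The example in SYSTEM_PROMPT shows doubled braces; retry with them
        # collapsed in case the model imitated that literally.
        blob = json.loads(raw.replace("{{", "{").replace("}}", "}"))
    return blob["action"], blob["action_input"]

def run_agent(question, max_steps=3):
    # Reuse the chat template built above, swapping in an arbitrary question.
    p = prompt.replace("What's the weather in London?", question)
    for _ in range(max_steps):
        step = client.text_generation(p, max_new_tokens=200, stop=["Observation:"])
        p += step
        if "Final Answer:" in step:
            return step.split("Final Answer:")[-1].strip()
        parsed = parse_action(step)
        if parsed is None:
            return step
        name, args = parsed
        # Mirror the script above: the stop sequence "Observation:" is kept in
        # `step`, so the tool result can be appended directly after it.
        p += TOOLS[name](**args)
    return p

# Example usage: run_agent("What's the weather in Paris?")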