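"""Gradio app for the Basic Agent Evaluation Runner.

Runs the agent over the evaluation questions, shows the answers, submits them,
and exports OpenTelemetry traces of each run.
"""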
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry import trace
from evaluator import Evaluator
from runner import Runner
from settings import Settings
import os
import pandas as pd
import gradio as gr
import logging

logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)
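
# Shared application objects used by the Gradio callbacks below.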
settings = Settings()
evaluator = Evaluator(settings)
runner = Runner(settings)
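
# Tracing setup: with no arguments, the OTLP/HTTP exporter picks up its endpoint
# and headers from the standard OTEL_EXPORTER_OTLP_* environment variables
# (falling back to a local collector otherwise).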
trace_provider = TracerProvider()
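# SimpleSpanProcessor exports each span synchronously as soon as it ends; fine
# for a demo, though BatchSpanProcessor is the usual choice in production.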
trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
trace.set_tracer_provider(trace_provider)
tracer = trace.get_tracer(__name__)
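
# Auto-instrument smolagents so agent runs, steps, and tool/model calls are
# recorded as spans on the provider configured above.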
SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)


def run(test_mode=False) -> pd.DataFrame:
    """Run the agent on a single question (test mode) or the full question set."""
    if test_mode:
        # Quick smoke test: answer just one question.
        questions = [evaluator.get_one_question()]
    else:
        questions = evaluator.get_questions()
    return runner.run_agent(questions)


def submit():
    # Return the evaluator's response so it can be shown in the status textbox.
    return evaluator.submit_answers()


with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
        2. Log in to your Hugging Face account using the button below. Your HF username is used for the submission.
        3. Click 'Run Full Evaluation' to fetch the questions and run your agent, then 'Submit All Answers' to submit the answers and see your score. ('Get One Answer' runs the agent on a single question as a quick test.)

        ---
        **Disclaimers:**
        Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to reduce the wait after clicking submit, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
        """
    )

    gr.LoginButton()

    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Run Full Evaluation")
    submit_button = gr.Button("Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(
        label="Questions and Agent Answers", wrap=True)
    run_one_button.click(
        fn=run, inputs=[gr.Checkbox(value=True, visible=False)],
        outputs=[results_table]
    )
    run_all_button.click(
        fn=run, inputs=[gr.Checkbox(value=False, visible=False)],
        outputs=[results_table]
    )
    # Wire the submit button to submit() (not to fetching a question) so the
    # evaluator's response appears in the status box.
    submit_button.click(
        fn=submit,
        outputs=[status_output]
    )


if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(
            f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(
            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)