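"""Gradio app for the Hugging Face Agents Course (Unit 4 evaluation): runs a
simple LLM agent over the course's questions and submits the answers to the
scoring API."""
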
import os
import gradio as gr
import requests
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- Constants ---
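# Scoring service for the course evaluation, and the default instruct model
# the agent loads when a token is provided.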
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
DEFAULT_HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"

# --- Basic Agent Definition ---
class BasicAgent:
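    """Minimal agent that answers each question with a single completion from
    a Hugging Face text-generation pipeline, or a canned default answer when
    no token (and hence no model) is available."""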
    def __init__(self, hf_token=None, model_name=DEFAULT_HF_MODEL):
        print("Initializing BasicAgent with LLM...")
        self.hf_token = hf_token
        self.model_name = model_name
        self.llm = None
        
        if hf_token:
            try:
                print(f"Loading model: {model_name}")
                self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
                # Load in the checkpoint's native dtype and let accelerate place
                # the weights; device placement belongs on from_pretrained rather
                # than on pipeline() once the model is already instantiated.
                self.model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    token=hf_token,
                    torch_dtype="auto",
                    device_map="auto"
                )
                self.llm = pipeline(
                    "text-generation",
                    model=self.model,
                    tokenizer=self.tokenizer
                )
                print("Model loaded successfully")
            except Exception as e:
                print(f"Error loading model: {e}")
                raise RuntimeError(f"Could not load model: {e}") from e
        else:
            print("No HF token provided - agent will use default answers")
    
    def __call__(self, question: str) -> str:
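        """Generate an answer for one question, or a stub if no model was loaded."""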
        if not self.llm:
            return "This is a default answer (no LLM initialized)"
            
        try:
            print(f"Generating answer for question: {question[:50]}...")
            response = self.llm(
                question,
                max_new_tokens=150,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                return_full_text=False  # return only the completion, not the echoed prompt
            )
            return response[0]['generated_text'].strip()
        except Exception as e:
            print(f"Error generating answer: {e}")
            return f"Error generating answer: {e}"

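# Quick local smoke test (hypothetical usage; assumes an HF_TOKEN env var with
# access to the model):
#   agent = BasicAgent(hf_token=os.getenv("HF_TOKEN"))
#   print(agent("What is the capital of France?"))
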
def run_and_submit_all(hf_token: str, request: gr.Request):
    """Main function to run evaluation and submit answers"""
    # Get user info from the request
    if not request.username:
        return "Please Login to Hugging Face with the button.", None

    username = request.username
    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Initialize agent
    try:
        agent = BasicAgent(hf_token=hf_token)
    except Exception as e:
        return f"Error initializing agent: {e}", None
    
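    # Public link to this Space's source so the scoring server can inspect the agent.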
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Process questions
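    # Run the agent on every question, collecting both the API payload and a
    # human-readable log; a failure on one question doesn't abort the run.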
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Submit answers
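    # The scoring endpoint expects one JSON body with the username, a link to
    # the agent's code, and the list of per-task answers.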
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
    
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# LLM Agent Evaluation Runner")
    gr.Markdown("""
        **Instructions:**
        1. Get your Hugging Face API token from [your settings](https://huggingface.co/settings/tokens)
        2. Enter your token below
        3. Log in to your Hugging Face account
        4. Click 'Run Evaluation & Submit All Answers'
    """)

    with gr.Row():
        hf_token_input = gr.Textbox(
            label="Hugging Face API Token",
            type="password",
            placeholder="hf_xxxxxxxxxxxxxxxx",
            info="Required for LLM access"
        )
    
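    # OAuth login ("Sign in with Hugging Face"); once the user is logged in,
    # request.username is available in the click handler.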
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status", lines=5)
    results_table = gr.DataFrame(label="Results", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        inputs=[hf_token_input],
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()