File size: 4,969 Bytes
10e9b7d
 
eccf8e4
3c4371f
8b83970
10e9b7d
182cf83
3db6293
8b83970
e80aab9
8b83970
31243f4
8b83970
 
182cf83
 
 
8b83970
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
182cf83
8b83970
182cf83
e85b640
8b83970
182cf83
 
8b83970
182cf83
8b83970
182cf83
 
e85b640
182cf83
 
4021bf3
8b83970
 
 
 
 
9efd6bd
8b83970
 
9efd6bd
 
7e4a06b
31243f4
 
e80aab9
8b83970
31243f4
8b83970
31243f4
 
182cf83
 
3c4371f
8b83970
eccf8e4
31243f4
7d65c66
31243f4
 
8b83970
7d65c66
4e281a3
e80aab9
8b83970
7d65c66
 
31243f4
 
 
8b83970
31243f4
8b83970
31243f4
8b83970
 
 
 
 
 
 
 
 
 
31243f4
8b83970
 
 
 
 
31243f4
 
8b83970
31243f4
8b83970
4e281a3
8b83970
4e281a3
 
 
 
e80aab9
7d65c66
e80aab9
8b83970
 
 
e80aab9
8b83970
 
 
 
e80aab9
8b83970
7d65c66
8b83970
182cf83
4e281a3
e80aab9
182cf83
4e281a3
e514fd7
8b83970
 
 
4e281a3
e80aab9
8b83970
 
182cf83
8b83970
182cf83
8b83970
4e281a3
e80aab9
8b83970
e23ab90
8b83970
e23ab90
 
e80aab9
 
4e281a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import os
import gradio as gr
import requests
import pandas as pd
from transformers import pipeline

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Must be a text-GENERATION checkpoint: BasicAgent builds a
# pipeline("text-generation", ...) from this name. The previously
# configured facebook/bart-large-mnli is an NLI classifier — it cannot
# load under the text-generation task, so the agent always fell back to
# the canned default answer.
HF_MODEL_NAME = "distilgpt2"  # small, free causal LM that runs in Spaces

# --- Enhanced Agent Definition ---
class BasicAgent:
    """Thin wrapper around a Hugging Face text-generation pipeline.

    Degrades gracefully: if the pipeline cannot be initialized (model
    download failure, incompatible checkpoint, missing hardware), every
    call returns a canned default answer instead of raising.
    """

    def __init__(self, hf_token=None):
        """Build the underlying generation pipeline.

        Args:
            hf_token: Optional Hugging Face access token, forwarded to
                ``transformers.pipeline`` for gated/private models.
        """
        print("Initializing LLM Agent...")
        self.hf_token = hf_token
        self.llm = None

        try:
            # NOTE(review): HF_MODEL_NAME must be a generation-capable
            # checkpoint; an NLI classifier (e.g. bart-large-mnli) will
            # fail to load for the "text-generation" task — confirm the
            # configured model supports generation.
            self.llm = pipeline(
                "text-generation",
                model=HF_MODEL_NAME,
                token=hf_token,
                device_map="auto",
            )
            print("LLM initialized successfully")
        except Exception as e:
            # Any init failure downgrades to the canned answer in
            # __call__ rather than crashing the whole Space.
            print(f"Error initializing LLM: {e}")
            self.llm = None

    def __call__(self, question: str) -> str:
        """Return a generated answer for *question*, or a fallback string."""
        if not self.llm:
            return "This is a default answer (LLM not available)"

        try:
            print(f"Generating answer for: {question[:50]}...")
            # max_new_tokens (not the deprecated max_length) so prompts
            # longer than the budget are not rejected outright;
            # return_full_text=False strips the echoed prompt so only
            # the newly generated answer is returned.
            response = self.llm(
                question,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                return_full_text=False,
            )
            return response[0]['generated_text']
        except Exception as e:
            print(f"Error generating answer: {e}")
            return f"Error generating answer: {e}"

def run_and_submit_all(request: gr.Request):
    """Fetch all questions, run the agent on each, and submit the answers.

    Works with Gradio's OAuth: the logged-in username is read from the
    injected request object.

    Returns:
        A ``(status_message, results_dataframe)`` pair matching the two
        Gradio output components; the dataframe is ``None`` on early
        failures that produce no per-question log.
    """
    # Get username from auth. getattr guards against Gradio versions /
    # deployments where the attribute is missing entirely, which would
    # otherwise raise instead of showing the login prompt.
    username = getattr(request, "username", None)
    if not username:
        return "Please login with Hugging Face account", None

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent(hf_token=os.getenv("HF_TOKEN"))
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code so the scoring server can audit it; fall
    # back to a placeholder instead of a broken ".../None/..." URL when
    # running outside a Space.
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else "local-dev"
    )

    # 2. Fetch Questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "No questions received from server", None
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 3. Process Questions — log every task, including agent failures,
    # so the user sees a complete per-question report.
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue  # skip malformed entries rather than aborting the run

        try:
            # Only the agent call can legitimately fail here; keep the
            # try body minimal so bookkeeping bugs aren't swallowed.
            answer = agent(question_text)
        except Exception as e:
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"ERROR: {e}",
            })
            continue

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": answer,
        })

    if not answers_payload:
        return "No valid answers generated", pd.DataFrame(results_log)

    # 4. Submit Answers
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()

        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
            f"Message: {result.get('message', '')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {str(e)}", pd.DataFrame(results_log)

# --- Gradio Interface ---
with gr.Blocks() as demo:
    # Page header and usage notes.
    gr.Markdown("# LLM Agent Evaluation Runner")
    gr.Markdown("""
        **Instructions:**
        1. Log in with your Hugging Face account
        2. Click 'Run Evaluation'
        3. View your results
    """)

    # Hugging Face OAuth login widget; populates request.username.
    gr.LoginButton()

    with gr.Row():
        run_button = gr.Button(
            "Run Evaluation & Submit Answers", variant="primary"
        )

    # Outputs: a read-only status line plus the per-question answer log.
    status_box = gr.Textbox(label="Status", interactive=False)
    results_frame = gr.DataFrame(label="Results", wrap=True)

    # No explicit inputs: Gradio injects the gr.Request argument of
    # run_and_submit_all automatically.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status_box, results_frame],
    )

if __name__ == "__main__":
    demo.launch()