Update app.py
Browse files
app.py
CHANGED
@@ -6,154 +6,113 @@ from transformers import pipeline
|
|
6 |
|
7 |
# --- Constants ---
|
8 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
9 |
-
HF_MODEL_NAME = "facebook/bart-large-mnli" #
|
10 |
|
11 |
-
# ---
|
12 |
class BasicAgent:
|
13 |
-
def __init__(self
|
14 |
-
print("Initializing
|
15 |
-
self.hf_token = hf_token
|
16 |
-
self.llm = None
|
17 |
-
|
18 |
try:
|
19 |
-
# Using a smaller model that works better in Spaces
|
20 |
self.llm = pipeline(
|
21 |
"text-generation",
|
22 |
model=HF_MODEL_NAME,
|
23 |
-
token=hf_token,
|
24 |
device_map="auto"
|
25 |
)
|
26 |
-
print("LLM initialized successfully")
|
27 |
except Exception as e:
|
28 |
-
print(f"
|
29 |
-
# Fallback to simple responses if LLM fails
|
30 |
self.llm = None
|
31 |
|
32 |
def __call__(self, question: str) -> str:
|
33 |
if not self.llm:
|
34 |
-
return "
|
35 |
-
|
36 |
try:
|
37 |
-
|
38 |
-
response = self.llm(
|
39 |
-
question,
|
40 |
-
max_length=100,
|
41 |
-
do_sample=True,
|
42 |
-
temperature=0.7
|
43 |
-
)
|
44 |
return response[0]['generated_text']
|
45 |
except Exception as e:
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
def run_and_submit_all(request: gr.Request):
|
50 |
-
"""
|
51 |
-
Modified to work with Gradio's auth system
|
52 |
-
"""
|
53 |
-
# Get username from auth
|
54 |
-
if not request.username:
|
55 |
-
return "Please login with Hugging Face account", None
|
56 |
-
|
57 |
-
username = request.username
|
58 |
space_id = os.getenv("SPACE_ID")
|
59 |
api_url = DEFAULT_API_URL
|
60 |
-
|
61 |
-
submit_url = f"{api_url}/submit"
|
62 |
-
|
63 |
-
# 1. Instantiate Agent
|
64 |
-
try:
|
65 |
-
agent = BasicAgent(hf_token=os.getenv("HF_TOKEN"))
|
66 |
-
except Exception as e:
|
67 |
-
return f"Error initializing agent: {e}", None
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
# 2. Fetch Questions
|
72 |
try:
|
73 |
-
response = requests.get(
|
74 |
-
response.
|
75 |
-
questions_data = response.json()
|
76 |
-
if not questions_data:
|
77 |
-
return "No questions received from server", None
|
78 |
except Exception as e:
|
79 |
-
return f"
|
80 |
|
81 |
-
#
|
82 |
-
|
83 |
-
|
84 |
-
for
|
85 |
-
task_id = item.get("task_id")
|
86 |
-
question_text = item.get("question")
|
87 |
-
if not task_id or not question_text:
|
88 |
-
continue
|
89 |
-
|
90 |
try:
|
91 |
-
answer = agent(
|
92 |
-
|
93 |
-
"task_id": task_id,
|
94 |
"submitted_answer": answer
|
95 |
})
|
96 |
-
|
97 |
-
"Task ID": task_id,
|
98 |
-
"Question":
|
99 |
-
"
|
100 |
})
|
101 |
except Exception as e:
|
102 |
-
|
103 |
-
"Task ID": task_id,
|
104 |
-
"Question":
|
105 |
-
"
|
106 |
})
|
107 |
|
108 |
-
|
109 |
-
return "No valid answers generated", pd.DataFrame(results_log)
|
110 |
-
|
111 |
-
# 4. Submit Answers
|
112 |
-
submission_data = {
|
113 |
-
"username": username,
|
114 |
-
"agent_code": agent_code,
|
115 |
-
"answers": answers_payload
|
116 |
-
}
|
117 |
-
|
118 |
try:
|
119 |
-
response = requests.post(
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
result = response.json()
|
122 |
-
|
123 |
-
|
124 |
-
f"
|
125 |
-
|
126 |
-
f"Score: {result.get('score', 'N/A')}% "
|
127 |
-
f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
|
128 |
-
f"Message: {result.get('message', '')}"
|
129 |
)
|
130 |
-
return status, pd.DataFrame(results_log)
|
131 |
except Exception as e:
|
132 |
-
return f"Submission failed: {str(e)}", pd.DataFrame(
|
133 |
|
134 |
# --- Gradio Interface ---
|
135 |
with gr.Blocks() as demo:
|
136 |
-
gr.Markdown("# LLM Agent Evaluation
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
144 |
gr.LoginButton()
|
145 |
|
146 |
-
|
147 |
-
|
|
|
148 |
|
149 |
-
status_output = gr.Textbox(label="Status", interactive=False)
|
150 |
-
results_table = gr.DataFrame(label="Results", wrap=True)
|
151 |
-
|
152 |
run_btn.click(
|
153 |
fn=run_and_submit_all,
|
154 |
-
|
155 |
-
outputs=[status_output, results_table]
|
156 |
)
|
157 |
|
158 |
if __name__ == "__main__":
|
159 |
-
demo.launch()
|
# --- Constants ---
# Scoring service used by the HF Agents course (unit 4).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# NOTE(review): facebook/bart-large-mnli is an NLI *classification*
# checkpoint, not a text-generation model; the pipeline below loads it
# under the "text-generation" task, which is likely to fail and trigger
# the agent's fallback answer. Confirm the intended model/task pairing.
HF_MODEL_NAME = "facebook/bart-large-mnli"
# --- Agent Definition ---
class BasicAgent:
    """Minimal question-answering agent backed by a local HF pipeline.

    If the pipeline cannot be built (model download failure, task/model
    mismatch, no GPU, ...), every call returns a canned fallback answer
    instead of raising.
    """

    def __init__(self):
        """Try to build the text-generation pipeline; fall back to None."""
        print("Initializing Agent...")
        # Pre-set the attribute so it exists even if pipeline() raises
        # something unexpected before the except clause assigns it.
        self.llm = None
        try:
            # NOTE(review): HF_MODEL_NAME points at an NLI checkpoint —
            # loading it as "text-generation" likely fails, routing all
            # calls through the fallback below. Verify model choice.
            self.llm = pipeline(
                "text-generation",
                model=HF_MODEL_NAME,
                device_map="auto",
            )
        except Exception as e:
            print(f"LLM initialization failed: {e}")
            self.llm = None

    def __call__(self, question: str) -> str:
        """Return generated text for *question*, or an error/fallback string.

        Never raises: any pipeline failure is converted into an
        "Error: ..." string so the evaluation loop can keep going.
        """
        if not self.llm:
            return "Default answer (LLM not available)"
        try:
            response = self.llm(question, max_length=100)
            return response[0]['generated_text']
        except Exception as e:
            return f"Error: {str(e)}"
def run_and_submit_all():
    """Fetch questions, run the agent on each, and submit answers for scoring.

    Returns:
        (status_message, results) where results is a pandas DataFrame of
        per-question outcomes, or None when nothing was processed.
    """
    # NOTE(review): Gradio's OAuth LoginButton does not populate
    # GRADIO_AUTH_USERNAME — the username normally arrives via a
    # gr.Request / gr.OAuthProfile parameter. Verify this env var is
    # actually set in this Space, otherwise this always early-returns.
    username = os.getenv("GRADIO_AUTH_USERNAME")
    if not username:
        return "Please login first", None

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    agent = BasicAgent()

    # 1. Fetch questions. raise_for_status() fails fast on HTTP errors
    # instead of trying to JSON-decode an error page.
    try:
        response = requests.get(f"{api_url}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            return "No questions received from server", None
    except Exception as e:
        return f"Failed to get questions: {str(e)}", None

    # 2. Run the agent over every question, logging each outcome.
    results = []
    answers = []
    for q in questions:
        try:
            answer = agent(q.get("question", ""))
            answers.append({
                "task_id": q.get("task_id"),
                "submitted_answer": answer
            })
            results.append({
                "Task ID": q.get("task_id"),
                "Question": q.get("question"),
                "Answer": answer
            })
        except Exception as e:
            # Keep going on per-question failures; record them in the log.
            results.append({
                "Task ID": q.get("task_id"),
                "Question": q.get("question"),
                "Answer": f"Error: {str(e)}"
            })

    if not answers:
        # Nothing submittable — surface the log so failures are visible.
        return "No valid answers generated", pd.DataFrame(results)

    # 3. Submit answers.
    try:
        response = requests.post(
            f"{api_url}/submit",
            json={
                "username": username,
                "agent_code": f"https://huggingface.co/spaces/{space_id}",
                "answers": answers
            },
            timeout=60
        )
        # Without this, a 4xx/5xx JSON error body would be reported
        # as "Success!" below.
        response.raise_for_status()
        result = response.json()
        return (
            f"Success! Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}",
            pd.DataFrame(results)
        )
    except Exception as e:
        return f"Submission failed: {str(e)}", pd.DataFrame(results)
# --- Gradio Interface ---
# Build the evaluation UI: login, a run button, and the status/results outputs.
with gr.Blocks() as demo:
    gr.Markdown("# LLM Agent Evaluation")

    with gr.Accordion("Instructions", open=False):
        gr.Markdown("""
        1. Click the login button
        2. Authorize with your Hugging Face account
        3. Click 'Run Evaluation'
        """)

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation", variant="primary")
    status_box = gr.Textbox(label="Status")
    results_table = gr.DataFrame(label="Results", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, results_table],
    )

if __name__ == "__main__":
    demo.launch(auth_message="Please login with your Hugging Face account")