ATK20 committed on
Commit
8b83970
·
verified ·
1 Parent(s): 9efd6bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -72
app.py CHANGED
@@ -2,107 +2,115 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
- DEFAULT_HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"
10
 
11
- # --- Basic Agent Definition ---
12
  class BasicAgent:
13
- def __init__(self, hf_token=None, model_name=DEFAULT_HF_MODEL):
14
- print("Initializing BasicAgent with LLM...")
15
  self.hf_token = hf_token
16
- self.model_name = model_name
17
  self.llm = None
18
 
19
- if hf_token:
20
- try:
21
- print(f"Loading model: {model_name}")
22
- self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
23
- self.model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)
24
- self.llm = pipeline(
25
- "text-generation",
26
- model=self.model,
27
- tokenizer=self.tokenizer,
28
- device_map="auto"
29
- )
30
- print("Model loaded successfully")
31
- except Exception as e:
32
- print(f"Error loading model: {e}")
33
- raise Exception(f"Could not load model: {e}")
34
- else:
35
- print("No HF token provided - agent will use default answers")
36
-
37
  def __call__(self, question: str) -> str:
38
  if not self.llm:
39
- return "This is a default answer (no LLM initialized)"
40
 
41
  try:
42
- print(f"Generating answer for question: {question[:50]}...")
43
  response = self.llm(
44
  question,
45
- max_new_tokens=150,
46
  do_sample=True,
47
- temperature=0.7,
48
- top_p=0.9
49
  )
50
  return response[0]['generated_text']
51
  except Exception as e:
52
  print(f"Error generating answer: {e}")
53
  return f"Error generating answer: {e}"
54
 
55
- def run_and_submit_all(hf_token: str, request: gr.Request):
56
- """Main function to run evaluation and submit answers"""
57
- # Get user info from the request
 
 
58
  if not request.username:
59
- return "Please Login to Hugging Face with the button.", None
60
-
61
  username = request.username
62
  space_id = os.getenv("SPACE_ID")
63
  api_url = DEFAULT_API_URL
64
  questions_url = f"{api_url}/questions"
65
  submit_url = f"{api_url}/submit"
66
 
67
- # Initialize agent
68
  try:
69
- agent = BasicAgent(hf_token=hf_token)
70
  except Exception as e:
71
  return f"Error initializing agent: {e}", None
72
 
73
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
74
 
75
- # Fetch questions
76
  try:
77
  response = requests.get(questions_url, timeout=15)
78
  response.raise_for_status()
79
  questions_data = response.json()
80
  if not questions_data:
81
- return "Fetched questions list is empty or invalid format.", None
82
  except Exception as e:
83
  return f"Error fetching questions: {e}", None
84
 
85
- # Process questions
86
  results_log = []
87
  answers_payload = []
88
  for item in questions_data:
89
  task_id = item.get("task_id")
90
  question_text = item.get("question")
91
- if not task_id or question_text is None:
92
  continue
 
93
  try:
94
- submitted_answer = agent(question_text)
95
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
96
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
97
  except Exception as e:
98
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
99
 
100
  if not answers_payload:
101
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
102
 
103
- # Submit answers
104
  submission_data = {
105
- "username": username.strip(),
106
  "agent_code": agent_code,
107
  "answers": answers_payload
108
  }
@@ -110,47 +118,40 @@ def run_and_submit_all(hf_token: str, request: gr.Request):
110
  try:
111
  response = requests.post(submit_url, json=submission_data, timeout=60)
112
  response.raise_for_status()
113
- result_data = response.json()
114
- final_status = (
 
115
  f"Submission Successful!\n"
116
- f"User: {result_data.get('username')}\n"
117
- f"Overall Score: {result_data.get('score', 'N/A')}% "
118
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
119
- f"Message: {result_data.get('message', 'No message received.')}"
120
  )
121
- return final_status, pd.DataFrame(results_log)
122
  except Exception as e:
123
- return f"Submission Failed: {e}", pd.DataFrame(results_log)
124
 
125
  # --- Gradio Interface ---
126
  with gr.Blocks() as demo:
127
  gr.Markdown("# LLM Agent Evaluation Runner")
128
  gr.Markdown("""
129
  **Instructions:**
130
- 1. Get your Hugging Face API token from [your settings](https://huggingface.co/settings/tokens)
131
- 2. Enter your token below
132
- 3. Log in to your Hugging Face account
133
- 4. Click 'Run Evaluation & Submit All Answers'
134
  """)
135
 
 
 
136
  with gr.Row():
137
- hf_token_input = gr.Textbox(
138
- label="Hugging Face API Token",
139
- type="password",
140
- placeholder="hf_xxxxxxxxxxxxxxxx",
141
- info="Required for LLM access"
142
- )
143
 
144
- gr.LoginButton()
145
-
146
- run_button = gr.Button("Run Evaluation & Submit All Answers")
147
-
148
- status_output = gr.Textbox(label="Run Status", lines=5)
149
  results_table = gr.DataFrame(label="Results", wrap=True)
150
 
151
- run_button.click(
152
  fn=run_and_submit_all,
153
- inputs=[hf_token_input],
154
  outputs=[status_output, results_table]
155
  )
156
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from transformers import pipeline
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
+ HF_MODEL_NAME = "facebook/bart-large-mnli" # Smaller, free model that works well in Spaces
10
 
11
+ # --- Enhanced Agent Definition ---
class BasicAgent:
    """Question-answering agent backed by a Hugging Face text-generation pipeline.

    The pipeline is built once in ``__init__``. If that fails for any reason
    the agent stays usable and answers every question with a fixed
    placeholder string instead of raising.
    """

    # Upper bound on the number of tokens generated per answer.
    MAX_NEW_TOKENS = 100

    def __init__(self, hf_token=None):
        """Try to load the pipeline for ``HF_MODEL_NAME``.

        Args:
            hf_token: Optional Hugging Face access token forwarded to
                ``pipeline``; ``None`` means anonymous access.
        """
        print("Initializing LLM Agent...")
        self.hf_token = hf_token
        self.llm = None

        try:
            # NOTE(review): HF_MODEL_NAME currently points at an NLI
            # classification checkpoint; confirm it is really suitable for
            # the "text-generation" task before relying on the answers.
            self.llm = pipeline(
                "text-generation",
                model=HF_MODEL_NAME,
                token=hf_token,
                device_map="auto",
            )
            print("LLM initialized successfully")
        except Exception as e:
            # Best effort: keep the app running and fall back to the
            # placeholder answer in __call__.
            print(f"Error initializing LLM: {e}")
            self.llm = None

    def __call__(self, question: str) -> str:
        """Return the model's answer to ``question``.

        Args:
            question: The question text, used verbatim as the prompt.

        Returns:
            The generated answer with surrounding whitespace removed, a
            placeholder string when no pipeline is loaded, or an
            ``"Error generating answer: ..."`` message when generation fails.
        """
        if self.llm is None:
            return "This is a default answer (LLM not available)"

        try:
            print(f"Generating answer for: {question[:50]}...")
            response = self.llm(
                question,
                # max_length would count the prompt tokens as well, so a long
                # question could leave no room for an answer; bound only the
                # newly generated part.
                max_new_tokens=self.MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.7,
                # Return only the continuation, not the echoed question.
                return_full_text=False,
            )
            return response[0]['generated_text'].strip()
        except Exception as e:
            print(f"Error generating answer: {e}")
            return f"Error generating answer: {e}"
48
 
49
+ def run_and_submit_all(request: gr.Request):
50
+ """
51
+ Modified to work with Gradio's auth system
52
+ """
53
+ # Get username from auth
54
  if not request.username:
55
+ return "Please login with Hugging Face account", None
56
+
57
  username = request.username
58
  space_id = os.getenv("SPACE_ID")
59
  api_url = DEFAULT_API_URL
60
  questions_url = f"{api_url}/questions"
61
  submit_url = f"{api_url}/submit"
62
 
63
+ # 1. Instantiate Agent
64
  try:
65
+ agent = BasicAgent(hf_token=os.getenv("HF_TOKEN"))
66
  except Exception as e:
67
  return f"Error initializing agent: {e}", None
68
 
69
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
70
 
71
+ # 2. Fetch Questions
72
  try:
73
  response = requests.get(questions_url, timeout=15)
74
  response.raise_for_status()
75
  questions_data = response.json()
76
  if not questions_data:
77
+ return "No questions received from server", None
78
  except Exception as e:
79
  return f"Error fetching questions: {e}", None
80
 
81
+ # 3. Process Questions
82
  results_log = []
83
  answers_payload = []
84
  for item in questions_data:
85
  task_id = item.get("task_id")
86
  question_text = item.get("question")
87
+ if not task_id or not question_text:
88
  continue
89
+
90
  try:
91
+ answer = agent(question_text)
92
+ answers_payload.append({
93
+ "task_id": task_id,
94
+ "submitted_answer": answer
95
+ })
96
+ results_log.append({
97
+ "Task ID": task_id,
98
+ "Question": question_text,
99
+ "Submitted Answer": answer
100
+ })
101
  except Exception as e:
102
+ results_log.append({
103
+ "Task ID": task_id,
104
+ "Question": question_text,
105
+ "Submitted Answer": f"ERROR: {str(e)}"
106
+ })
107
 
108
  if not answers_payload:
109
+ return "No valid answers generated", pd.DataFrame(results_log)
110
 
111
+ # 4. Submit Answers
112
  submission_data = {
113
+ "username": username,
114
  "agent_code": agent_code,
115
  "answers": answers_payload
116
  }
 
118
  try:
119
  response = requests.post(submit_url, json=submission_data, timeout=60)
120
  response.raise_for_status()
121
+ result = response.json()
122
+
123
+ status = (
124
  f"Submission Successful!\n"
125
+ f"User: {result.get('username')}\n"
126
+ f"Score: {result.get('score', 'N/A')}% "
127
+ f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
128
+ f"Message: {result.get('message', '')}"
129
  )
130
+ return status, pd.DataFrame(results_log)
131
  except Exception as e:
132
+ return f"Submission failed: {str(e)}", pd.DataFrame(results_log)
133
 
134
  # --- Gradio Interface ---
135
  with gr.Blocks() as demo:
136
  gr.Markdown("# LLM Agent Evaluation Runner")
137
  gr.Markdown("""
138
  **Instructions:**
139
+ 1. Log in with your Hugging Face account
140
+ 2. Click 'Run Evaluation'
141
+ 3. View your results
 
142
  """)
143
 
144
+ gr.LoginButton()
145
+
146
  with gr.Row():
147
+ run_btn = gr.Button("Run Evaluation & Submit Answers", variant="primary")
 
 
 
 
 
148
 
149
+ status_output = gr.Textbox(label="Status", interactive=False)
 
 
 
 
150
  results_table = gr.DataFrame(label="Results", wrap=True)
151
 
152
+ run_btn.click(
153
  fn=run_and_submit_all,
154
+ inputs=[],
155
  outputs=[status_output, results_table]
156
  )
157