Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from datasets import load_dataset
|
|
8 |
import accelerate
|
9 |
|
10 |
# 환경 변수에서 토큰 가져오기
|
11 |
-
hf_token = os.environ.get("HF_TOKEN",
|
12 |
|
13 |
# Hugging Face 로그인
|
14 |
if hf_token:
|
@@ -40,13 +40,12 @@ def evaluate_model(question, choices):
|
|
40 |
answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
|
41 |
return answer
|
42 |
|
43 |
-
def run_kmmlu_test(subject):
|
44 |
-
subject_df = df[df['subject'] == subject]
|
45 |
correct = 0
|
46 |
-
total = len(subject_df)
|
47 |
|
48 |
results = []
|
49 |
-
for _, row in subject_df.iterrows():
|
50 |
question = row['question']
|
51 |
choices = [row['A'], row['B'], row['C'], row['D']]
|
52 |
correct_answer = row['answer']
|
@@ -60,18 +59,17 @@ def run_kmmlu_test(subject):
|
|
60 |
results.append(f"질문: {question}\n모델 답변: {model_answer}\n정답: {correct_answer}\n정확도: {'맞음' if is_correct else '틀림'}\n")
|
61 |
|
62 |
accuracy = correct / total
|
63 |
-
summary = f"
|
64 |
return summary + "\n".join(results)
|
65 |
|
66 |
-
subjects=df['subject'].unique().tolist()
|
67 |
|
68 |
iface = gr.Interface(
|
69 |
fn=run_kmmlu_test,
|
70 |
-
|
71 |
-
inputs=gr.Dropdown(choices=subjects, label="주제 선택"),
|
72 |
outputs="text",
|
73 |
title="Llama 3를 이용한 KMMLU 테스트",
|
74 |
-
description="
|
75 |
)
|
76 |
|
77 |
iface.launch()
|
|
|
8 |
import accelerate
|
9 |
|
10 |
# 환경 변수에서 토큰 가져오기
|
11 |
+
hf_token = os.environ.get("HF_TOKEN", "Accounting")
|
12 |
|
13 |
# Hugging Face 로그인
|
14 |
if hf_token:
|
|
|
40 |
answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
|
41 |
return answer
|
42 |
|
43 |
+
def run_kmmlu_test():
|
|
|
44 |
correct = 0
|
45 |
+
total = len(df)
|
46 |
|
47 |
results = []
|
48 |
+
for _, row in df.iterrows():
|
49 |
question = row['question']
|
50 |
choices = [row['A'], row['B'], row['C'], row['D']]
|
51 |
correct_answer = row['answer']
|
|
|
59 |
results.append(f"질문: {question}\n모델 답변: {model_answer}\n정답: {correct_answer}\n정확도: {'맞음' if is_correct else '틀림'}\n")
|
60 |
|
61 |
accuracy = correct / total
|
62 |
+
summary = f"전체 테스트 결과\n정확도: {accuracy:.2%} ({correct}/{total})\n\n"
|
63 |
return summary + "\n".join(results)
|
64 |
|
|
|
65 |
|
66 |
iface = gr.Interface(
|
67 |
fn=run_kmmlu_test,
|
68 |
+
inputs=None,
|
69 |
+
#inputs=gr.Dropdown(choices=subjects, label="주제 선택"),
|
70 |
outputs="text",
|
71 |
title="Llama 3를 이용한 KMMLU 테스트",
|
72 |
+
description="Accounting 영역에 대한 KMMLU 테스트 수행"
|
73 |
)
|
74 |
|
75 |
iface.launch()
|