|
import gradio as gr |
|
import pandas as pd |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import torch |
|
|
|
|
|
import os

from huggingface_hub import login

# Authenticate BEFORE the first from_pretrained() call: meta-llama checkpoints
# are access-controlled on the Hugging Face Hub, so downloading them without a
# valid token fails. The deployment-specific secret name is tried first;
# HF_TOKEN is accepted as a fallback because it is the variable the warning
# below tells the user to set.
hf_token = os.environ.get("llama3.31-8b-token-new") or os.environ.get("HF_TOKEN")

if hf_token:
    login(token=hf_token)
else:
    # Not fatal: a previously cached login may still grant access.
    print("HF_TOKEN 환경 변수가 설정되지 않았습니다.")

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half-precision weights; device placement is delegated to the library through
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
|
|
|
|
|
|
|
|
|
from datasets import load_dataset

# Name of the KMMLU configuration (one configuration per subject) to evaluate.
_KMMLU_CONFIG = "Accounting"

# load_dataset() without `split=` returns a DatasetDict keyed by split name,
# which supports neither the boolean-mask filtering nor the .iterrows() /
# .unique() calls the rest of this file performs on `df`. Load a single split
# and convert it to a pandas DataFrame instead.
# NOTE(review): "test" is assumed to be the evaluation split - confirm against
# the dataset card.
df = load_dataset("HAERAE-HUB/KMMLU", _KMMLU_CONFIG, split="test").to_pandas()

# The code below filters on a 'subject' column. When the loaded split does not
# provide one, tag every row with the configuration name so the filter and the
# subject dropdown keep working.
if "subject" not in df.columns:
    df["subject"] = _KMMLU_CONFIG
|
|
|
def evaluate_model(question, choices):
    """Ask the model one multiple-choice question and return its one-token answer.

    The prompt contains the question, the choices labelled "A.", "B.", ... in
    the order given, and a trailing answer cue, so the single generated token
    is expected to be the letter of the selected option.

    Args:
        question: Question text.
        choices: Iterable of answer options, in label order.

    Returns:
        The decoded text of the generated token with surrounding whitespace
        removed (special tokens are skipped, so the result may be empty).
    """
    option_lines = "".join(
        f"{chr(ord('A') + index)}. {choice}\n"
        for index, choice in enumerate(choices)
    )
    prompt = f"질문: {question}\n\n선택지:\n{option_lines}\n답변:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # Greedy decoding is requested with do_sample=False. temperature=0.0
        # is not a valid sampling temperature and is rejected when sampling is
        # enabled in the checkpoint's generation config.
        outputs = model.generate(**inputs, max_new_tokens=1, do_sample=False)

    # generate() returns prompt + continuation; decode only the continuation.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
|
|
|
def _answer_letter(answer, labels="ABCD"):
    """Normalise a gold answer to an option letter.

    A 1-based option number (int or numeric string) is mapped to the matching
    letter in *labels*; anything else is returned stripped and upper-cased.
    """
    text = str(answer).strip()
    if text.isdigit():
        position = int(text) - 1
        if 0 <= position < len(labels):
            return labels[position]
    return text.upper()


def run_kmmlu_test(subject):
    """Evaluate the model on every question of *subject* and build a text report.

    Args:
        subject: Value of the 'subject' column selecting the rows to evaluate.

    Returns:
        A string made of a summary (subject, accuracy, correct/total) followed
        by one entry per question showing the model's answer, the gold answer
        and whether they match. When no row matches *subject* the summary
        reports 0/0 instead of raising ZeroDivisionError.
    """
    subject_df = df[df['subject'] == subject]
    total = len(subject_df)
    correct = 0
    results = []

    for _, row in subject_df.iterrows():
        question = row['question']
        choices = [row['A'], row['B'], row['C'], row['D']]
        # The model answers with a letter, while the dataset may store the gold
        # answer as a 1-based option number; compare both as letters.
        correct_answer = _answer_letter(row['answer'])

        model_answer = evaluate_model(question, choices)
        is_correct = model_answer.strip().upper() == correct_answer
        if is_correct:
            correct += 1

        results.append(
            f"질문: {question}\n"
            f"모델 답변: {model_answer}\n"
            f"정답: {correct_answer}\n"
            f"정확도: {'맞음' if is_correct else '틀림'}\n"
        )

    # Guard against an empty selection (e.g. nothing chosen in the dropdown).
    accuracy = correct / total if total else 0.0
    summary = f"주제: {subject}\n정확도: {accuracy:.2%} ({correct}/{total})\n\n"
    return summary + "\n".join(results)
|
|
|
# Distinct values of the 'subject' column, offered as dropdown choices.
subjects = df['subject'].unique().tolist()

# Gradio UI: one subject dropdown in, the plain-text report produced by
# run_kmmlu_test out. Each string literal is kept on a single physical line;
# a literal split across lines is a syntax error.
iface = gr.Interface(
    fn=run_kmmlu_test,
    inputs=gr.Dropdown(choices=subjects, label="주제 선택"),
    outputs="text",
    title="Llama 3을 이용한 KMMLU 테스트",
    description="선택한 주제에 대해 KMMLU 테스트를 실행합니다.",
)

iface.launch()