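# Gradio app that evaluates meta-llama/Meta-Llama-3.1-8B-Instruct on the
# Accounting subset of the KMMLU benchmark (HAERAE-HUB/KMMLU).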
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login
import os
from datasets import load_dataset
import accelerate
# Get the access token from an environment variable
hf_token = os.environ.get("HF_TOKEN")

# Log in to Hugging Face (required for the gated Llama 3.1 weights)
if hf_token:
    login(token=hf_token, add_to_git_credential=True)
else:
    print("HF_TOKEN environment variable is not set")
# Model and tokenizer setup
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=False,
    token=hf_token,
)
# Optional: pass device_map="auto" to shard the model across available devices
# Load the KMMLU dataset (Accounting subject)
dataset = load_dataset("HAERAE-HUB/KMMLU", "Accounting")
#dataset = load_dataset("HAERAE-HUB/KMMLU")
df = dataset['test'].to_pandas()
def evaluate_model(question, choices):
    # Build a multiple-choice prompt: question, lettered choices, answer cue
    prompt = f"Question: {question}\n\nChoices:\n"
    for i, choice in enumerate(choices):
        prompt += f"{chr(65 + i)}. {choice}\n"  # A. / B. / C. / D.
    prompt += "\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Generate a single token: the model is expected to emit the answer letter
        outputs = model.generate(**inputs, max_new_tokens=1, temperature=0.1)
    answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
    return answer
def run_kmmlu_test():
    correct = 0
    total = len(df)
    results = []
    for _, row in df.iterrows():
        question = row['question']
        choices = [row['A'], row['B'], row['C'], row['D']]
        # KMMLU stores the gold answer as an index; map it to a letter so it
        # can be compared with the model's A-D output (assumption: the
        # 'answer' column is a 1-based integer, 1 -> A ... 4 -> D)
        correct_answer = chr(64 + int(row['answer']))
        model_answer = evaluate_model(question, choices)
        is_correct = model_answer == correct_answer
        if is_correct:
            correct += 1
        results.append(
            f"Question: {question}\n"
            f"Model answer: {model_answer}\n"
            f"Correct answer: {correct_answer}\n"
            f"Result: {'correct' if is_correct else 'wrong'}\n"
        )
    accuracy = correct / total
    summary = f"Overall test results\nAccuracy: {accuracy:.2%} ({correct}/{total})\n\n"
    return summary + "\n".join(results)
iface = gr.Interface(
    fn=run_kmmlu_test,
    inputs=None,
    #inputs=gr.Dropdown(choices=subjects, label="Select subject"),
    outputs="text",
    title="KMMLU test using Llama 3",
    description="Runs the KMMLU test on the Accounting subject"
)
iface.launch(share=True)
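# Example local usage (assumed; on a Hugging Face Space, HF_TOKEN would be
# configured as a Space secret instead, and the filename here is hypothetical):
#   export HF_TOKEN=hf_xxx
#   python app.py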