import os

import gradio as gr
import pandas as pd
import torch
import accelerate
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the Hugging Face token from the environment
hf_token = os.environ.get("HF_TOKEN", None)

# Log in to Hugging Face
if hf_token:
    login(token=hf_token, add_to_git_credential=True)
else:
    print("HF_TOKEN environment variable is not set")

# Set up the model and tokenizer
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=False,
    token=hf_token,
)  # removed option: device_map="auto"

# Load the KMMLU dataset
# NOTE: KMMLU is published as per-subject configs, so loading without a config name
# may require picking one (the commented line shows the Accounting config).
# dataset = load_dataset("HAERAE-HUB/KMMLU", "Accounting")
dataset = load_dataset("HAERAE-HUB/KMMLU")
df = dataset["test"].to_pandas()


def evaluate_model(question, choices):
    # Build a multiple-choice prompt; it stays in Korean to match the Korean benchmark
    prompt = f"질문: {question}\n\n선택지:\n"
    for i, choice in enumerate(choices):
        prompt += f"{chr(65 + i)}. {choice}\n"
    prompt += "\n답변:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy-decode a single answer token; do_sample=False replaces
        # temperature=0.0, which is not a valid sampling temperature
        outputs = model.generate(**inputs, max_new_tokens=1, do_sample=False)

    answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
    return answer


def run_kmmlu_test(subject):
    subject_df = df[df["subject"] == subject]
    correct = 0
    total = len(subject_df)
    results = []

    for _, row in subject_df.iterrows():
        question = row["question"]
        choices = [row["A"], row["B"], row["C"], row["D"]]
        # KMMLU stores the gold answer as an integer 1-4; map it to a letter (A-D)
        # so it can be compared with the model's letter answer
        correct_answer = chr(64 + int(row["answer"]))
        model_answer = evaluate_model(question, choices)
        is_correct = model_answer == correct_answer
        if is_correct:
            correct += 1
        results.append(
            f"Question: {question}\nModel answer: {model_answer}\n"
            f"Correct answer: {correct_answer}\n"
            f"Result: {'Correct' if is_correct else 'Incorrect'}\n"
        )

    accuracy = correct / total
    summary = f"Subject: {subject}\nAccuracy: {accuracy:.2%} ({correct}/{total})\n\n"
    return summary + "\n".join(results)


subjects = df["subject"].unique().tolist()

iface = gr.Interface(
    fn=run_kmmlu_test,
    inputs=gr.Dropdown(choices=subjects, label="Select a subject"),
    outputs="text",
    title="KMMLU test with Llama 3",
    description="Runs the KMMLU test for the selected subject.",
)

iface.launch()
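
# Example usage, as a sketch (the filename app.py is illustrative, not part of the source):
#   HF_TOKEN=<your Hugging Face token> python app.py
# Gradio then prints a local URL; open it in a browser, pick a subject from the dropdown,
# and the accuracy summary plus per-question results appear in the text output.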