import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig


def greet(name):
    # Leftover from the default Gradio template; not used by the interface below.
    return "Hello " + name + "!!"

class InferenceFineTuning:
    def __init__(self, model_path):
        peft_model_id = f"hyang0503/{model_path}"
        config = PeftConfig.from_pretrained(peft_model_id)
        # 4-bit NF4 quantization with double quantization; computations run in bfloat16.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        # Load the quantized base model, then attach the LoRA adapter weights.
        self.model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            quantization_config=bnb_config,
            device_map="auto",
        )
        self.model = PeftModel.from_pretrained(self.model, peft_model_id)
        # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.eval()
    def generate(self, q):  # differs from the practice notebook
        # Prompt format from the KoAlpaca-style fine-tune: "### 질문:" (question) / "### 답변:" (answer).
        inputs = self.tokenizer(
            f"### 질문: {q}\n\n### 답변:",
            return_tensors="pt",
            return_token_type_ids=False,
        ).to("cuda")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=256,
            early_stopping=True,
            do_sample=True,
            eos_token_id=2,  # hard-coded EOS id, as in the original
        )
        # Return (rather than print) the decoded text so Gradio can display it.
        return self.tokenizer.decode(outputs[0])

ifg = InferenceFineTuning("qlora-koalpaca")
iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
iface.launch()
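
# Optional: once the Space is up, the interface can also be queried programmatically
# with gradio_client. A minimal sketch, assuming the Space id is
# "hyang0503/qlora-koalpaca" (hypothetical; the code above only names the PEFT
# model repo, not the Space itself):
#
#   from gradio_client import Client
#
#   client = Client("hyang0503/qlora-koalpaca")  # hypothetical Space id
#   answer = client.predict("What is QLoRA?", api_name="/predict")  # default endpoint of gr.Interface
#   print(answer)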