AilexGPT committed on
Commit
371b2f1
·
1 Parent(s): 0fe52f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -4
app.py CHANGED
@@ -1,14 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from datasets import load_dataset
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
3
- import torch
4
 
5
  #dataset_name = "timdettmers/openassistant-guanaco" ###Human ,.,,,,,, ###Assistant
6
 
7
  dataset_name = 'AlexanderDoria/novel17_test' #french novels
8
  dataset = load_dataset(dataset_name, split="train")
9
- demo.launch()
10
 
 
11
 
 
 
12
 
13
  model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
14
 
@@ -23,4 +54,115 @@ model = AutoModelForCausalLM.from_pretrained(
23
  quantization_config=bnb_config,
24
  trust_remote_code=True
25
  )
26
- model.config.use_cache = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Kopie von Llama 2 Fine-Tuning using QLora
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/13dJqh-1y3KYGi5R82eqXGafkM5Y5k_ff
8
+
9
+ **Code Credit: Hugging Face**
10
+
11
+ **Dataset Credit: https://twitter.com/Dorialexander/status/1681671177696161794**
12
+
13
+ ## Finetune Llama-2-7b on a Google colab
14
+
15
+ Welcome to this Google Colab notebook that shows how to fine-tune the recent Llama-2-7b model on a single Google Colab instance and turn it into a chatbot.
16
+
17
+ We will leverage the PEFT library from the Hugging Face ecosystem, as well as QLoRA for more memory-efficient fine-tuning.
18
+
19
+ ## Setup
20
+
21
+ Run the cells below to set up and install the required libraries. For our experiment we will need `accelerate`, `peft`, `transformers`, `datasets` and TRL to leverage the recent [`SFTTrainer`](https://huggingface.co/docs/trl/main/en/sft_trainer). We will use `bitsandbytes` to [quantize the base model into 4bit](https://huggingface.co/blog/4bit-transformers-bitsandbytes). We will also install `einops` as it is a requirement to load Falcon models.
22
+ """
23
+
24
+ !pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
25
+ !pip install -q datasets bitsandbytes einops wandb
26
+
27
+ """## Dataset
28
+
29
+
30
+ """
31
+
32
  from datasets import load_dataset
 
 
33
 
34
  #dataset_name = "timdettmers/openassistant-guanaco" ###Human ,.,,,,,, ###Assistant
35
 
36
  dataset_name = 'AlexanderDoria/novel17_test' #french novels
37
  dataset = load_dataset(dataset_name, split="train")
 
38
 
39
+ """## Loading the model"""
40
 
41
+ import torch
42
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
43
 
44
  model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
45
 
 
54
  quantization_config=bnb_config,
55
  trust_remote_code=True
56
  )
57
+ model.config.use_cache = False
58
+
59
+ """Let's also load the tokenizer below"""
60
+
61
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
62
+ tokenizer.pad_token = tokenizer.eos_token
63
+
64
+ from peft import LoraConfig, get_peft_model
65
+
66
+ lora_alpha = 16
67
+ lora_dropout = 0.1
68
+ lora_r = 64
69
+
70
+ peft_config = LoraConfig(
71
+ lora_alpha=lora_alpha,
72
+ lora_dropout=lora_dropout,
73
+ r=lora_r,
74
+ bias="none",
75
+ task_type="CAUSAL_LM"
76
+ )
77
+
78
+ """## Loading the trainer
79
+
80
+ Here we will use the [`SFTTrainer` from the TRL library](https://huggingface.co/docs/trl/main/en/sft_trainer), which provides a wrapper around the transformers `Trainer` to easily fine-tune models on instruction-based datasets using PEFT adapters. Let's first load the training arguments below.
81
+ """
82
+
83
+ from transformers import TrainingArguments
84
+
85
+ output_dir = "./results"
86
+ per_device_train_batch_size = 4
87
+ gradient_accumulation_steps = 4
88
+ optim = "paged_adamw_32bit"
89
+ save_steps = 100
90
+ logging_steps = 10
91
+ learning_rate = 2e-4
92
+ max_grad_norm = 0.3
93
+ max_steps = 100
94
+ warmup_ratio = 0.03
95
+ lr_scheduler_type = "constant"
96
+
97
+ training_arguments = TrainingArguments(
98
+ output_dir=output_dir,
99
+ per_device_train_batch_size=per_device_train_batch_size,
100
+ gradient_accumulation_steps=gradient_accumulation_steps,
101
+ optim=optim,
102
+ save_steps=save_steps,
103
+ logging_steps=logging_steps,
104
+ learning_rate=learning_rate,
105
+ fp16=True,
106
+ max_grad_norm=max_grad_norm,
107
+ max_steps=max_steps,
108
+ warmup_ratio=warmup_ratio,
109
+ group_by_length=True,
110
+ lr_scheduler_type=lr_scheduler_type,
111
+ )
112
+
113
+ """Then finally pass everything to the trainer"""
114
+
115
+ from trl import SFTTrainer
116
+
117
+ max_seq_length = 512
118
+
119
+ trainer = SFTTrainer(
120
+ model=model,
121
+ train_dataset=dataset,
122
+ peft_config=peft_config,
123
+ dataset_text_field="text",
124
+ max_seq_length=max_seq_length,
125
+ tokenizer=tokenizer,
126
+ args=training_arguments,
127
+ )
128
+
129
+ """We will also pre-process the model by upcasting the layer norms to float32 for more stable training"""
130
+
131
+ for name, module in trainer.model.named_modules():
132
+ if "norm" in name:
133
+ module = module.to(torch.float32)
134
+
135
+ """## Train the model
136
+
137
+ Now let's train the model! Simply call `trainer.train()`
138
+ """
139
+
140
+ trainer.train()
141
+
142
+ """During training, the model should converge nicely as follows:
143
+
144
+ ![image](https://huggingface.co/datasets/trl-internal-testing/example-images/resolve/main/images/loss-falcon-7b.png)
145
+
146
+ The `SFTTrainer` also takes care of properly saving only the adapters during training instead of saving the entire model.
147
+ """
148
+
149
+ model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model # Take care of distributed/parallel training
150
+ model_to_save.save_pretrained("outputs")
151
+
152
+ lora_config = LoraConfig.from_pretrained('outputs')
153
+ model = get_peft_model(model, lora_config)
154
+
155
+ dataset['text']
156
+
157
+ text = "Écrire un texte dans un style baroque sur la glace et le feu ### Assistant: Si j'en luis éton"
158
+ device = "cuda:0"
159
+
160
+ inputs = tokenizer(text, return_tensors="pt").to(device)
161
+ outputs = model.generate(**inputs, max_new_tokens=50)
162
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
163
+
164
+ from huggingface_hub import login
165
+ login()
166
+
167
+ model.push_to_hub("llama2-qlora-finetunined-french")
168
+