Update README.md
README.md
CHANGED

This is a finetuned deepseek-coder-1.3b-base model for automatic code completion of Solidity code. The model was finetuned with QLoRA on an FIM-transformed and Slither-audited dataset. The corresponding dataset can be found at fbnhnsl/FIM_Solidity_Dataset on Hugging Face.
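
The training data can be loaded directly from the Hugging Face Hub with the `datasets` library. The sketch below is a minimal example; the split and column names are assumptions, so check the dataset card of fbnhnsl/FIM_Solidity_Dataset for the actual layout.

```python
from datasets import load_dataset

# Load the FIM-transformed, Slither-audited Solidity dataset
# (split and column names are assumptions; see the dataset card)
dataset = load_dataset("fbnhnsl/FIM_Solidity_Dataset")
print(dataset)  # lists the available splits and their columns

# Peek at one example from the first available split
first_split = next(iter(dataset.values()))
print(first_split[0])
```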

Example usage:

```python
# Load the finetuned model
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pretrained_checkpoint = 'deepseek-ai/deepseek-coder-1.3b-base'
finetuned_checkpoint = 'path/to/model'

# The finetuned tokenizer contains the added FIM and security special tokens
tokenizer = AutoTokenizer.from_pretrained(finetuned_checkpoint)

# Load the base model and resize its embeddings to the extended vocabulary
old_model = AutoModelForCausalLM.from_pretrained(pretrained_checkpoint)
old_model.resize_token_embeddings(len(tokenizer))

# Attach the QLoRA adapter weights on top of the base model
finetuned_model = PeftModel.from_pretrained(old_model, finetuned_checkpoint).to(device)

# ----------------------------------------------------------------------------
# General automatic code completion
code_example = '''<|secure_function|>\tfunction add('''

model_inputs = tokenizer(code_example, return_tensors="pt").to(device)

input_ids = model_inputs["input_ids"]
attention_mask = model_inputs["attention_mask"]

generated_ids = finetuned_model.generate(input_ids,
                                         do_sample=True,
                                         max_length=256,
                                         num_beams=4,
                                         temperature=0.3,
                                         pad_token_id=tokenizer.eos_token_id,
                                         attention_mask=attention_mask)

print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

# Expected output:
# function add(uint256 a, uint256 b) internal pure returns (uint256) {
#     return a + b;
# }

# ----------------------------------------------------------------------------
# Fill-in-the-middle: wrap the prefix and suffix in the model's FIM special tokens
def generate_fim(prefix, suffix, model, tokenizer, max_length=256):
    input_text = f"<|fim_begin|>{prefix}<|fim_hole|>{suffix}<|fim_end|>"
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_length=max_length,
        num_beams=8,
        temperature=0.3,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens (the middle part)
    middle = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
    return prefix + middle + suffix

prefix = '''pragma solidity ^0.8.0;\n\n'''

suffix = '''\n\ncontract FOO is Context, IERC20, Ownable {'''

print(generate_fim(prefix, suffix, finetuned_model, tokenizer))

# Expected output:
# pragma solidity ^0.8.0;
#
# import "@openzeppelin/contracts/utils/Context.sol" as Context;
# import "@openzeppelin/contracts/interfaces/IERC20.sol" as IERC20;
# import "@openzeppelin/contracts/access/Ownable.sol" as Ownable;
#
# contract FOO is Context, IERC20, Ownable {
```

If you wish to use this model, you can cite it as follows:

```latex
@misc{hensel2025fim_model,
  title  = {Finetuned deepseek-coder-1.3b-base model for automatic code completion of Solidity code},
  author = {Fabian Hensel},
  year   = {2025}
}
```