Upload folder using huggingface_hub

Files changed:
- .gitattributes  +1 -0
- codeshell-chat-q4_0.gguf  +3 -0
- modeling_codeshell.py  +5 -121
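
Per the commit title, the folder was pushed with the huggingface_hub client. A minimal sketch of such an upload, assuming a hypothetical local folder and target repo id (neither is named in this commit):

# Minimal sketch, not the exact command used for this commit: pushing a local
# folder with the huggingface_hub client. Folder path and repo id are placeholders.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="./codeshell-export",            # hypothetical local folder
    repo_id="your-org/your-codeshell-repo",      # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)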
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+codeshell-chat-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
codeshell-chat-q4_0.gguf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:124428f480ea1ee2451b78622e39b0fde88ea034acd34e933da56a1aa0b0bec1
+size 4569817120
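
Only the three-line Git LFS pointer above is stored in the git history; the actual q4_0-quantized GGUF weights (4,569,817,120 bytes, about 4.6 GB) live in LFS storage. A sketch of fetching the resolved file with huggingface_hub, assuming a placeholder repo id:

# Sketch: resolve the LFS pointer to the real GGUF file. The repo id below is a
# placeholder; only the filename and size come from this commit.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="your-org/your-codeshell-repo",   # placeholder repo id
    filename="codeshell-chat-q4_0.gguf",
)
print(local_path)  # local copy of the ~4.57 GB quantized model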
modeling_codeshell.py CHANGED
@@ -32,17 +32,14 @@
 """PyTorch CodeShell model."""
 import os
 import math
-from typing import List, Optional, Tuple, Union
-from threading import Thread
-from queue import Queue
-
+from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.utils.checkpoint
 from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
-from transformers import
+from transformers import PreTrainedModel, PretrainedConfig
 from transformers.generation.utils import GenerationConfig
 
 from transformers.activations import ACT2FN
@@ -57,6 +54,7 @@ from transformers.utils import (
 )
 from .configuration_codeshell import CodeShellConfig
 
+
 # Fused kernels
 # Use separate functions for each case because conditionals prevent kernel fusion.
 # TODO: Could have better fused kernels depending on scaling, dropout and head mask.
@@ -745,62 +743,6 @@ class CodeShellModel(CodeShellPreTrainedModel):
             hidden_states=all_hidden_states,
             attentions=all_self_attentions,
         )
-
-class EndOfFunctionCriteria(StoppingCriteria):
-    """Custom `StoppingCriteria` which checks if all generated functions in the batch are completed."""
-    def __init__(self, input_lengths, eof_strings, tokenizer):
-        self.input_lengths = input_lengths
-        self.eof_strings = eof_strings
-        self.tokenizer = tokenizer
-
-    def __call__(self, input_ids, scores, **kwargs):
-        """Returns true if all generated sequences contain any of the end-of-function strings."""
-        decoded_generations = []
-        for _input_ids, input_length in zip(input_ids, self.input_lengths):
-            decoded_generations.append(self.tokenizer.decode(_input_ids[input_length:]))
-        done = []
-        for decoded_generation in decoded_generations:
-            done.append(
-                any(
-                    [
-                        stop_string in decoded_generation
-                        for stop_string in self.eof_strings
-                    ]
-                )
-            )
-        return all(done)
-
-class TextIterStreamer:
-    def __init__(self, tokenizer, skip_prompt=False, skip_special_tokens=False):
-        self.tokenizer = tokenizer
-        self.skip_prompt = skip_prompt
-        self.skip_special_tokens = skip_special_tokens
-        self.tokens = []
-        self.text_queue = Queue()
-        self.next_tokens_are_prompt = True
-
-    def put(self, value):
-        if self.skip_prompt and self.next_tokens_are_prompt:
-            self.next_tokens_are_prompt = False
-        else:
-            if len(value.shape) > 1:
-                value = value[0]
-            self.tokens.extend(value.tolist())
-            self.text_queue.put(
-                self.tokenizer.decode(self.tokens, skip_special_tokens=self.skip_special_tokens))
-
-    def end(self):
-        self.text_queue.put(None)
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        value = self.text_queue.get()
-        if value is None:
-            raise StopIteration()
-        else:
-            return value
 
 
 @add_start_docstrings(
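
The deleted TextIterStreamer re-implements what transformers already ships as TextIteratorStreamer. A minimal sketch of streaming generation with the stock class; the model id and prompt are placeholders, not taken from this diff:

# Sketch: the same streaming behaviour with the built-in TextIteratorStreamer.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "your-org/your-codeshell-repo"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

inputs = tokenizer("def quick_sort(arr):", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so run it on a worker thread and iterate the streamer here
Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128)).start()
for chunk in streamer:
    print(chunk, end="", flush=True)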
@@ -944,65 +886,6 @@ class CodeShellForCausalLM(CodeShellPreTrainedModel):
                 tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
             )
         return reordered_past
-
-
-    def build_chat_input(self, query, history, tokenizer, max_new_tokens=None):
-        user_name = "\n## human:"
-        ai_name = "\n## assistant: "
-        stop = '|<end>|'
-
-        prompt = ''
-        for q, r in history:
-            prompt += f"{user_name}{q}{stop}"
-            prompt += f"{ai_name}{r}{stop}"
-        prompt += f"{user_name}{query}{stop}"
-        prompt += ai_name.rstrip()
-
-        max_new_tokens = max_new_tokens or self.generation_config.max_new_tokens
-        max_input_tokens = self.config.n_positions - max_new_tokens
-
-        input_tokens = tokenizer.encode(prompt)
-        input_tokens = input_tokens[-max_input_tokens:]  # truncate left
-        return torch.LongTensor([input_tokens]).to(self.device)
-
-    def chat(self, query, history, tokenizer, stream=False,
-             generation_config: Optional[GenerationConfig]=None):
-        generation_config = generation_config or self.generation_config
-        input_ids = self.build_chat_input(query, history, tokenizer, generation_config.max_new_tokens)
-        stopping_criteria = StoppingCriteriaList(
-            [EndOfFunctionCriteria([len(input_ids[0])], ['|<end>|', '<|endoftext|>'], tokenizer)]
-        )
-
-        if stream:
-            streamer = TextIterStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-            Thread(target=self.generate, kwargs=dict(
-                inputs=input_ids, streamer=streamer,
-                stopping_criteria=stopping_criteria,
-                generation_config=generation_config,
-            )).start()
-            return streamer
-        else:
-            outputs = self.generate(input_ids, generation_config=generation_config, stopping_criteria=stopping_criteria)
-            response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
-            return response
-
-    def generate_stream(self, prompt, tokenizer, generation_config=None, **kwargs):
-        generation_config = generation_config or self.generation_config
-        max_input_tokens = self.config.n_positions - self.generation_config.max_new_tokens
-
-        input_ids = tokenizer.encode(prompt)
-        input_ids = input_ids[-max_input_tokens:]  # truncate left
-
-        stopping_criteria = StoppingCriteriaList(
-            [EndOfFunctionCriteria([len(input_ids[0])], ['|<end>|', '<|endoftext|>'], tokenizer)]
-        )
-
-        streamer = TextIterStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-        Thread(target=self.generate, kwargs=dict(
-            inputs=input_ids, stopping_criteria=stopping_criteria, **kwargs
-        )).start()
-        return streamer
-
 
 class CodeShell4bitForCausalLM(CodeShellForCausalLM):
     def __init__(self, config):
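
With build_chat_input, chat, and generate_stream removed, callers have to rebuild the chat flow themselves on top of plain generate(). A sketch using the same prompt template and stop markers as the deleted code ("## human:" / "## assistant: " and '|<end>|' / '<|endoftext|>'); the model/tokenizer objects and max_new_tokens value are assumptions, not part of this diff:

# Sketch: reproducing the deleted chat() helper with standard transformers APIs.
from transformers import StoppingCriteria, StoppingCriteriaList


def build_chat_prompt(query, history):
    # Same conversation template as the removed build_chat_input
    user, ai, stop = "\n## human:", "\n## assistant: ", "|<end>|"
    prompt = ""
    for q, r in history:
        prompt += f"{user}{q}{stop}{ai}{r}{stop}"
    return prompt + f"{user}{query}{stop}" + ai.rstrip()


class StopOnStrings(StoppingCriteria):
    """Stops generation once any stop string appears after the prompt."""

    def __init__(self, prompt_len, stop_strings, tokenizer):
        self.prompt_len, self.stop_strings, self.tokenizer = prompt_len, stop_strings, tokenizer

    def __call__(self, input_ids, scores, **kwargs):
        generated = self.tokenizer.decode(input_ids[0][self.prompt_len:])
        return any(s in generated for s in self.stop_strings)


def chat_once(model, tokenizer, query, history, max_new_tokens=512):
    input_ids = tokenizer(build_chat_prompt(query, history), return_tensors="pt").input_ids.to(model.device)
    criteria = StoppingCriteriaList(
        [StopOnStrings(input_ids.shape[1], ["|<end>|", "<|endoftext|>"], tokenizer)]
    )
    output = model.generate(input_ids, stopping_criteria=criteria, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)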
@@ -1083,4 +966,5 @@ class CodeShell4bitForCausalLM(CodeShellForCausalLM):
         if device_map is not None:
             model = model.to(torch.device(device_map))
 
-        return model
+        return model
+