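"""Wrapper that loads GGML/GGUF models through the ctransformers library and
exposes them via the (model, tokenizer) interface expected by the web UI's
model loaders."""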
from ctransformers import AutoConfig, AutoModelForCausalLM

from modules import shared
from modules.callbacks import Iteratorize
from modules.logging_colors import logger


class CtransformersModel:
    def __init__(self):
        pass

    @classmethod
    def from_pretrained(cls, path):
        result = cls()

        config = AutoConfig.from_pretrained(
            str(path),
            threads=shared.args.threads if shared.args.threads != 0 else -1,
            gpu_layers=shared.args.n_gpu_layers,
            batch_size=shared.args.n_batch,
            context_length=shared.args.n_ctx,
            stream=True,
            mmap=not shared.args.no_mmap,
            mlock=shared.args.mlock
        )

        result.model = AutoModelForCausalLM.from_pretrained(
            str(result.model_dir(path) if result.model_type_is_auto() else path),
            model_type=(None if result.model_type_is_auto() else shared.args.model_type),
            config=config
        )

        logger.info(f'Using ctransformers model_type: {result.model.model_type} for {result.model.model_path}')
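        # The ctransformers model handles its own tokenization (see encode()
        # and decode() below), so the same object is returned for both the
        # model and the tokenizer slots expected by the caller.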
        return result, result

    def model_type_is_auto(self):
        return shared.args.model_type is None or shared.args.model_type in ('Auto', 'None')

    def model_dir(self, path):
        # If a specific model file was selected, load from its parent directory.
        if path.is_file():
            return path.parent

        return path

    def encode(self, string, **kwargs):
        return self.model.tokenize(string)

    def decode(self, ids):
        return self.model.detokenize(ids)

    def generate(self, prompt, state, callback=None):
        prompt = prompt if isinstance(prompt, str) else prompt.decode()

        # ctransformers uses -1 for a random seed
        generator = self.model(
            prompt=prompt,
            max_new_tokens=state['max_new_tokens'],
            temperature=state['temperature'],
            top_p=state['top_p'],
            top_k=state['top_k'],
            repetition_penalty=state['repetition_penalty'],
            last_n_tokens=state['repetition_penalty_range'],
            seed=int(state['seed'])
        )

        output = ''
        for token in generator:
            if callback:
                callback(token)

            output += token

        return output

    def generate_with_streaming(self, *args, **kwargs):
        with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
            reply = ''
            for token in generator:
                reply += token
                yield reply
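
# A minimal usage sketch (an illustration, not part of the module). It assumes
# shared.args has already been populated by the application's argument parser;
# the model path below is a hypothetical placeholder:
#
#     from pathlib import Path
#
#     model, tokenizer = CtransformersModel.from_pretrained(Path('models/example.gguf'))
#     state = {
#         'max_new_tokens': 200, 'temperature': 0.7, 'top_p': 0.9, 'top_k': 40,
#         'repetition_penalty': 1.1, 'repetition_penalty_range': 64, 'seed': -1
#     }
#     for partial_reply in model.generate_with_streaming('Hello', state):
#         print(partial_reply)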