xu song
committed on
Commit
·
d8e1d2b
1
Parent(s):
1e92d4c
update
Browse files - models/cpp_qwen2.py +10 -2
models/cpp_qwen2.py
CHANGED
|
@@ -128,6 +128,8 @@ class Qwen2Simulator(Simulator):
|
|
| 128 |
|
| 129 |
self.user_start_tokens = self.tokenize("<|im_start|>user\n")
|
| 130 |
self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
|
|
|
|
|
|
|
| 131 |
|
| 132 |
def tokenize(self, text):
|
| 133 |
return self.llm.tokenize(text.encode("utf-8"))
|
|
@@ -142,6 +144,13 @@ class Qwen2Simulator(Simulator):
|
|
| 142 |
return tokens
|
| 143 |
|
| 144 |
def generate(self, history, stream=True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
if history[-1]['role'] in ["user"]:
|
| 146 |
start_tokens = self.assistant_start_tokens
|
| 147 |
elif history[-1]['role'] in ["assistant", "system"]:
|
|
@@ -162,8 +171,6 @@ class Qwen2Simulator(Simulator):
|
|
| 162 |
|
| 163 |
def _stream_generate(self, input_ids):
|
| 164 |
logger.info(f"generation_kwargs {self.generation_kwargs}")
|
| 165 |
-
|
| 166 |
-
# self.llm.generate .set_cache .last_n_tokens_size .reset .ctx ._ctx
|
| 167 |
output = self.llm.create_completion(
|
| 168 |
input_ids,
|
| 169 |
stream=True,
|
|
@@ -190,6 +197,7 @@ if __name__ == "__main__":
|
|
| 190 |
print(generated_text, generated_tokens)
|
| 191 |
|
| 192 |
for i in range(3):
|
|
|
|
| 193 |
messages.append(
|
| 194 |
{"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
|
| 195 |
print("######## requesting", messages)
|
|
|
|
| 128 |
|
| 129 |
self.user_start_tokens = self.tokenize("<|im_start|>user\n")
|
| 130 |
self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
|
| 131 |
+
# self.llm.generate .set_cache .last_n_tokens_size .reset .ctx ._ctx
|
| 132 |
+
# self.llm.set_cache()
|
| 133 |
|
| 134 |
def tokenize(self, text):
|
| 135 |
return self.llm.tokenize(text.encode("utf-8"))
|
|
|
|
| 144 |
return tokens
|
| 145 |
|
| 146 |
def generate(self, history, stream=True):
|
| 147 |
+
"""
|
| 148 |
+
额外前向:remains 5 to forward "<|im_end|>\n<|im_start|>assistant\n"
|
| 149 |
+
|
| 150 |
+
:param history:
|
| 151 |
+
:param stream:
|
| 152 |
+
:return:
|
| 153 |
+
"""
|
| 154 |
if history[-1]['role'] in ["user"]:
|
| 155 |
start_tokens = self.assistant_start_tokens
|
| 156 |
elif history[-1]['role'] in ["assistant", "system"]:
|
|
|
|
| 171 |
|
| 172 |
def _stream_generate(self, input_ids):
|
| 173 |
logger.info(f"generation_kwargs {self.generation_kwargs}")
|
|
|
|
|
|
|
| 174 |
output = self.llm.create_completion(
|
| 175 |
input_ids,
|
| 176 |
stream=True,
|
|
|
|
| 197 |
print(generated_text, generated_tokens)
|
| 198 |
|
| 199 |
for i in range(3):
|
| 200 |
+
generated_tokens = bot.strip_stoptokens(generated_tokens)
|
| 201 |
messages.append(
|
| 202 |
{"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
|
| 203 |
print("######## requesting", messages)
|