Spaces · Running on T4

sparkleman committed · Commit 4ed9fde
Parent(s): 94c4923

UPDATE: cache_word_list

app.py CHANGED
```diff
@@ -203,7 +203,8 @@ def generate(
     out_tokens: List[int] = []
     out_last = 0
 
-
+    cache_word_list = []
+    cache_word_len = 4
 
     for i in range(max_tokens):
         for n in occurrence:
@@ -216,8 +217,8 @@ def generate(
 
         if token == 0 and token in request.stop_tokens:
             yield {
-                "content": "",
-                "tokens": out_tokens[out_last:],
+                "content": "".join(cache_word_list),
+                "tokens": out_tokens[out_last - cache_word_len :],
                 "finish_reason": "stop:token:0",
                 "state": model_state,
             }
@@ -231,11 +232,12 @@ def generate(
         )
 
         model_tokens.append(token)
+        out_tokens.append(token)
 
         if token in request.stop_tokens:
             yield {
-                "content": "",
-                "tokens": out_tokens[out_last:],
+                "content": "".join(cache_word_list),
+                "tokens": out_tokens[out_last - cache_word_len :],
                 "finish_reason": f"stop:token:{token}",
                 "state": model_state,
             }
@@ -244,8 +246,6 @@ def generate(
             gc.collect()
             return
 
-        out_tokens.append(token)
-
         for xxx in occurrence:
             occurrence[xxx] *= request.penalty_decay
         occurrence[token] = 1 + (occurrence[token] if token in occurrence else 0)
@@ -255,15 +255,15 @@ def generate(
         if "\ufffd" in tmp:
             continue
 
-
-        output_cache_str = "".join(
+        cache_word_list.append(tmp)
+        output_cache_str = "".join(cache_word_list)
 
+        print(output_cache_str)
         for stop_words in request.stop:
             if stop_words in output_cache_str:
-
                 yield {
-                    "content":
-                    "tokens": out_tokens[out_last:],
+                    "content": output_cache_str.replace(stop_words, ""),
+                    "tokens": out_tokens[out_last - cache_word_len :],
                     "finish_reason": f"stop:words:{stop_words}",
                     "state": model_state,
                 }
@@ -272,11 +272,12 @@ def generate(
             gc.collect()
             return
 
-
-
-
-
-
+        if len(cache_word_list) > cache_word_len:
+            yield {
+                "content": cache_word_list.pop(0),
+                "tokens": out_tokens[out_last - cache_word_len :],
+                "finish_reason": None,
+            }
 
         out_last = i + 1
```
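The commit swaps direct per-chunk streaming for a small hold-back buffer: every decoded chunk is appended to `cache_word_list`, and a chunk is only released to the client once more than `cache_word_len` (4) chunks have accumulated. Because the newest few chunks are always still in hand, a stop word that arrives split across several tokens can be detected in the joined tail and stripped before it reaches the client; the `out_tokens[out_last - cache_word_len :]` slices keep the token payloads aligned with the held-back text. Below is a minimal, self-contained sketch of the same pattern, not the app.py code itself; `stream_with_stop_words`, `chunks`, and `hold_back` are illustrative names.

```python
from typing import Iterable, Iterator, List

def stream_with_stop_words(
    chunks: Iterable[str],
    stop_words: List[str],
    hold_back: int = 4,  # plays the role of cache_word_len in the commit
) -> Iterator[str]:
    """Yield text chunks, holding the newest `hold_back` chunks back so a
    stop word split across chunk boundaries can be caught before it is sent."""
    cache: List[str] = []  # plays the role of cache_word_list
    for chunk in chunks:
        cache.append(chunk)
        tail = "".join(cache)
        for sw in stop_words:
            if sw in tail:
                # Stop word completed inside the held-back window:
                # emit only the text before it, then end the stream.
                yield tail.split(sw, 1)[0]
                return
        if len(cache) > hold_back:
            # Window is full and clean: release the oldest chunk.
            yield cache.pop(0)
    # Stream ended without a stop word: flush what is still held back.
    yield "".join(cache)


# Example: "<|end|>" arrives split across three chunks but is still stripped.
pieces = ["Hel", "lo wor", "ld<|", "end", "|> tail"]
print("".join(stream_with_stop_words(pieces, ["<|end|>"])))  # -> Hello world
```

One difference worth noting: the commit emits `output_cache_str.replace(stop_words, "")`, which keeps any text that follows the stop word, while the sketch cuts at the first match. In both variants the buffer adds at most `hold_back` chunks of latency, only catches stop words that span at most that many chunks, and must be flushed when the stream ends so the last held-back chunks are not silently dropped.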