Spaces:
Build error
Commit · 5709a62
1 Parent(s): 4080581
Update app.py
Browse files
app.py
CHANGED
|
@@ -94,7 +94,14 @@ Death does not concern us, because as long as we exist, death is not here. And w
|
|
| 94 |
PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
|
| 95 |
|
| 96 |
# Translation logic
|
| 97 |
-
def translate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
|
| 99 |
ctx = prompt.strip()
|
| 100 |
all_tokens = []
|
|
@@ -102,6 +109,9 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
|
|
| 102 |
out_str = ''
|
| 103 |
occurrence = {}
|
| 104 |
|
|
|
|
|
|
|
|
|
|
| 105 |
state = None
|
| 106 |
if inState != None:
|
| 107 |
state = universal_deepcopy(inState)
|
|
@@ -114,10 +124,21 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
|
|
| 114 |
# Generate things token by token
|
| 115 |
for i in range(ctx_limit):
|
| 116 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
| 118 |
if token in [0]: # EOS token
|
| 119 |
break
|
|
|
|
| 120 |
all_tokens += [token]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
tmp = pipeline.decode(all_tokens[out_last:])
|
| 122 |
if '\ufffd' not in tmp:
|
| 123 |
out_str += tmp
|
|
@@ -166,7 +187,6 @@ def translate(text, source_language, target_language, inState=PREFIX_STATE):
|
|
| 166 |
# Languages
|
| 167 |
LANGUAGES = [
|
| 168 |
"English",
|
| 169 |
-
"Zombie Speak",
|
| 170 |
"Chinese",
|
| 171 |
"Spanish",
|
| 172 |
"Bengali",
|
|
|
|
| 94 |
PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
|
| 95 |
|
| 96 |
# Translation logic
|
| 97 |
+
def translate(
|
| 98 |
+
text, source_language, target_language,
|
| 99 |
+
inState=PREFIX_STATE,
|
| 100 |
+
temperature=0.2,
|
| 101 |
+
top_p=0.5,
|
| 102 |
+
presencePenalty = 0.1,
|
| 103 |
+
countPenalty = 0.1,
|
| 104 |
+
):
|
| 105 |
prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
|
| 106 |
ctx = prompt.strip()
|
| 107 |
all_tokens = []
|
|
|
|
| 109 |
out_str = ''
|
| 110 |
occurrence = {}
|
| 111 |
|
| 112 |
+
alpha_frequency = countPenalty
|
| 113 |
+
alpha_presence = presencePenalty
|
| 114 |
+
|
| 115 |
state = None
|
| 116 |
if inState != None:
|
| 117 |
state = universal_deepcopy(inState)
|
|
|
|
| 124 |
# Generate things token by token
|
| 125 |
for i in range(ctx_limit):
|
| 126 |
out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
|
| 127 |
+
for n in occurrence:
|
| 128 |
+
out[n] -= (alpha_presence + occurrence[n] * alpha_frequency)
|
| 129 |
+
token = pipeline.sample_logits(out, temperature=temperature, top_p=top_p)
|
| 130 |
+
|
| 131 |
if token in [0]: # EOS token
|
| 132 |
break
|
| 133 |
+
|
| 134 |
all_tokens += [token]
|
| 135 |
+
for xxx in occurrence:
|
| 136 |
+
occurrence[xxx] *= 0.996
|
| 137 |
+
if token not in occurrence:
|
| 138 |
+
occurrence[token] = 1
|
| 139 |
+
else:
|
| 140 |
+
occurrence[token] += 1
|
| 141 |
+
|
| 142 |
tmp = pipeline.decode(all_tokens[out_last:])
|
| 143 |
if '\ufffd' not in tmp:
|
| 144 |
out_str += tmp
|
|
|
|
| 187 |
# Languages
|
| 188 |
LANGUAGES = [
|
| 189 |
"English",
|
|
|
|
| 190 |
"Chinese",
|
| 191 |
"Spanish",
|
| 192 |
"Bengali",
|