Spaces:
Runtime error
Runtime error
ml
Browse files- .gitignore +2 -1
- config.py +1 -1
- multi_language.py +171 -2
.gitignore
CHANGED
|
@@ -146,4 +146,5 @@ debug*
|
|
| 146 |
private*
|
| 147 |
crazy_functions/test_project/pdf_and_word
|
| 148 |
crazy_functions/test_samples
|
| 149 |
-
request_llm/jittorllms
|
|
|
|
|
|
| 146 |
private*
|
| 147 |
crazy_functions/test_project/pdf_and_word
|
| 148 |
crazy_functions/test_samples
|
| 149 |
+
request_llm/jittorllms
|
| 150 |
+
multi-language
|
config.py
CHANGED
|
@@ -54,7 +54,7 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
|
|
| 54 |
# 设置gradio的并行线程数(不需要修改)
|
| 55 |
CONCURRENT_COUNT = 100
|
| 56 |
|
| 57 |
-
#
|
| 58 |
ADD_WAIFU = False
|
| 59 |
|
| 60 |
# 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
|
|
|
|
| 54 |
# 设置gradio的并行线程数(不需要修改)
|
| 55 |
CONCURRENT_COUNT = 100
|
| 56 |
|
| 57 |
+
# 加一个live2d装饰
|
| 58 |
ADD_WAIFU = False
|
| 59 |
|
| 60 |
# 设置用户名和密码(不需要修改)(相关功能不稳定,与gradio版本和网络都相关,如果本地使用不建议加这个)
|
multi_language.py
CHANGED
|
@@ -1,4 +1,84 @@
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
def extract_chinese_characters(file_path):
|
| 4 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
@@ -29,6 +109,95 @@ directory_path = './'
|
|
| 29 |
chinese_characters = extract_chinese_characters_from_directory(directory_path)
|
| 30 |
word_to_translate = {}
|
| 31 |
for d in chinese_characters:
|
| 32 |
-
word_to_translate[d['word']] = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
print('All Chinese characters:', chinese_characters)
|
|
|
|
| 1 |
import os
|
| 2 |
+
import functools
|
| 3 |
+
import os
|
| 4 |
+
import pickle
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
# Directory that holds the on-disk cache files written by lru_file_cache.
CACHE_FOLDER = "gpt_log"

# exist_ok=True makes this idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(CACHE_FOLDER, exist_ok=True)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def lru_file_cache(maxsize=128, ttl=None, filename=None):
    """
    Decorator factory: memoize a function with LRU eviction, an optional
    time-to-live, and optional persistence to a pickle file.

    maxsize: Maximum number of cached entries. When an insert pushes the cache
        above this size, the entry with the oldest access timestamp is evicted.
    ttl: Time-to-live in seconds. An entry whose last access is more than
        `ttl` seconds old is discarded and recomputed on the next lookup.
        None disables expiry.
    filename: Base name of the cache file inside CACHE_FOLDER (".cache" is
        appended). When None the cache is kept in memory only and nothing is
        read from or written to disk.

    The decorated function gains a `cache_info()` attribute returning the live
    statistics dict (hits, misses, maxsize, currsize, ttl, filename).
    """
    cache_path = os.path.join(CACHE_FOLDER, f"{filename}.cache") if filename is not None else None

    def decorator_function(func):
        cache = {}
        _cache_info = {
            "hits": 0,
            "misses": 0,
            "maxsize": maxsize,
            "currsize": 0,
            "ttl": ttl,
            "filename": cache_path,
        }

        # Restore a previously persisted cache, if any.
        if cache_path is not None and os.path.exists(cache_path):
            # NOTE(security): pickle.load can execute arbitrary code; the cache
            # file must only ever come from a trusted location.
            with open(cache_path, "rb") as f:
                cache = pickle.load(f)
        _cache_info["currsize"] = len(cache)

        @functools.wraps(func)
        def wrapper_function(*args, **kwargs):
            # Keyword *values* must be part of the key, otherwise two calls
            # that differ only in a keyword value would share one entry.
            # Calls without keywords keep the historical `frozenset()` suffix
            # so cache files written by earlier versions stay valid.
            kw_part = tuple(sorted(kwargs.items())) if kwargs else frozenset()
            key = str((args, kw_part))

            if key in cache:
                if _cache_info["ttl"] is None or (cache[key][1] + _cache_info["ttl"]) >= time.time():
                    _cache_info["hits"] += 1
                    print(f'Warning, reading cache, last read {(time.time()-cache[key][1])//60} minutes ago')
                    # Deliberate pause so the warning above is noticed.
                    time.sleep(2)
                    cache[key][1] = time.time()
                    return cache[key][0]
                # Entry expired: drop it and fall through to recompute.
                del cache[key]

            result = func(*args, **kwargs)
            cache[key] = [result, time.time()]
            _cache_info["misses"] += 1

            if len(cache) > _cache_info["maxsize"]:
                # Evict the least recently accessed entry.
                oldest_key = min(cache, key=lambda k: cache[k][1])
                del cache[oldest_key]

            # Derive the size from the dict itself so it cannot drift when
            # expired entries are removed.
            _cache_info["currsize"] = len(cache)

            if cache_path is not None:
                with open(cache_path, "wb") as f:
                    pickle.dump(cache, f)

            return result

        def cache_info():
            return _cache_info

        wrapper_function.cache_info = cache_info

        return wrapper_function

    return decorator_function
|
| 80 |
+
|
| 81 |
+
|
| 82 |
|
| 83 |
def extract_chinese_characters(file_path):
|
| 84 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
|
|
| 109 |
chinese_characters = extract_chinese_characters_from_directory(directory_path)
# Map every extracted word to the "TRANS" placeholder that the translation
# step is expected to replace.
word_to_translate = {entry['word']: "TRANS" for entry in chinese_characters}
|
| 113 |
+
|
| 114 |
+
def break_dictionary(d, n):
    """
    Split dictionary `d` into a list of dictionaries of at most `n` items each.

    Items keep their original order; only the last chunk may be shorter than
    `n`. An empty dictionary yields an empty list.

    Raises ValueError when `n` is smaller than 1 (a zero chunk size cannot be
    honoured and a negative one would silently drop every item).
    """
    if n < 1:
        raise ValueError(f"chunk size must be >= 1, got {n}")
    items = list(d.items())
    return [dict(items[start:start + n]) for start in range(0, len(items), n)]
|
| 118 |
+
|
| 119 |
+
# Maximum number of words per chunk; each chunk is passed to trans() as one call.
N_EACH_REQ = 50
word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
# Target language name; used to build the ./multi-language/{LANG}/ output path.
LANG = "English"
|
| 122 |
+
|
| 123 |
+
@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(words):
    """
    Translate a stringified word dictionary (currently a stub).

    words: string form of a {word: "TRANS"} dictionary chunk.

    Returns the string '{}' unconditionally; the GPT request that would
    produce a real translation is disabled (kept below as comments).
    Results are memoized by lru_file_cache under the name "translation_cache".
    """
    # from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    # from toolbox import get_conf, ChatBotWithCookies
    # proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
    #     get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
    # llm_kwargs = {
    #     'api_key': API_KEY,
    #     'llm_model': LLM_MODEL,
    #     'top_p':1.0,
    #     'max_length': None,
    #     'temperature':0.0,
    # }
    # plugin_kwargs = {}
    # chatbot = ChatBotWithCookies(llm_kwargs)
    # history = []
    # for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=words, inputs_show_user=words,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
    #     sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
    # ):
    #     gpt_say = gpt_say[1][0][1]
    # return gpt_say
    return '{}'
|
| 147 |
+
|
| 148 |
+
# Merge the per-chunk translation results into one {word: translation} dict.
translated_result = {}
for d in word_to_translate_split:
    res = trans(str(d))
    try:
        # convert translated result back to python dictionary
        # NOTE(security): eval() runs whatever text the translation step
        # returned; consider ast.literal_eval once real model output is used.
        res_dict = eval(res)
    except Exception:
        # Skip this chunk: without the `continue`, res_dict would be undefined
        # (first chunk) or still hold the previous chunk's result.
        print('Unexpected output.')
        continue
    if not isinstance(res_dict, dict):
        # A non-dict value cannot be merged with dict.update().
        print('Unexpected output.')
        continue
    translated_result.update(res_dict)

print('All Chinese characters:', chinese_characters)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
# =================== create copy =====================
|
| 162 |
+
def copy_source_code():
    """
    Create a fresh copy of the current directory under ./multi-language/{LANG}/.

    Any existing copy for LANG is removed first. The directories
    'multi-language', 'gpt_log', '.git' and 'private_upload' are excluded
    from the copy (excluding 'multi-language' also keeps the copy from
    recursing into itself).
    """
    import shutil
    import os
    backup_dir = f'./multi-language/{LANG}/'
    # Best-effort removal of a previous copy; a missing directory is fine.
    shutil.rmtree(backup_dir, ignore_errors=True)
    os.makedirs(f'./multi-language', exist_ok=True)
    shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
copy_source_code()
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# Redirect each record to its copy inside the per-language tree and attach
# the translation for its word (None when no translation is available).
for d in chinese_characters:
    d['file'] = f'./multi-language/{LANG}/' + d['file']
    d['trans'] = translated_result.get(d['word'])

# Order the records from the longest word to the shortest.
chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
|
| 187 |
+
# For every record that has a translation, read the target file and inspect
# where the translated text occurs in it.
# NOTE(review): this loop looks unfinished - `content` is never written back
# to disk in the visible code, so it currently has no lasting effect.
for d in chinese_characters:
    if d['trans'] is None:
        # No translation for this word; nothing to do for this record.
        continue

    with open(d['file'], 'r', encoding='utf-8') as f:
        content = f.read()

    # NOTE(review): str.replace returns a new string; the result is discarded
    # here, so `content` is left unchanged - presumably an assignment was intended.
    content.replace(d['word'], d['trans'])
    substring = d['trans']
    # NOTE(review): find() returns -1 when the translation is absent; the
    # indexing below would then look at the last character (or raise
    # IndexError on an empty file).
    substring_start_index = content.find(substring)
    # Index of the last character of the match; not used in the visible code.
    substring_end_index = substring_start_index + len(substring) - 1
    if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
        # Keeps only the text up to and including the first character of the match.
        content = content[:substring_start_index+1]
|
| 202 |
+
|
| 203 |
|
|
|