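"""Gradio app for Andrew Ng's translation-agent workflow.

The demo produces an initial translation of the source text, asks the model to
reflect on that draft, then improves it, streaming each stage into the UI and
highlighting the diff between the initial and improved translation.
"""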
import os
import re
import time
from difflib import Differ
import gradio as gr
from loguru import logger
from ycecream import y
# from src.translation_agent.utils import *
from src.translation_agent.utils import MAX_TOKENS_PER_CHUNK
from src.translation_agent.utils import RecursiveCharacterTextSplitter
from src.translation_agent.utils import calculate_chunk_size
from src.translation_agent.utils import multichunk_improve_translation
from src.translation_agent.utils import multichunk_initial_translation
from src.translation_agent.utils import multichunk_reflect_on_translation
from src.translation_agent.utils import num_tokens_in_string
from src.translation_agent.utils import one_chunk_improve_translation
from src.translation_agent.utils import one_chunk_initial_translation
from src.translation_agent.utils import one_chunk_reflect_on_translation

y.configure(sln=1, show_time=1)

os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()  # type: ignore # pylint: disable=no-member
except Exception:
    # Windows: time.tzset() is not available
    y("Windows, can't run time.tzset()")

LANGUAGES = {
    "English": "English",
    "Español": "Spanish",
    "Français": "French",
    "Deutsch": "German",
    "Italiano": "Italian",
    "Português": "Portuguese",
    "Русский": "Russian",
    "中文": "Chinese",
    "日本語": "Japanese",
    "한국어": "Korean",
    "العربية": "Arabic",
    "हिन्दी": "Hindi",
}


def diff_texts(text1, text2, lang):
    """Return (token, marker) pairs for gr.HighlightedText; "+" marks added tokens."""
    d = Differ()
    # y(lang)
    logger.info(lang)
    if lang == "中文":
        # Chinese: diff character by character
        return [
            (token[2:], token[0] if token[0] != " " else None)
            for token in d.compare(text1, text2)
            if token[0] in ["+", " "]
        ]
    else:
        # Other languages: diff on whitespace-delimited words
        words1 = re.findall(r"\S+|\s+", text1)
        words2 = re.findall(r"\S+|\s+", text2)
        return [
            (token[2:], token[0] if token[0] != " " else None)
            for token in d.compare(words1, words2)
            if token[0] in ["+", " "]
        ]
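# Roughly, the word-level branch returns something like:
#   diff_texts("the cat sat", "the dog sat", "English")
#   -> [("the", None), (" ", None), ("dog", "+"), (" ", None), ("sat", None)]
# Unchanged tokens carry None, insertions carry "+", deletions are dropped; this is
# the (token, label) format that gr.HighlightedText expects.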


def translate_text(
    source_lang,
    target_lang,
    source_text,
    country,
    max_tokens=MAX_TOKENS_PER_CHUNK,
):
    """Translate source_text, reflect on the draft, and improve it, yielding after each stage."""
    num_tokens_in_text = num_tokens_in_string(source_text)
    y(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        y("Translating text as single chunk")

        # Note: use `yield from B()` if the yield statements are moved into a helper B()
        translation_1 = one_chunk_initial_translation(
            source_lang, target_lang, source_text
        )
        yield translation_1, None, None

        reflection = one_chunk_reflect_on_translation(
            source_lang, target_lang, source_text, translation_1, country
        )
        yield translation_1, reflection, None

        translation_2 = one_chunk_improve_translation(
            source_lang, target_lang, source_text, translation_1, reflection
        )
        translation_diff = diff_texts(translation_1, translation_2, target_lang)
        yield translation_1, reflection, translation_diff
    else:
        y("Translating text as multiple chunks")

        token_size = calculate_chunk_size(
            token_count=num_tokens_in_text, token_limit=max_tokens
        )
        y(token_size)

        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            model_name="gpt-4",
            chunk_size=token_size,
            chunk_overlap=0,
        )
        source_text_chunks = text_splitter.split_text(source_text)

        translation_1_chunks = multichunk_initial_translation(
            source_lang, target_lang, source_text_chunks
        )
        y(translation_1_chunks)
        translation_1 = "".join(translation_1_chunks)
        yield translation_1, None, None

        reflection_chunks = multichunk_reflect_on_translation(
            source_lang,
            target_lang,
            source_text_chunks,
            translation_1_chunks,
            country,
        )
        y(reflection_chunks)
        reflection = "".join(reflection_chunks)
        yield translation_1, reflection, None

        translation_2_chunks = multichunk_improve_translation(
            source_lang,
            target_lang,
            source_text_chunks,
            translation_1_chunks,
            reflection_chunks,
        )
        y(translation_2_chunks)
        translation_2 = "".join(translation_2_chunks)
        translation_diff = diff_texts(translation_1, translation_2, target_lang)
        yield translation_1, reflection, translation_diff
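# translate_text yields three times: (initial, None, None), then
# (initial, reflection, None), then (initial, reflection, diff), so the click
# handler below can stream each stage into the three output components.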


def update_ui(translation_1, reflection, translation_diff):
    """Mirror the three output values back into their components."""
    return (
        gr.update(value=translation_1),
        gr.update(value=reflection),
        gr.update(value=translation_diff),
    )


with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent")

    with gr.Row():
        source_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value="English",
            label="Source Language",
        )
        target_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value="中文",
            label="Target Language",
        )
        country = gr.Textbox(label="Country (for target language)")

    source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)
    btn = gr.Button("Translate")

    with gr.Row():
        translation_1 = gr.Textbox(label="Initial Translation", lines=3)
        reflection = gr.Textbox(label="Reflection", lines=3)

    translation_diff = gr.HighlightedText(
        label="Final Translation",
        combine_adjacent=True,
        show_legend=True,
        color_map={"+": "red"},
    )
    # translation = gr.Textbox(label="Final Translation", lines=5, show_copy_button=True)

    # Stream the generator's intermediate results into the three output components
    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )
    # Second handler on the same button: update_ui re-emits the current component values
    btn.click(
        update_ui,
        inputs=[translation_1, reflection, translation_diff],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )

demo.launch()
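# Typical usage, assuming this file is the Space's app.py and the
# src.translation_agent package plus any required model API key are configured:
#   python app.py
# demo.launch(share=True) could be used instead to expose a temporary public URL.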