ffreemt committed on
Commit
ab0881d
·
1 Parent(s): e34fdf8

Update ycecream loguru, add tz

Browse files
Files changed (2) hide show
  1. app.py +108 -46
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,53 +1,85 @@
1
- import gradio as gr
2
  import re
 
3
  from difflib import Differ
4
- from src.translation_agent.utils import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  LANGUAGES = {
7
- 'English': 'English',
8
- 'Español': 'Spanish',
9
- 'Français': 'French',
10
- 'Deutsch': 'German',
11
- 'Italiano': 'Italian',
12
- 'Português': 'Portuguese',
13
- 'Русский': 'Russian',
14
- '中文': 'Chinese',
15
- '日本語': 'Japanese',
16
- '한국어': 'Korean',
17
- 'العربية': 'Arabic',
18
- 'हिन्दी': 'Hindi',
19
  }
20
 
21
 
22
  def diff_texts(text1, text2, lang):
23
  d = Differ()
24
- ic(lang)
25
- if lang == '中文':
 
26
  return [
27
  (token[2:], token[0] if token[0] != " " else None)
28
  for token in d.compare(text1, text2)
29
  if token[0] in ["+", " "]
30
  ]
31
  else:
32
- words1 = re.findall(r'\S+|\s+', text1)
33
- words2 = re.findall(r'\S+|\s+', text2)
34
 
35
  return [
36
  (token[2:], token[0] if token[0] != " " else None)
37
  for token in d.compare(words1, words2)
38
  if token[0] in ["+", " "]
39
  ]
40
-
41
 
42
- def translate_text(source_lang, target_lang, source_text, country, max_tokens=MAX_TOKENS_PER_CHUNK):
 
 
 
 
 
 
 
43
  num_tokens_in_text = num_tokens_in_string(source_text)
44
 
45
- ic(num_tokens_in_text)
46
 
47
  if num_tokens_in_text < max_tokens:
48
- ic("Translating text as single chunk")
49
 
50
- #Note: use yield from B() if put yield in function B()
51
  translation_1 = one_chunk_initial_translation(
52
  source_lang, target_lang, source_text
53
  )
@@ -61,30 +93,32 @@ def translate_text(source_lang, target_lang, source_text, country, max_tokens=MA
61
  translation_2 = one_chunk_improve_translation(
62
  source_lang, target_lang, source_text, translation_1, reflection
63
  )
64
- translation_diff = diff_texts(translation_1, translation_2, target_lang)
 
 
65
  yield translation_1, reflection, translation_diff
66
 
67
  else:
68
- ic("Translating text as multiple chunks")
69
 
70
  token_size = calculate_chunk_size(
71
  token_count=num_tokens_in_text, token_limit=max_tokens
72
  )
73
 
74
- ic(token_size)
75
 
76
  text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
77
- model_name = "gpt-4",
78
  chunk_size=token_size,
79
  chunk_overlap=0,
80
  )
81
 
82
  source_text_chunks = text_splitter.split_text(source_text)
83
-
84
  translation_1_chunks = multichunk_initial_translation(
85
  source_lang, target_lang, source_text_chunks
86
  )
87
- ic(translation_1_chunks)
88
  translation_1 = "".join(translation_1_chunks)
89
  yield translation_1, None, None
90
 
@@ -95,7 +129,7 @@ def translate_text(source_lang, target_lang, source_text, country, max_tokens=MA
95
  translation_1_chunks,
96
  country,
97
  )
98
- ic(reflection_chunks)
99
  reflection = "".join(reflection_chunks)
100
  yield translation_1, reflection, None
101
 
@@ -106,24 +140,40 @@ def translate_text(source_lang, target_lang, source_text, country, max_tokens=MA
106
  translation_1_chunks,
107
  reflection_chunks,
108
  )
109
- ic(translation_2_chunks)
110
  translation_2 = "".join(translation_2_chunks)
111
- translation_diff = diff_texts(translation_1, translation_2, target_lang)
112
-
 
 
113
  yield translation_1, reflection, translation_diff
114
-
115
 
116
 
117
  def update_ui(translation_1, reflection, translation_diff):
118
- return gr.update(value=translation_1), gr.update(value=reflection), gr.update(value=translation_diff)
 
 
 
 
 
119
 
120
  with gr.Blocks() as demo:
121
  gr.Markdown("# Andrew Ng's Translation Agent ")
122
  with gr.Row():
123
- source_lang = gr.Dropdown(choices=list(LANGUAGES.keys()), value='English', label="Source Language")
124
- target_lang = gr.Dropdown(choices=list(LANGUAGES.keys()), value='中文', label="Target Language")
 
 
 
 
 
 
 
 
125
  country = gr.Textbox(label="Country (for target language)")
126
- source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)
 
 
127
 
128
  btn = gr.Button("Translate")
129
 
@@ -131,13 +181,25 @@ with gr.Blocks() as demo:
131
  translation_1 = gr.Textbox(label="Initial Translation", lines=3)
132
  reflection = gr.Textbox(label="Reflection", lines=3)
133
 
134
- translation_diff = gr.HighlightedText (label="Final Translation",
135
- combine_adjacent=True,
136
- show_legend=True,
137
- color_map={"+": "red"})
138
- #translation = gr.Textbox(label="Final Translation", lines=5, show_copy_button=True)
139
-
140
- btn.click(translate_text, inputs=[source_lang, target_lang, source_text, country], outputs=[translation_1, reflection, translation_diff], queue=True)
141
- btn.click(update_ui, inputs=[translation_1, reflection, translation_diff], outputs=[translation_1, reflection, translation_diff], queue=True)
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
  demo.launch()
 
1
+ import os
2
  import re
3
+ import time
4
  from difflib import Differ
5
+
6
+ import gradio as gr
7
+ from loguru import logger
8
+ from ycecream import y
9
+
10
+ # from src.translation_agent.utils import *
11
+ from src.translation_agent.utils import MAX_TOKENS_PER_CHUNK
12
+ from src.translation_agent.utils import RecursiveCharacterTextSplitter
13
+ from src.translation_agent.utils import calculate_chunk_size
14
+ from src.translation_agent.utils import multichunk_improve_translation
15
+ from src.translation_agent.utils import multichunk_initial_translation
16
+ from src.translation_agent.utils import multichunk_reflect_on_translation
17
+ from src.translation_agent.utils import num_tokens_in_string
18
+ from src.translation_agent.utils import one_chunk_improve_translation
19
+ from src.translation_agent.utils import one_chunk_initial_translation
20
+ from src.translation_agent.utils import one_chunk_reflect_on_translation
21
+
22
+ y.configure(sln=1, show_time=1)
23
+
24
+ os.environ["TZ"] = "Asia/Shanghai"
25
+ try:
26
+ time.tzset() # type: ignore # pylint: disable=no-member
27
+ except Exception:
28
+ # Windows
29
+ y("Windows, cant run time.tzset()")
30
 
31
  LANGUAGES = {
32
+ "English": "English",
33
+ "Español": "Spanish",
34
+ "Français": "French",
35
+ "Deutsch": "German",
36
+ "Italiano": "Italian",
37
+ "Português": "Portuguese",
38
+ "Русский": "Russian",
39
+ "中文": "Chinese",
40
+ "日本語": "Japanese",
41
+ "한국어": "Korean",
42
+ "العربية": "Arabic",
43
+ "हिन्दी": "Hindi",
44
  }
45
 
46
 
47
  def diff_texts(text1, text2, lang):
48
  d = Differ()
49
+ # y(lang)
50
+ logger.info(lang)
51
+ if lang == "中文":
52
  return [
53
  (token[2:], token[0] if token[0] != " " else None)
54
  for token in d.compare(text1, text2)
55
  if token[0] in ["+", " "]
56
  ]
57
  else:
58
+ words1 = re.findall(r"\S+|\s+", text1)
59
+ words2 = re.findall(r"\S+|\s+", text2)
60
 
61
  return [
62
  (token[2:], token[0] if token[0] != " " else None)
63
  for token in d.compare(words1, words2)
64
  if token[0] in ["+", " "]
65
  ]
 
66
 
67
+
68
+ def translate_text(
69
+ source_lang,
70
+ target_lang,
71
+ source_text,
72
+ country,
73
+ max_tokens=MAX_TOKENS_PER_CHUNK,
74
+ ):
75
  num_tokens_in_text = num_tokens_in_string(source_text)
76
 
77
+ y(num_tokens_in_text)
78
 
79
  if num_tokens_in_text < max_tokens:
80
+ y("Translating text as single chunk")
81
 
82
+ # Note: use yield from B() if put yield in function B()
83
  translation_1 = one_chunk_initial_translation(
84
  source_lang, target_lang, source_text
85
  )
 
93
  translation_2 = one_chunk_improve_translation(
94
  source_lang, target_lang, source_text, translation_1, reflection
95
  )
96
+ translation_diff = diff_texts(
97
+ translation_1, translation_2, target_lang
98
+ )
99
  yield translation_1, reflection, translation_diff
100
 
101
  else:
102
+ y("Translating text as multiple chunks")
103
 
104
  token_size = calculate_chunk_size(
105
  token_count=num_tokens_in_text, token_limit=max_tokens
106
  )
107
 
108
+ y(token_size)
109
 
110
  text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
111
+ model_name="gpt-4",
112
  chunk_size=token_size,
113
  chunk_overlap=0,
114
  )
115
 
116
  source_text_chunks = text_splitter.split_text(source_text)
117
+
118
  translation_1_chunks = multichunk_initial_translation(
119
  source_lang, target_lang, source_text_chunks
120
  )
121
+ y(translation_1_chunks)
122
  translation_1 = "".join(translation_1_chunks)
123
  yield translation_1, None, None
124
 
 
129
  translation_1_chunks,
130
  country,
131
  )
132
+ y(reflection_chunks)
133
  reflection = "".join(reflection_chunks)
134
  yield translation_1, reflection, None
135
 
 
140
  translation_1_chunks,
141
  reflection_chunks,
142
  )
143
+ y(translation_2_chunks)
144
  translation_2 = "".join(translation_2_chunks)
145
+ translation_diff = diff_texts(
146
+ translation_1, translation_2, target_lang
147
+ )
148
+
149
  yield translation_1, reflection, translation_diff
 
150
 
151
 
152
  def update_ui(translation_1, reflection, translation_diff):
153
+ return (
154
+ gr.update(value=translation_1),
155
+ gr.update(value=reflection),
156
+ gr.update(value=translation_diff),
157
+ )
158
+
159
 
160
  with gr.Blocks() as demo:
161
  gr.Markdown("# Andrew Ng's Translation Agent ")
162
  with gr.Row():
163
+ source_lang = gr.Dropdown(
164
+ choices=list(LANGUAGES.keys()),
165
+ value="English",
166
+ label="Source Language",
167
+ )
168
+ target_lang = gr.Dropdown(
169
+ choices=list(LANGUAGES.keys()),
170
+ value="中文",
171
+ label="Target Language",
172
+ )
173
  country = gr.Textbox(label="Country (for target language)")
174
+ source_text = gr.Textbox(
175
+ label="Source Text", lines=5, show_copy_button=True
176
+ )
177
 
178
  btn = gr.Button("Translate")
179
 
 
181
  translation_1 = gr.Textbox(label="Initial Translation", lines=3)
182
  reflection = gr.Textbox(label="Reflection", lines=3)
183
 
184
+ translation_diff = gr.HighlightedText(
185
+ label="Final Translation",
186
+ combine_adjacent=True,
187
+ show_legend=True,
188
+ color_map={"+": "red"},
189
+ )
190
+ # translation = gr.Textbox(label="Final Translation", lines=5, show_copy_button=True)
191
+
192
+ btn.click(
193
+ translate_text,
194
+ inputs=[source_lang, target_lang, source_text, country],
195
+ outputs=[translation_1, reflection, translation_diff],
196
+ queue=True,
197
+ )
198
+ btn.click(
199
+ update_ui,
200
+ inputs=[translation_1, reflection, translation_diff],
201
+ outputs=[translation_1, reflection, translation_diff],
202
+ queue=True,
203
+ )
204
 
205
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 
 
1
  aiofiles==23.2.1 ; python_version >= "3.9" and python_version < "4.0"
2
  altair==5.3.0 ; python_version >= "3.9" and python_version < "4.0"
3
  annotated-types==0.7.0 ; python_version >= "3.9" and python_version < "4.0"
 
1
+ ycecream
2
+ loguru
3
  aiofiles==23.2.1 ; python_version >= "3.9" and python_version < "4.0"
4
  altair==5.3.0 ; python_version >= "3.9" and python_version < "4.0"
5
  annotated-types==0.7.0 ; python_version >= "3.9" and python_version < "4.0"