wony617 commited on
Commit
ec613d7
Β·
unverified Β·
2 Parent(s): 9e33f2c 4eb685d

Merge pull request #2 from Jwaminju/update-translator

Browse files
README.md CHANGED
@@ -54,7 +54,7 @@ This project was specifically created to solve [Hugging Face Transformers Issue
54
 
55
  ## πŸŽ₯ Demo Video
56
 
57
- [![Hugging Face i18n Agent Demo](https://img.youtube.com/vi/YOUR_VIDEO_ID/maxresdefault.jpg)](https://www.youtube.com/watch?v=YOUR_VIDEO_ID)
58
 
59
  *Watch the complete walkthrough: from setup to PR creation in under 5 minutes*
60
 
 
54
 
55
  ## πŸŽ₯ Demo Video
56
 
57
+ [Hugging Face i18n Agent Demo](https://youtu.be/J2MBMNk7la8?si=7867ztaU2nPN0UEo)
58
 
59
  *Watch the complete walkthrough: from setup to PR creation in under 5 minutes*
60
 
agent/handler.py CHANGED
@@ -8,10 +8,12 @@ import gradio as gr
8
 
9
  from agent.workflow import (
10
  report_translation_target_files,
 
11
  translate_docs_interactive,
12
  generate_github_pr,
13
  )
14
  from pr_generator.searcher import find_reference_pr_simple_stream
 
15
 
16
 
17
  # State management
@@ -21,6 +23,7 @@ class ChatState:
21
  self.target_language = "ko"
22
  self.k_files = 10
23
  self.files_to_translate = []
 
24
  self.current_file_content = {"translated": ""}
25
  self.pr_result = None # Store PR creation result
26
  # GitHub configuration
@@ -70,22 +73,29 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
70
  state.step = "find_files"
71
 
72
  status_report, files_list = report_translation_target_files(lang, k)
73
- state.files_to_translate = [file[0] for file in files_list] if files_list else []
 
 
 
 
 
 
 
74
 
75
  response = f"""**βœ… File search completed!**
76
 
77
  **Status Report:**
78
  {status_report}
79
-
80
  **πŸ“ Found first {len(state.files_to_translate)} files to translate:**
81
  """
82
 
83
  if state.files_to_translate:
84
- for i, file in enumerate(state.files_to_translate[:5], 1): # Show first 5
85
  response += f"\n{i}. `{file}`"
86
 
87
- if len(state.files_to_translate) > 5:
88
- response += f"\n... and {len(state.files_to_translate) - 5} more files"
89
 
90
  response += "\n\n**πŸš€ Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
91
  else:
@@ -96,7 +106,18 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
96
  cleared_input = ""
97
  selected_tab = 1 if state.files_to_translate else 0
98
 
99
- return history, cleared_input, update_status(), gr.Tabs(selected=selected_tab)
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  def start_translation_process():
@@ -108,8 +129,8 @@ def start_translation_process():
108
 
109
  # Call translation function (simplified for demo)
110
  try:
111
- status, translated = translate_docs_interactive(
112
- state.target_language, [[current_file]]
113
  )
114
 
115
  state.current_file_content = {"translated": translated}
@@ -124,18 +145,24 @@ def start_translation_process():
124
  original_file_link = (
125
  "https://github.com/huggingface/transformers/blob/main/" + current_file
126
  )
 
 
 
127
  response = (
128
- f"""πŸ”„ Translation for: `{current_file}`**\n"""
129
  "**πŸ“„ Original Content Link:**\n"
130
  ""
131
  f"{original_file_link}\n"
132
  "**🌐 Translated Content:**\n"
133
- f"\n```\n\n{_extract_content_for_display(translated)}```\n"
134
- f"{status}\n"
 
 
 
 
135
  )
136
- print("translated:")
137
- print(translated)
138
- print("extracted")
139
 
140
  except Exception as e:
141
  response = f"❌ Translation failed: {str(e)}"
@@ -191,12 +218,14 @@ def handle_user_message(message, history):
191
  # User wants to start translation
192
  if state.files_to_translate:
193
  state.step = "translate"
194
- response = start_translation_process()
 
 
 
195
  else:
196
  response = (
197
  "❌ No files available for translation. Please search for files first."
198
  )
199
-
200
  # Handle GitHub PR creation - This part is removed as approve_handler is the main entry point
201
  else:
202
  # General response
@@ -288,14 +317,44 @@ def update_github_config(token, owner, repo, reference_pr_url):
288
  return f"βœ… GitHub configuration updated: {owner}/{repo}"
289
 
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  def send_message(message, history):
292
  new_history, cleared_input = handle_user_message(message, history)
293
  return new_history, cleared_input, update_status()
294
 
295
 
296
  # Button handlers with tab switching
297
- def start_translate_handler(history, anthropic_key):
298
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
 
 
 
299
  new_hist, cleared_input = handle_user_message("start translation", history)
300
  selected_tabs = 2 if state.current_file_content["translated"] else 0
301
  return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
@@ -363,11 +422,16 @@ def approve_handler(history, owner, repo, reference_pr_url):
363
  translated_content = state.current_file_content["translated"]
364
  response += "\n\nπŸš€ **Generating GitHub PR...**"
365
 
 
 
 
 
366
  pr_response = generate_github_pr(
367
  target_language=state.target_language,
368
  filepath=current_file,
369
  translated_content=translated_content,
370
  github_config=state.github_config,
 
371
  )
372
  response += f"\n{pr_response}"
373
  else:
 
8
 
9
  from agent.workflow import (
10
  report_translation_target_files,
11
+ report_in_translation_status_files,
12
  translate_docs_interactive,
13
  generate_github_pr,
14
  )
15
  from pr_generator.searcher import find_reference_pr_simple_stream
16
+ from translator.content import get_full_prompt, get_content, preprocess_content
17
 
18
 
19
  # State management
 
23
  self.target_language = "ko"
24
  self.k_files = 10
25
  self.files_to_translate = []
26
+ self.additional_instruction = ""
27
  self.current_file_content = {"translated": ""}
28
  self.pr_result = None # Store PR creation result
29
  # GitHub configuration
 
73
  state.step = "find_files"
74
 
75
  status_report, files_list = report_translation_target_files(lang, k)
76
+ in_progress_status_report, in_progress_docs = report_in_translation_status_files(
77
+ lang
78
+ )
79
+ state.files_to_translate = (
80
+ [file[0] for file in files_list if file[0] not in in_progress_docs]
81
+ if files_list
82
+ else []
83
+ )
84
 
85
  response = f"""**βœ… File search completed!**
86
 
87
  **Status Report:**
88
  {status_report}
89
+ {in_progress_status_report}
90
  **πŸ“ Found first {len(state.files_to_translate)} files to translate:**
91
  """
92
 
93
  if state.files_to_translate:
94
+ for i, file in enumerate(state.files_to_translate, 1):
95
  response += f"\n{i}. `{file}`"
96
 
97
+ # if len(state.files_to_translate) > 5:
98
+ # response += f"\n... and {len(state.files_to_translate) - 5} more files"
99
 
100
  response += "\n\n**πŸš€ Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
101
  else:
 
106
  cleared_input = ""
107
  selected_tab = 1 if state.files_to_translate else 0
108
 
109
+ # λ“œλ‘­λ‹€μš΄ choices둜 μ“Έ 파일 리슀트 λ°˜ν™˜ μΆ”κ°€
110
+ return (
111
+ history,
112
+ cleared_input,
113
+ update_status(),
114
+ gr.Tabs(selected=selected_tab),
115
+ update_dropdown_choices(state.files_to_translate),
116
+ )
117
+
118
+
119
def update_dropdown_choices(file_list):
    """Refresh the file-selection component with new choices and clear any selection."""
    return gr.update(value=None, choices=file_list)
121
 
122
 
123
  def start_translation_process():
 
129
 
130
  # Call translation function (simplified for demo)
131
  try:
132
+ translated = translate_docs_interactive(
133
+ state.target_language, [[current_file]], state.additional_instruction
134
  )
135
 
136
  state.current_file_content = {"translated": translated}
 
145
  original_file_link = (
146
  "https://github.com/huggingface/transformers/blob/main/" + current_file
147
  )
148
+ print("Completed translation:\n")
149
+ print(translated)
150
+ print("----------------------------")
151
  response = (
152
+ f"""πŸ”„ Translation for: `{current_file}`\n"""
153
  "**πŸ“„ Original Content Link:**\n"
154
  ""
155
  f"{original_file_link}\n"
156
  "**🌐 Translated Content:**\n"
157
+ # f"\n```\n\n{_extract_content_for_display(translated)}\n```"
158
+ # "\n```\n\n"
159
+ # f"\n{translated}\n"
160
+ # f"```"
161
+ # f"{status}\n"
162
+ # "βœ… Translation completed. The code block will be added when generating PR."
163
  )
164
+ return response, translated
165
+
 
166
 
167
  except Exception as e:
168
  response = f"❌ Translation failed: {str(e)}"
 
218
  # User wants to start translation
219
  if state.files_to_translate:
220
  state.step = "translate"
221
+ response, translated = start_translation_process()
222
+ history.append([message, response])
223
+ history.append(["", translated])
224
+ return history, ""
225
  else:
226
  response = (
227
  "❌ No files available for translation. Please search for files first."
228
  )
 
229
  # Handle GitHub PR creation - This part is removed as approve_handler is the main entry point
230
  else:
231
  # General response
 
317
  return f"βœ… GitHub configuration updated: {owner}/{repo}"
318
 
319
 
320
def update_prompt_preview(language, file_path, additional_instruction):
    """Update prompt preview based on current settings"""
    # Nothing selected yet β€” prompt the user instead of building a preview.
    if not file_path.strip():
        return "Select a file to see the prompt preview..."

    try:
        # Map the language code to the display name the prompt expects.
        translation_lang = "Korean" if language == "ko" else language

        # Load the doc and strip it down to the translatable portion.
        to_translate = preprocess_content(get_content(file_path))

        # Keep only a short sample (first 500 characters) for the preview.
        sample_content = to_translate[:500]
        if len(to_translate) > 500:
            sample_content += "..."

        return get_full_prompt(translation_lang, sample_content, additional_instruction)
    except Exception as e:
        return f"Error generating prompt preview: {str(e)}"
345
+
346
+
347
  def send_message(message, history):
348
  new_history, cleared_input = handle_user_message(message, history)
349
  return new_history, cleared_input, update_status()
350
 
351
 
352
  # Button handlers with tab switching
353
+ def start_translate_handler(history, anthropic_key, file_to_translate, additional_instruction=""):
354
  os.environ["ANTHROPIC_API_KEY"] = anthropic_key
355
+
356
+ state.additional_instruction = additional_instruction
357
+ state.files_to_translate = [file_to_translate]
358
  new_hist, cleared_input = handle_user_message("start translation", history)
359
  selected_tabs = 2 if state.current_file_content["translated"] else 0
360
  return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
 
422
  translated_content = state.current_file_content["translated"]
423
  response += "\n\nπŸš€ **Generating GitHub PR...**"
424
 
425
+ # Extract title from file for toctree mapping
426
+ file_name = current_file.split("/")[-1].replace(".md", "").replace("_", " ").title()
427
+ print(file_name)
428
+
429
  pr_response = generate_github_pr(
430
  target_language=state.target_language,
431
  filepath=current_file,
432
  translated_content=translated_content,
433
  github_config=state.github_config,
434
+ en_title=file_name,
435
  )
436
  response += f"\n{pr_response}"
437
  else:
agent/workflow.py CHANGED
@@ -11,7 +11,7 @@ from translator.content import (
11
  llm_translate,
12
  preprocess_content,
13
  )
14
- from translator.retriever import report
15
 
16
  # GitHub PR Agent import
17
  try:
@@ -38,8 +38,34 @@ def report_translation_target_files(
38
  return status_report, [[file] for file in filepath_list]
39
 
40
 
41
- def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  """Translate documentation."""
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # step 1. Get content from file path
44
  content = get_content(file_path)
45
  to_translate = preprocess_content(content)
@@ -47,21 +73,25 @@ def translate_docs(lang: str, file_path: str) -> tuple[str, str]:
47
  # step 2. Prepare prompt with docs content
48
  if lang == "ko":
49
  translation_lang = "Korean"
50
- to_translate_with_prompt = get_full_prompt(translation_lang, to_translate)
 
 
51
 
52
  # step 3. Translate with LLM
53
  # TODO: MCP client λ„˜κΈΈ λΆ€λΆ„
54
  callback_result, translated_content = llm_translate(to_translate_with_prompt)
55
-
 
56
  # step 4. Add scaffold to translation result
57
  translated_doc = fill_scaffold(content, to_translate, translated_content)
58
-
 
59
  return callback_result, translated_doc
60
 
61
 
62
  def translate_docs_interactive(
63
- translate_lang: str, selected_files: list[list[str]]
64
- ) -> tuple[str, str, str]:
65
  """Interactive translation function that processes files one by one.
66
 
67
  Args:
@@ -70,27 +100,17 @@ def translate_docs_interactive(
70
  """
71
  # Extract file paths from the dataframe format
72
  file_paths = [row[0] for row in selected_files if row and len(row) > 0]
73
- if not file_paths:
74
- return (
75
- "No files selected for translation.",
76
- gr.update(visible=False),
77
- gr.update(visible=False),
78
- gr.update(visible=False),
79
- [],
80
- 0,
81
- )
82
 
83
  # Start with the first file
84
  current_file = file_paths[0]
85
 
86
  status = f"βœ… Translation completed: `{current_file}` β†’ `{translate_lang}`\n\n"
87
- callback_result, translated_content = translate_docs(translate_lang, current_file)
88
  status += f"πŸ’° Used token and cost: \n```\n{callback_result}\n```"
89
 
90
- if len(file_paths) > 1:
91
- status += f"\n### πŸ“ Note: Currently, only the first file has been translated.\n> The remaining {len(file_paths) - 1} files have not been processed yet, as the system is in its beta version"
92
 
93
- return status, translated_content
94
 
95
 
96
  def generate_github_pr(
@@ -98,6 +118,7 @@ def generate_github_pr(
98
  filepath: str,
99
  translated_content: str = None,
100
  github_config: dict = None,
 
101
  ) -> str:
102
  """Generate a GitHub PR for translated documentation.
103
 
@@ -106,6 +127,7 @@ def generate_github_pr(
106
  filepath: Original file path (e.g., "docs/source/en/accelerator_selection.md")
107
  translated_content: Translated content (if None, read from file)
108
  github_config: GitHub configuration dictionary
 
109
 
110
  Returns:
111
  PR creation result message
@@ -149,9 +171,7 @@ def generate_github_pr(
149
  print(f" πŸ“ File: {filepath}")
150
  print(f" 🌍 Language: {target_language}")
151
  print(f" πŸ“Š Reference PR: {github_config['reference_pr_url']}")
152
- print(
153
- f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}"
154
- )
155
 
156
  agent = GitHubPRAgent()
157
  result = agent.run_translation_pr_workflow(
@@ -163,14 +183,37 @@ def generate_github_pr(
163
  repo_name=github_config["repo_name"],
164
  base_branch=github_config.get("base_branch", "main"),
165
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  # Process result
 
 
 
 
 
 
 
 
168
  if result["status"] == "success":
169
  return f"""βœ… **GitHub PR Creation Successful!**
170
 
171
  πŸ”— **PR URL:** {result["pr_url"]}
172
  🌿 **Branch:** {result["branch"]}
173
- πŸ“ **File:** {result["file_path"]}
174
 
175
  {result["message"]}"""
176
 
@@ -178,7 +221,7 @@ def generate_github_pr(
178
  return f"""⚠️ **Partial Success**
179
 
180
  🌿 **Branch:** {result["branch"]}
181
- πŸ“ **File:** {result["file_path"]}
182
 
183
  {result["message"]}
184
 
 
11
  llm_translate,
12
  preprocess_content,
13
  )
14
+ from translator.retriever import report, get_github_issue_open_pr
15
 
16
  # GitHub PR Agent import
17
  try:
 
38
  return status_report, [[file] for file in filepath_list]
39
 
40
 
41
def report_in_translation_status_files(translate_lang: str) -> tuple[str, list[str]]:
    """Report docs that already have an open translation PR in progress.

    Args:
        translate_lang: Target language code (e.g. "ko").

    Returns:
        A ``(status_report, docs)`` tuple: a human-readable summary of the
        in-progress translations, and the list of doc file paths in progress
        (used by the caller to exclude them from new translation targets).
    """
    docs, pr_info_list = get_github_issue_open_pr(translate_lang)

    status_report = ""
    if docs:
        status_report = f"""\nπŸ€– Found {len(docs)} in progress for translation.
    """
        # Pair each doc with its PR info and number from 1 for display,
        # instead of manual `pr_info_list[i]` / `{i+1}` indexing.
        for i, (file, pr_info) in enumerate(zip(docs, pr_info_list), 1):
            status_report += f"\n{i}. `{file}`: {pr_info}"
        status_report += "\n"
    return status_report, docs
52
+
53
+
54
def translate_docs(lang: str, file_path: str, additional_instruction: str = "") -> tuple[str, str]:
    """Translate documentation.

    Args:
        lang: Target language code (e.g. "ko").
        file_path: Repo-relative path of the doc to translate.
        additional_instruction: Optional extra instruction appended to the prompt.

    Returns:
        A ``(callback_result, translated_doc)`` tuple: the LLM usage/cost
        report and the final translated document with scaffold applied.
    """
    # Check if translation already exists β€” reuse it to avoid spending tokens.
    translation_file_path = (
        Path(__file__).resolve().parent.parent
        / f"translation_result/{file_path}"
    )

    if translation_file_path.exists():
        print(f"πŸ“„ Found existing translation: {translation_file_path}")
        with open(translation_file_path, "r", encoding="utf-8") as f:
            existing_content = f.read()
        if existing_content.strip():
            return "Existing translation loaded (no tokens used)", existing_content

    # step 1. Get content from file path
    content = get_content(file_path)
    to_translate = preprocess_content(content)

    # step 2. Prepare prompt with docs content
    # Fix: fall back to the raw language code for non-Korean targets so the
    # prompt variables are always defined (previously a NameError for any
    # lang != "ko"); mirrors the mapping used in update_prompt_preview.
    translation_lang = "Korean" if lang == "ko" else lang
    to_translate_with_prompt = get_full_prompt(translation_lang, to_translate, additional_instruction)

    print("to_translate_with_prompt:\n", to_translate_with_prompt)

    # step 3. Translate with LLM
    # TODO: pass the MCP client here
    callback_result, translated_content = llm_translate(to_translate_with_prompt)
    print("translated_content:\n")
    print(translated_content)
    # step 4. Add scaffold to translation result
    translated_doc = fill_scaffold(content, to_translate, translated_content)
    print("translated_doc:\n")
    print(translated_doc)
    return callback_result, translated_doc
90
 
91
 
92
  def translate_docs_interactive(
93
+ translate_lang: str, selected_files: list[list[str]], additional_instruction: str = ""
94
+ ) -> tuple[str, str]:
95
  """Interactive translation function that processes files one by one.
96
 
97
  Args:
 
100
  """
101
  # Extract file paths from the dataframe format
102
  file_paths = [row[0] for row in selected_files if row and len(row) > 0]
 
 
 
 
 
 
 
 
 
103
 
104
  # Start with the first file
105
  current_file = file_paths[0]
106
 
107
  status = f"βœ… Translation completed: `{current_file}` β†’ `{translate_lang}`\n\n"
108
+ callback_result, translated_content = translate_docs(translate_lang, current_file, additional_instruction)
109
  status += f"πŸ’° Used token and cost: \n```\n{callback_result}\n```"
110
 
111
+ print(status)
 
112
 
113
+ return translated_content
114
 
115
 
116
  def generate_github_pr(
 
118
  filepath: str,
119
  translated_content: str = None,
120
  github_config: dict = None,
121
+ en_title: str = None,
122
  ) -> str:
123
  """Generate a GitHub PR for translated documentation.
124
 
 
127
  filepath: Original file path (e.g., "docs/source/en/accelerator_selection.md")
128
  translated_content: Translated content (if None, read from file)
129
  github_config: GitHub configuration dictionary
130
+ en_title: English title for toctree mapping
131
 
132
  Returns:
133
  PR creation result message
 
171
  print(f" πŸ“ File: {filepath}")
172
  print(f" 🌍 Language: {target_language}")
173
  print(f" πŸ“Š Reference PR: {github_config['reference_pr_url']}")
174
+ print(f" 🏠 Repository: {github_config['owner']}/{github_config['repo_name']}")
 
 
175
 
176
  agent = GitHubPRAgent()
177
  result = agent.run_translation_pr_workflow(
 
183
  repo_name=github_config["repo_name"],
184
  base_branch=github_config.get("base_branch", "main"),
185
  )
186
+ # result = {
187
+ # 'status': 'partial_success',
188
+ # 'branch': 'ko-attention_interface',
189
+ # 'file_path': 'docs/source/ko/attention_interface.md',
190
+ # 'message': 'File was saved and commit was successful.\nPR creation failed: ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1', 'error_details': 'ERROR: Existing PR found: https://github.com/Jwaminju/transformers/pull/1'
191
+ # }
192
+ # Process toctree update after successful translation PR
193
+ toctree_result = None
194
+ if en_title:
195
+ from agent.toctree_handler import TocTreeHandler
196
+ toctree_handler = TocTreeHandler()
197
+ toctree_result = toctree_handler.update_toctree_after_translation(
198
+ result, en_title, filepath, agent, github_config
199
+ )
200
+ print("toctree_result:", toctree_result)
201
 
202
  # Process result
203
+ # Generate toctree status message (shared for both success and partial_success)
204
+ toctree_status = ""
205
+ if toctree_result:
206
+ if toctree_result["status"] == "success":
207
+ toctree_status = f"\nπŸ“‹ **Toctree Updated:** βœ… {toctree_result['message']}"
208
+ else:
209
+ toctree_status = f"\nπŸ“‹ **Toctree Update Failed:** ❌ {toctree_result['message']}"
210
+
211
  if result["status"] == "success":
212
  return f"""βœ… **GitHub PR Creation Successful!**
213
 
214
  πŸ”— **PR URL:** {result["pr_url"]}
215
  🌿 **Branch:** {result["branch"]}
216
+ πŸ“ **File:** {result["file_path"]}{toctree_status}
217
 
218
  {result["message"]}"""
219
 
 
221
  return f"""⚠️ **Partial Success**
222
 
223
  🌿 **Branch:** {result["branch"]}
224
+ πŸ“ **File:** {result["file_path"]}{toctree_status}
225
 
226
  {result["message"]}
227
 
app.py CHANGED
@@ -14,6 +14,7 @@ from agent.handler import (
14
  send_message,
15
  start_translate_handler,
16
  sync_language_displays,
 
17
  update_status,
18
  update_github_config,
19
  )
@@ -30,7 +31,7 @@ css = """
30
  background: rgba(255, 255, 180, 0.25);
31
  border-radius: 18px;
32
  box-shadow: 0 4px 24px rgba(0,0,0,0.08);
33
- padding: 1.5em;
34
  backdrop-filter: blur(8px);
35
  border: 1px solid rgba(255,255,180,0.25);
36
  width: 100%;
@@ -40,10 +41,12 @@ css = """
40
  background: rgba(255, 255, 180, 0.25);
41
  border-radius: 18px;
42
  box-shadow: 0 4px 24px rgba(0,0,0,0.08);
43
- padding: 1.5em;
44
  backdrop-filter: blur(8px);
45
  border: 1px solid rgba(255,255,180,0.25);
46
  width: 100%;
 
 
47
  }
48
  .status-card {
49
  width: 100%
@@ -91,7 +94,6 @@ css = """
91
  with gr.Blocks(
92
  css=css, title=" 🌐 Hugging Face Transformers Docs i18n made easy"
93
  ) as demo:
94
-
95
  # Title
96
  with open("images/hfkr_logo.png", "rb") as img_file:
97
  base64_img = base64.b64encode(img_file.read()).decode()
@@ -105,11 +107,12 @@ with gr.Blocks(
105
  # Content
106
  with gr.Row():
107
  # Chat interface
108
- with gr.Column(scale=4, elem_classes=["chat-container"]):
109
  gr.Markdown("### 🌐 Hugging Face i18n Agent")
110
 
111
  chatbot = gr.Chatbot(
112
- value=[[None, get_welcome_message()]], scale=1, height=585
 
113
  )
114
 
115
  # Controller interface
@@ -122,16 +125,15 @@ with gr.Blocks(
122
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
123
  with gr.TabItem("1. Find Files", id=0):
124
  with gr.Group():
125
- lang_dropdown = gr.Dropdown(
126
  choices=[language.value for language in Languages],
127
  label="🌍 Translate To",
128
  value="ko",
129
  )
130
  k_input = gr.Number(
131
  label="πŸ“Š First k missing translated docs",
132
- value=1,
133
  minimum=1,
134
- maximum=100,
135
  )
136
  find_btn = gr.Button(
137
  "πŸ” Find Files to Translate",
@@ -140,6 +142,17 @@ with gr.Blocks(
140
 
141
  with gr.TabItem("2. Translate", id=1):
142
  with gr.Group():
 
 
 
 
 
 
 
 
 
 
 
143
  translate_lang_display = gr.Dropdown(
144
  choices=[language.value for language in Languages],
145
  label="🌍 Translation Language",
@@ -150,6 +163,21 @@ with gr.Blocks(
150
  label="πŸ”‘ Anthropic API key for translation generation",
151
  type="password",
152
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  start_translate_btn = gr.Button(
154
  "πŸš€ Start Translation", elem_classes="action-button"
155
  )
@@ -186,7 +214,7 @@ with gr.Blocks(
186
 
187
  # Chat Controller
188
  with gr.Column(elem_classes=["control-panel"]):
189
- gr.Markdown("### πŸ’¬ Chat with agent")
190
  msg_input = gr.Textbox(
191
  placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
192
  container=False,
@@ -199,7 +227,7 @@ with gr.Blocks(
199
  find_btn.click(
200
  fn=process_file_search_handler,
201
  inputs=[lang_dropdown, k_input, chatbot],
202
- outputs=[chatbot, msg_input, status_display, control_tabs],
203
  )
204
 
205
  # Sync language across tabs
@@ -209,10 +237,17 @@ with gr.Blocks(
209
  outputs=[translate_lang_display],
210
  )
211
 
 
 
 
 
 
 
 
212
  # Button event handlers
213
  start_translate_btn.click(
214
  fn=start_translate_handler,
215
- inputs=[chatbot, anthropic_key],
216
  outputs=[chatbot, msg_input, status_display, control_tabs],
217
  )
218
 
@@ -247,5 +282,13 @@ with gr.Blocks(
247
  outputs=[chatbot, msg_input, status_display],
248
  )
249
 
 
 
 
 
 
 
 
 
250
  root_path = os.environ.get("GRADIO_ROOT_PATH")
251
  demo.launch(root_path=root_path)
 
14
  send_message,
15
  start_translate_handler,
16
  sync_language_displays,
17
+ update_prompt_preview,
18
  update_status,
19
  update_github_config,
20
  )
 
31
  background: rgba(255, 255, 180, 0.25);
32
  border-radius: 18px;
33
  box-shadow: 0 4px 24px rgba(0,0,0,0.08);
34
+ padding: 1.0em;
35
  backdrop-filter: blur(8px);
36
  border: 1px solid rgba(255,255,180,0.25);
37
  width: 100%;
 
41
  background: rgba(255, 255, 180, 0.25);
42
  border-radius: 18px;
43
  box-shadow: 0 4px 24px rgba(0,0,0,0.08);
44
+ padding: 1.0em;
45
  backdrop-filter: blur(8px);
46
  border: 1px solid rgba(255,255,180,0.25);
47
  width: 100%;
48
+ overflow: visible !important;
49
+
50
  }
51
  .status-card {
52
  width: 100%
 
94
  with gr.Blocks(
95
  css=css, title=" 🌐 Hugging Face Transformers Docs i18n made easy"
96
  ) as demo:
 
97
  # Title
98
  with open("images/hfkr_logo.png", "rb") as img_file:
99
  base64_img = base64.b64encode(img_file.read()).decode()
 
107
  # Content
108
  with gr.Row():
109
  # Chat interface
110
+ with gr.Column(scale=3, elem_classes=["chat-container"]):
111
  gr.Markdown("### 🌐 Hugging Face i18n Agent")
112
 
113
  chatbot = gr.Chatbot(
114
+ value=[[None, get_welcome_message()]], scale=1, height=585,
115
+ show_copy_button=True
116
  )
117
 
118
  # Controller interface
 
125
  with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
126
  with gr.TabItem("1. Find Files", id=0):
127
  with gr.Group():
128
+ lang_dropdown = gr.Radio(
129
  choices=[language.value for language in Languages],
130
  label="🌍 Translate To",
131
  value="ko",
132
  )
133
  k_input = gr.Number(
134
  label="πŸ“Š First k missing translated docs",
135
+ value=10,
136
  minimum=1,
 
137
  )
138
  find_btn = gr.Button(
139
  "πŸ” Find Files to Translate",
 
142
 
143
  with gr.TabItem("2. Translate", id=1):
144
  with gr.Group():
145
+ files_to_translate = gr.Radio(
146
+ choices=[],
147
+ label="πŸ“„ Select a file to translate",
148
+ interactive=True,
149
+ value=None,
150
+ )
151
+ file_to_translate_input = gr.Textbox(
152
+ label="🌍 Select in the dropdown or write the file path to translate",
153
+ value="",
154
+ )
155
+
156
  translate_lang_display = gr.Dropdown(
157
  choices=[language.value for language in Languages],
158
  label="🌍 Translation Language",
 
163
  label="πŸ”‘ Anthropic API key for translation generation",
164
  type="password",
165
  )
166
+ additional_instruction = gr.Textbox(
167
+ label="πŸ“ Additional instructions (Optional - e.g., custom glossary)",
168
+ placeholder="Example: Translate 'model' as 'λͺ¨λΈ' consistently",
169
+ lines=2,
170
+ )
171
+
172
+ with gr.Accordion("πŸ” Preview Prompt", open=False):
173
+ prompt_preview = gr.Textbox(
174
+ label="Current Translation Prompt",
175
+ lines=8,
176
+ interactive=False,
177
+ placeholder="Select a file and language to see the prompt preview...",
178
+ show_copy_button=True,
179
+ )
180
+
181
  start_translate_btn = gr.Button(
182
  "πŸš€ Start Translation", elem_classes="action-button"
183
  )
 
214
 
215
  # Chat Controller
216
  with gr.Column(elem_classes=["control-panel"]):
217
+ gr.Markdown("### πŸ’¬ Chat with agent (Only simple chat is available)")
218
  msg_input = gr.Textbox(
219
  placeholder="Type your message here... (e.g. 'what', 'how', or 'help')",
220
  container=False,
 
227
  find_btn.click(
228
  fn=process_file_search_handler,
229
  inputs=[lang_dropdown, k_input, chatbot],
230
+ outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
231
  )
232
 
233
  # Sync language across tabs
 
237
  outputs=[translate_lang_display],
238
  )
239
 
240
+ #
241
+ files_to_translate.change(
242
+ fn=lambda x: x,
243
+ inputs=[files_to_translate],
244
+ outputs=[file_to_translate_input],
245
+ )
246
+
247
  # Button event handlers
248
  start_translate_btn.click(
249
  fn=start_translate_handler,
250
+ inputs=[chatbot, anthropic_key, file_to_translate_input, additional_instruction],
251
  outputs=[chatbot, msg_input, status_display, control_tabs],
252
  )
253
 
 
282
  outputs=[chatbot, msg_input, status_display],
283
  )
284
 
285
+ # Update prompt preview when inputs change
286
+ for input_component in [translate_lang_display, file_to_translate_input, additional_instruction]:
287
+ input_component.change(
288
+ fn=update_prompt_preview,
289
+ inputs=[translate_lang_display, file_to_translate_input, additional_instruction],
290
+ outputs=[prompt_preview],
291
+ )
292
+
293
  root_path = os.environ.get("GRADIO_ROOT_PATH")
294
  demo.launch(root_path=root_path)
pr_generator/agent.py CHANGED
@@ -518,7 +518,7 @@ Please return only the commit message. No other explanation is needed."""
518
  "status": "partial_success",
519
  "branch": branch_name,
520
  "file_path": target_filepath,
521
- "message": f"File was saved but PR creation failed: {pr_result}",
522
  "error_details": pr_result,
523
  }
524
  elif "successful" in pr_result and "http" in pr_result:
 
518
  "status": "partial_success",
519
  "branch": branch_name,
520
  "file_path": target_filepath,
521
+ "message": f"File was saved and commit was successful.\nPR creation failed: {pr_result}",
522
  "error_details": pr_result,
523
  }
524
  elif "successful" in pr_result and "http" in pr_result:
translation_result/docs/source/en/accelerator_selection.md CHANGED
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
16
 
17
  # 가속기 선택 [[accelerator-selection]]
18
 
19
- λΆ„μ‚° ν›ˆλ ¨ 쀑에 μ‚¬μš©ν•  가속기(CUDA, XPU, MPS, HPU λ“±)의 μˆ˜μ™€ μˆœμ„œλ₯Ό μ§€μ •ν•  수 μžˆμŠ΅λ‹ˆλ‹€. μ΄λŠ” μ„œλ‘œ λ‹€λ₯Έ μ—°μ‚° μ„±λŠ₯을 κ°€μ§„ 가속기가 있고 더 λΉ λ₯Έ 가속기λ₯Ό λ¨Όμ € μ‚¬μš©ν•˜κ³  싢을 λ•Œ μœ μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. λ˜λŠ” μ‚¬μš© κ°€λŠ₯ν•œ 가속기 쀑 μΌλΆ€λ§Œ μ‚¬μš©ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)κ³Ό [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) λͺ¨λ‘μ—μ„œ μž‘λ™ν•©λ‹ˆλ‹€. Accelerateλ‚˜ [DeepSpeed integration](./main_classes/deepspeed)이 ν•„μš”ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.
20
 
21
  이 κ°€μ΄λ“œλŠ” μ‚¬μš©ν•  κ°€μ†κΈ°μ˜ μˆ˜μ™€ μ‚¬μš© μˆœμ„œλ₯Ό μ„ νƒν•˜λŠ” 방법을 λ³΄μ—¬μ€λ‹ˆλ‹€.
22
 
@@ -27,7 +27,7 @@ rendered properly in your Markdown viewer.
27
  <hfoptions id="select-accelerator">
28
  <hfoption id="torchrun">
29
 
30
- `--nproc_per_node`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  가속기 수λ₯Ό μ„ νƒν•˜μ„Έμš”.
31
 
32
  ```bash
33
  torchrun --nproc_per_node=2 trainer-program.py ...
@@ -36,7 +36,7 @@ torchrun --nproc_per_node=2 trainer-program.py ...
36
  </hfoption>
37
  <hfoption id="Accelerate">
38
 
39
- `--num_processes`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  가속기 수λ₯Ό μ„ νƒν•˜μ„Έμš”.
40
 
41
  ```bash
42
  accelerate launch --num_processes 2 trainer-program.py ...
@@ -45,7 +45,7 @@ accelerate launch --num_processes 2 trainer-program.py ...
45
  </hfoption>
46
  <hfoption id="DeepSpeed">
47
 
48
- `--num_gpus`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  GPU 수λ₯Ό μ„ νƒν•˜μ„Έμš”.
49
 
50
  ```bash
51
  deepspeed --num_gpus 2 trainer-program.py ...
@@ -55,7 +55,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
55
  </hfoptions>
56
 
57
  ## 가속기 μˆœμ„œ [[order-of-accelerators]]
58
- μ‚¬μš©ν•  νŠΉμ • 가속기와 κ·Έ μˆœμ„œλ₯Ό μ„ νƒν•˜λ €λ©΄ ν•˜λ“œμ›¨μ–΄μ— μ ν•©ν•œ ν™˜κ²½ λ³€μˆ˜λ₯Ό μ‚¬μš©ν•˜μ„Έμš”. μ΄λŠ” 각 μ‹€ν–‰λ§ˆλ‹€ λͺ…λ Ήμ€„μ—μ„œ μ„€μ •λ˜λŠ” κ²½μš°κ°€ λ§Žμ§€λ§Œ, `~/.bashrc`λ‚˜ λ‹€λ₯Έ μ‹œμž‘ μ„€μ • νŒŒμΌμ— μΆ”κ°€ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.
59
 
60
  예λ₯Ό λ“€μ–΄, 4개의 가속기(0, 1, 2, 3)κ°€ 있고 가속기 0κ³Ό 2만 μ‹€ν–‰ν•˜κ³  μ‹Άλ‹€λ©΄:
61
 
@@ -66,7 +66,7 @@ deepspeed --num_gpus 2 trainer-program.py ...
66
  CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
67
  ```
68
 
69
- GPU 0κ³Ό 2만 PyTorch에 "보이며" 각각 `cuda:0`κ³Ό `cuda:1`둜 λ§€ν•‘λ©λ‹ˆλ‹€.
70
  μˆœμ„œλ₯Ό λ°”κΎΈλ €λ©΄ (GPU 2λ₯Ό `cuda:0`으둜, GPU 0을 `cuda:1`둜 μ‚¬μš©):
71
 
72
 
@@ -80,15 +80,15 @@ GPU 없이 μ‹€ν–‰ν•˜λ €λ©΄:
80
  CUDA_VISIBLE_DEVICES= python trainer-program.py ...
81
  ```
82
 
83
- `CUDA_DEVICE_ORDER`λ₯Ό μ‚¬μš©ν•˜μ—¬ CUDA μž₯치 μˆœμ„œλ₯Ό μ œμ–΄ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€:
84
 
85
- - PCIe λ²„μŠ€ ID μˆœμ„œλ‘œ μ •λ ¬ (`nvidia-smi`와 일치):
86
 
87
  ```bash
88
  $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
89
  ```
90
 
91
- - μ—°μ‚° μ„±λŠ₯ μˆœμ„œλ‘œ μ •λ ¬ (κ°€μž₯ λΉ λ₯Έ 것뢀터):
92
 
93
  ```bash
94
  export CUDA_DEVICE_ORDER=FASTEST_FIRST
@@ -101,7 +101,7 @@ $hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
101
  ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
102
  ```
103
 
104
- XPU 0κ³Ό 2만 PyTorch에 "보이며" 각각 `xpu:0`κ³Ό `xpu:1`둜 λ§€ν•‘λ©λ‹ˆλ‹€.
105
  μˆœμ„œλ₯Ό λ°”κΎΈλ €λ©΄ (XPU 2λ₯Ό `xpu:0`으둜, XPU 0을 `xpu:1`둜 μ‚¬μš©):
106
 
107
  ```bash
@@ -109,13 +109,13 @@ ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
109
  ```
110
 
111
 
112
- λ‹€μŒμœΌλ‘œ Intel XPU μˆœμ„œλ₯Ό μ œμ–΄ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€:
113
 
114
  ```bash
115
  export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
116
  ```
117
 
118
- Intel XPU의 μž₯치 μ—΄κ±° 및 정렬에 λŒ€ν•œ μžμ„Έν•œ μ •λ³΄λŠ” [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) λ¬Έμ„œλ₯Ό μ°Έμ‘°ν•˜μ„Έμš”.
119
 
120
  </hfoption>
121
  </hfoptions>
@@ -123,5 +123,5 @@ Intel XPU의 μž₯치 μ—΄κ±° 및 정렬에 λŒ€ν•œ μžμ„Έν•œ μ •λ³΄λŠ” [Level Zero]
123
 
124
 
125
  > [!WARNING]
126
- > ν™˜κ²½ λ³€μˆ˜λŠ” λͺ…령쀄에 μΆ”κ°€ν•˜λŠ” λŒ€μ‹  exportν•  수 μžˆμŠ΅λ‹ˆλ‹€. ν™˜κ²½ λ³€μˆ˜κ°€ μ–΄λ–»κ²Œ μ„€μ •λ˜μ—ˆλŠ”μ§€ μžŠμ–΄λ²„λ¦¬κ³  κ²°κ΅­ 잘λͺ»λœ 가속기λ₯Ό μ‚¬μš©ν•˜κ²Œ 될 수 μžˆμ–΄ ν˜Όλž€μŠ€λŸ¬μšΈ 수 μžˆμœΌλ―€λ‘œ ꢌμž₯ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. λŒ€μ‹ , λ™μΌν•œ λͺ…λ Ήμ€„μ—μ„œ νŠΉμ • ν›ˆλ ¨ 싀행에 λŒ€ν•΄ ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•˜λŠ” 것이 일반적인 κ΄€λ‘€μž…λ‹ˆλ‹€.
127
  ```
 
16
 
17
  # 가속기 선택 [[accelerator-selection]]
18
 
19
+ λΆ„μ‚° ν•™μŠ΅ μ€‘μ—λŠ” μ‚¬μš©ν•  가속기(CUDA, XPU, MPS, HPU λ“±)의 μˆ˜μ™€ μˆœμ„œλ₯Ό μ§€μ •ν•  수 μžˆμŠ΅λ‹ˆλ‹€. μ΄λŠ” μ„œλ‘œ λ‹€λ₯Έ μ»΄ν“¨νŒ… μ„±λŠ₯을 κ°€μ§„ 가속기가 μžˆμ„ λ•Œ 더 λΉ λ₯Έ 가속기λ₯Ό λ¨Όμ € μ‚¬μš©ν•˜κ³  싢은 κ²½μš°μ— μœ μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. λ˜λŠ” μ‚¬μš© κ°€λŠ₯ν•œ κ°€μ†κΈ°μ˜ μΌλΆ€λ§Œ μ‚¬μš©ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)κ³Ό [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) λͺ¨λ‘μ—μ„œ μž‘λ™ν•©λ‹ˆλ‹€. Accelerateλ‚˜ [DeepSpeed integration](./main_classes/deepspeed)λŠ” ν•„μš”ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.
20
 
21
  이 κ°€μ΄λ“œλŠ” μ‚¬μš©ν•  κ°€μ†κΈ°μ˜ μˆ˜μ™€ μ‚¬μš© μˆœμ„œλ₯Ό μ„ νƒν•˜λŠ” 방법을 λ³΄μ—¬μ€λ‹ˆλ‹€.
22
 
 
27
  <hfoptions id="select-accelerator">
28
  <hfoption id="torchrun">
29
 
30
+ `--nproc_per_node`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  가속기 수λ₯Ό μ„ νƒν•©λ‹ˆλ‹€.
31
 
32
  ```bash
33
  torchrun --nproc_per_node=2 trainer-program.py ...
 
36
  </hfoption>
37
  <hfoption id="Accelerate">
38
 
39
+ `--num_processes`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  가속기 수λ₯Ό μ„ νƒν•©λ‹ˆλ‹€.
40
 
41
  ```bash
42
  accelerate launch --num_processes 2 trainer-program.py ...
 
45
  </hfoption>
46
  <hfoption id="DeepSpeed">
47
 
48
+ `--num_gpus`λ₯Ό μ‚¬μš©ν•˜μ—¬ μ‚¬μš©ν•  GPU 수λ₯Ό μ„ νƒν•©λ‹ˆλ‹€.
49
 
50
  ```bash
51
  deepspeed --num_gpus 2 trainer-program.py ...
 
55
  </hfoptions>
56
 
57
  ## 가속기 μˆœμ„œ [[order-of-accelerators]]
58
+ μ‚¬μš©ν•  νŠΉμ • 가속기와 κ·Έ μˆœμ„œλ₯Ό μ„ νƒν•˜λ €λ©΄ ν•˜λ“œμ›¨μ–΄μ— μ ν•©ν•œ ν™˜κ²½ λ³€μˆ˜λ₯Ό μ‚¬μš©ν•˜μ„Έμš”. μ΄λŠ” μ’…μ’… 각 싀행에 λŒ€ν•΄ λͺ…λ Ήμ€„μ—μ„œ μ„€μ •λ˜μ§€λ§Œ, `~/.bashrc`λ‚˜ λ‹€λ₯Έ μ‹œμž‘ ꡬ성 νŒŒμΌμ— μΆ”κ°€ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.
59
 
60
  예λ₯Ό λ“€μ–΄, 4개의 가속기(0, 1, 2, 3)κ°€ 있고 가속기 0κ³Ό 2만 μ‹€ν–‰ν•˜κ³  μ‹Άλ‹€λ©΄:
61
 
 
66
  CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
67
  ```
68
 
69
+ GPU 0κ³Ό 2만 PyTorchμ—μ„œ "보이며" 각각 `cuda:0`κ³Ό `cuda:1`둜 λ§€ν•‘λ©λ‹ˆλ‹€.
70
  μˆœμ„œλ₯Ό λ°”κΎΈλ €λ©΄ (GPU 2λ₯Ό `cuda:0`으둜, GPU 0을 `cuda:1`둜 μ‚¬μš©):
71
 
72
 
 
80
  CUDA_VISIBLE_DEVICES= python trainer-program.py ...
81
  ```
82
 
83
+ `CUDA_DEVICE_ORDER`λ₯Ό μ‚¬μš©ν•˜μ—¬ CUDA μž₯치의 μˆœμ„œλ₯Ό μ œμ–΄ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€:
84
 
85
+ - PCIe λ²„μŠ€ ID μˆœμ„œ (`nvidia-smi`와 일치):
86
 
87
  ```bash
88
export CUDA_DEVICE_ORDER=PCI_BUS_ID
89
  ```
90
 
91
+ - μ»΄ν“¨νŒ… μ„±λŠ₯ μˆœμ„œ (κ°€μž₯ λΉ λ₯Έ 것뢀터):
92
 
93
  ```bash
94
  export CUDA_DEVICE_ORDER=FASTEST_FIRST
 
101
  ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
102
  ```
103
 
104
+ XPU 0κ³Ό 2만 PyTorchμ—μ„œ "보이며" 각각 `xpu:0`κ³Ό `xpu:1`둜 λ§€ν•‘λ©λ‹ˆλ‹€.
105
  μˆœμ„œλ₯Ό λ°”κΎΈλ €λ©΄ (XPU 2λ₯Ό `xpu:0`으둜, XPU 0을 `xpu:1`둜 μ‚¬μš©):
106
 
107
  ```bash
 
109
  ```
110
 
111
 
112
+ λ‹€μŒμ„ μ‚¬μš©ν•˜μ—¬ Intel XPU의 μˆœμ„œλ₯Ό μ œμ–΄ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€:
113
 
114
  ```bash
115
  export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
116
  ```
117
 
118
+ Intel XPUμ—μ„œμ˜ μž₯치 μ—΄κ±° 및 정렬에 λŒ€ν•œ μžμ„Έν•œ μ •λ³΄λŠ” [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) λ¬Έμ„œλ₯Ό μ°Έμ‘°ν•˜μ„Έμš”.
119
 
120
  </hfoption>
121
  </hfoptions>
 
123
 
124
 
125
  > [!WARNING]
126
+ > ν™˜κ²½ λ³€μˆ˜λŠ” λͺ…령쀄에 μΆ”κ°€ν•˜λŠ” λŒ€μ‹  내보낼 수 μžˆμŠ΅λ‹ˆλ‹€. ν™˜κ²½ λ³€μˆ˜κ°€ μ–΄λ–»κ²Œ μ„€μ •λ˜μ—ˆλŠ”μ§€ μžŠμ–΄λ²„λ¦¬κ³  잘λͺ»λœ 가속기λ₯Ό μ‚¬μš©ν•˜κ²Œ 될 수 μžˆμ–΄ ν˜Όλž€μ„ μ•ΌκΈ°ν•  수 μžˆμœΌλ―€λ‘œ ꢌμž₯ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. λŒ€μ‹ , 같은 λͺ…λ Ήμ€„μ—μ„œ νŠΉμ • ν›ˆλ ¨ 싀행을 μœ„ν•΄ ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•˜λŠ” 것이 일반적인 κ΄€λ‘€μž…λ‹ˆλ‹€.
127
  ```
translator/content.py CHANGED
@@ -5,8 +5,13 @@ import requests
5
  from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
7
 
 
 
8
 
9
  def get_content(filepath: str) -> str:
 
 
 
10
  url = string.Template(
11
  "https://raw.githubusercontent.com/huggingface/" "transformers/main/$filepath"
12
  ).safe_substitute(filepath=filepath)
@@ -24,24 +29,31 @@ def preprocess_content(content: str) -> str:
24
  ## ignore top license comment
25
  to_translate = content[content.find("#") :]
26
  ## remove code blocks from text
27
- to_translate = re.sub(r"```.*?```", "", to_translate, flags=re.DOTALL)
28
  ## remove markdown tables from text
29
- to_translate = re.sub(r"^\|.*\|$\n?", "", to_translate, flags=re.MULTILINE)
30
  ## remove empty lines from text
31
  to_translate = re.sub(r"\n\n+", "\n\n", to_translate)
32
-
33
  return to_translate
34
 
35
 
36
- def get_full_prompt(language: str, to_translate: str) -> str:
37
- prompt = string.Template(
38
  "What do these sentences about Hugging Face Transformers "
39
  "(a machine learning library) mean in $language? "
40
  "Please do not translate the word after a πŸ€— emoji "
41
- "as it is a product name. Output only the translated markdown result "
42
- "without any explanations or introductions.\n\n```md"
43
  ).safe_substitute(language=language)
44
- return "\n".join([prompt, to_translate.strip(), "```"])
 
 
 
 
 
 
 
 
45
 
46
 
47
  def split_markdown_sections(markdown: str) -> list:
@@ -64,33 +76,89 @@ def make_scaffold(content: str, to_translate: str) -> string.Template:
64
  scaffold = content
65
  for i, text in enumerate(to_translate.split("\n\n")):
66
  scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
 
 
67
  return string.Template(scaffold)
68
 
69
 
 
 
 
 
 
 
 
70
  def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
71
  scaffold = make_scaffold(content, to_translate)
 
 
 
 
 
 
 
72
  divided = split_markdown_sections(to_translate)
 
 
73
  anchors = get_anchors(divided)
74
-
75
- translated = split_markdown_sections(translated)
76
-
77
- translated[1::3] = [
78
- f"{korean_title} {anchors[i]}"
79
- for i, korean_title in enumerate(translated[1::3])
80
- ]
81
- translated = "".join(
82
- ["".join(translated[i * 3 : i * 3 + 3]) for i in range(len(translated) // 3)]
83
- ).split("\n\n")
84
- if newlines := scaffold.template.count("$hf_i18n_placeholder") - len(translated):
85
- return str(
86
- [
87
- f"Please {'recover' if newlines > 0 else 'remove'} "
88
- f"{abs(newlines)} incorrectly inserted double newlines."
89
- ]
90
- )
91
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  translated_doc = scaffold.safe_substitute(
93
- {f"hf_i18n_placeholder{i}": text for i, text in enumerate(translated)}
94
  )
95
  return translated_doc
96
 
 
5
  from langchain.callbacks import get_openai_callback
6
  from langchain_anthropic import ChatAnthropic
7
 
8
+ from translator.prompt_glossary import PROMPT_WITH_GLOSSARY
9
+
10
 
11
  def get_content(filepath: str) -> str:
12
+ if filepath == "":
13
+ raise ValueError("No files selected for translation.")
14
+
15
  url = string.Template(
16
  "https://raw.githubusercontent.com/huggingface/" "transformers/main/$filepath"
17
  ).safe_substitute(filepath=filepath)
 
29
  ## ignore top license comment
30
  to_translate = content[content.find("#") :]
31
  ## remove code blocks from text
32
+ # to_translate = re.sub(r"```.*?```", "", to_translate, flags=re.DOTALL)
33
  ## remove markdown tables from text
34
+ # to_translate = re.sub(r"^\|.*\|$\n?", "", to_translate, flags=re.MULTILINE)
35
  ## remove empty lines from text
36
  to_translate = re.sub(r"\n\n+", "\n\n", to_translate)
 
37
  return to_translate
38
 
39
 
40
def get_full_prompt(language: str, to_translate: str, additional_instruction: str = "") -> str:
    """Build the translation prompt sent to the LLM.

    Args:
        language: Target language name substituted into the prompt template.
        to_translate: Markdown content to be translated.
        additional_instruction: Optional free-form instruction appended at
            the end of the prompt when non-blank.

    Returns:
        The fully assembled prompt string, including the glossary block.
    """
    base_prompt = string.Template(
        "What do these sentences about Hugging Face Transformers "
        "(a machine learning library) mean in $language? "
        "Please do not translate the word after a 🤗 emoji "
        # BUG FIX: the previous fragments concatenated into
        # "...content intactNo explanations..." (missing ". " between
        # sentences) and carried a stray "**"; punctuation restored and
        # the stray asterisks dropped.
        "as it is a product name. Output the complete markdown file, "
        "with prose translated and all other content intact. "
        "No explanations or extras—only the translated markdown. "
        "Also translate all comments within code blocks as well."
    ).safe_substitute(language=language)

    base_prompt += "\n\n```md"

    # Glossary guidance is appended after the fenced source content.
    full_prompt = "\n".join([base_prompt, to_translate.strip(), "```", PROMPT_WITH_GLOSSARY])

    if additional_instruction.strip():
        full_prompt += f"\n\n🗒️ Additional instructions: {additional_instruction.strip()}"

    return full_prompt
57
 
58
 
59
  def split_markdown_sections(markdown: str) -> list:
 
76
  scaffold = content
77
  for i, text in enumerate(to_translate.split("\n\n")):
78
  scaffold = scaffold.replace(text, f"$hf_i18n_placeholder{i}", 1)
79
+ print("inner scaffold:")
80
+ print(scaffold)
81
  return string.Template(scaffold)
82
 
83
 
84
def is_in_code_block(text: str, position: int) -> bool:
    """Return True when *position* falls inside a fenced code block of *text*."""
    # An odd number of ``` fences before the position means the fence that
    # opened most recently has not been closed yet.
    fences_before = text.count("```", 0, position)
    return fences_before % 2 == 1
89
+
90
+
91
def fill_scaffold(content: str, to_translate: str, translated: str) -> str:
    """Substitute translated sections back into the original document scaffold.

    Args:
        content: Full original document text.
        to_translate: Preprocessed source text the scaffold was built from.
        translated: LLM-translated text to re-insert.

    Returns:
        The reconstructed document, or an error string on a section-count
        mismatch that could not be reconciled.
    """
    # BUG FIX: removed the debug print() calls left in this function; they
    # dumped the whole scaffold/sections to stdout on every translation.
    scaffold = make_scaffold(content, to_translate)

    # Headers/anchors come from the original English text.
    divided = split_markdown_sections(to_translate)
    anchors = get_anchors(divided)

    # split_markdown_sections yields triples; headers occupy every third
    # slot starting at index 1 (hence the [1::3] slices below).
    translated_divided = split_markdown_sections(translated)

    # Reconcile header counts between original and translation.
    if len(translated_divided[1::3]) != len(anchors):
        if len(translated_divided[1::3]) < len(anchors):
            anchors = anchors[: len(translated_divided[1::3])]
        else:
            # Pad with empty anchors for extra translated headers.
            anchors.extend([""] * (len(translated_divided[1::3]) - len(anchors)))

    # Append each anchor to its translated header, but only when the header
    # occurs outside a fenced code block in the translated text.
    for i, title in enumerate(translated_divided[1::3]):
        if i < len(anchors):
            header_pos = translated.find(title.strip())
            if header_pos != -1 and not is_in_code_block(translated, header_pos):
                translated_divided[1 + i * 3] = f"{title} {anchors[i]}"
            else:
                translated_divided[1 + i * 3] = title

    # Reassemble the translated content and split it back into sections.
    reconstructed = "".join(
        "".join(translated_divided[i * 3 : i * 3 + 3])
        for i in range(len(translated_divided) // 3)
    )
    translated_sections = reconstructed.split("\n\n")

    # Pad or truncate so the section count matches the scaffold placeholders.
    placeholder_count = scaffold.template.count("$hf_i18n_placeholder")
    if len(translated_sections) < placeholder_count:
        translated_sections.extend([""] * (placeholder_count - len(translated_sections)))
    elif len(translated_sections) > placeholder_count:
        translated_sections = translated_sections[:placeholder_count]

    # Defensive final check (unreachable after the pad/truncate above, kept
    # to preserve the original error contract).
    if len(translated_sections) != placeholder_count:
        return f"Error: Section count mismatch. Expected: {placeholder_count}, Got: {len(translated_sections)}"

    translated_doc = scaffold.safe_substitute(
        {f"hf_i18n_placeholder{i}": text for i, text in enumerate(translated_sections)}
    )
    return translated_doc
164
 
translator/retriever.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  from pathlib import Path
3
 
@@ -25,6 +26,59 @@ def get_github_repo_files():
25
  return file_paths
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
29
  """
30
  Retrieve missing docs
 
1
+ import re
2
  import os
3
  from pathlib import Path
4
 
 
26
  return file_paths
27
 
28
 
29
def get_github_issue_open_pr(lang: str = "ko"):
    """Collect open Korean-translation PRs on huggingface/transformers.

    Pages through the GitHub "list pull requests" API, keeps PRs whose
    title starts with '🌐 [i18n-KO]', and extracts the English source file
    mentioned in each title.

    Args:
        lang: Target language code; only "ko" is currently supported.

    Returns:
        A tuple ``(filenames, pr_info_list)`` of the referenced
        ``docs/source/en/...`` paths and the matching PR web URLs.

    Raises:
        ValueError: for languages other than "ko".
        Exception: when the GitHub API responds with a non-200 status.
    """
    # NOTE: the unused hard-coded issue id ("20179") was removed; only the
    # language validation it gated is kept.
    if lang != "ko":
        raise ValueError(
            "No Github issue has been registered to the server. (Only 'ko' is supported - please contact us to support this.)"
        )

    headers = {
        "Accept": "application/vnd.github+json",
    }

    all_open_prs = []
    page = 1
    per_page = 100  # Maximum allowed by GitHub API

    while True:
        url = f"https://api.github.com/repos/huggingface/transformers/pulls?state=open&page={page}&per_page={per_page}"
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            raise Exception(f"GitHub API error: {response.status_code} {response.text}")

        page_prs = response.json()
        if not page_prs:  # No more PRs
            break

        all_open_prs.extend(page_prs)
        page += 1

        # Last page reached when fewer than per_page results came back.
        if len(page_prs) < per_page:
            break

    filtered_prs = [pr for pr in all_open_prs if pr["title"].startswith("🌐 [i18n-KO]")]

    # Titles look like: 🌐 [i18n-KO] Translated `accelerator_selection.md` to Korean
    pattern = re.compile(r"`([^`]+\.md)`")

    filenames = [
        "docs/source/en/" + match.group(1)
        for pr in filtered_prs
        if (match := pattern.search(pr["title"]))
    ]
    # BUG FIX: pr["url"] inside a double-quoted f-string is a SyntaxError on
    # Python < 3.12 (quote reuse only allowed since PEP 701); use the
    # documented pull-request `number` field directly instead of parsing it
    # out of the API URL.
    pr_info_list = [
        f"https://github.com/huggingface/transformers/pull/{pr['number']}"
        for pr in filtered_prs
    ]
    return filenames, pr_info_list
80
+
81
+
82
  def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
83
  """
84
  Retrieve missing docs