import copy
import os
from typing import Any, Dict, Iterator, List, Optional

import requests
import yaml


class TocTreeHandler:
    """Update the Korean Transformers docs ``_toctree.yml`` after a translation.

    The handler downloads the English and Korean toctrees, looks up the
    English title of a translated document, and replaces the matching
    placeholder entry in the Korean toctree (an entry whose title is
    ``(번역중) <English title>``) with the real local path and a Korean title.
    The updated toctree can then be committed through a PR agent.
    """

    # Path prefix and extension removed from a source file path to obtain the
    # ``local`` key used inside the toctree.
    _EN_DOCS_PREFIX = "docs/source/en/"
    _MARKDOWN_SUFFIX = ".md"

    def __init__(self):
        self.en_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
        self.ko_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/ko/_toctree.yml"
        self.local_docs_path = "docs/source/ko"
        # Toctree produced by the latest successful process_pr_commit() call.
        # Initialised here so readers never hit an AttributeError.
        self.updated_ko_toctree = None

    @staticmethod
    def _iter_entries(entries: Any) -> Iterator[Dict[str, Any]]:
        """Yield every dict entry of a toctree depth-first, parents before children."""
        if not isinstance(entries, list):
            return
        for entry in entries:
            if isinstance(entry, dict):
                yield entry
                yield from TocTreeHandler._iter_entries(entry.get('sections'))

    @staticmethod
    def _local_from_filepath(filepath: str) -> str:
        """Convert a source file path into the toctree ``local`` key.

        Everything up to and including ``docs/source/en/`` is dropped and a
        trailing ``.md`` is removed. Unlike ``str.replace``, this never touches
        a ``.md`` that appears in the middle of the path.
        """
        _, marker, tail = filepath.partition(TocTreeHandler._EN_DOCS_PREFIX)
        local = tail if marker else filepath
        if local.endswith(TocTreeHandler._MARKDOWN_SUFFIX):
            local = local[:-len(TocTreeHandler._MARKDOWN_SUFFIX)]
        return local

    def fetch_toctree(self, url: str, timeout: float = 30.0) -> Any:
        """Download ``url`` and return its content parsed as YAML.

        Args:
            url: Address of the YAML document.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller forever.

        Returns:
            Whatever ``yaml.safe_load`` produces for the document. The rest of
            this class treats toctrees as a list of dict entries.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-success HTTP status.
        """
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return yaml.safe_load(response.text)

    def get_en_toctree(self) -> Any:
        """Fetch and parse the English toctree."""
        return self.fetch_toctree(self.en_toctree_url)

    def get_ko_toctree(self) -> Any:
        """Fetch and parse the Korean toctree."""
        return self.fetch_toctree(self.ko_toctree_url)

    def extract_title_mappings(self, en_data: List[Dict], ko_data: List[Dict]) -> Dict[str, str]:
        """Map English titles to Korean titles for entries sharing a ``local``.

        Entries are matched on their ``local`` key anywhere in the two trees,
        not on their position, so a missing, extra, or reordered entry on
        either side does not break the mappings of its siblings.

        Args:
            en_data: English toctree (list of entries, possibly nested via
                ``sections``).
            ko_data: Korean toctree with the same layout.

        Returns:
            ``{english_title: korean_title}`` for every English entry whose
            ``local`` also exists in the Korean tree and where both titles are
            non-empty.
        """
        ko_title_by_local: Dict[Any, str] = {}
        for entry in self._iter_entries(ko_data):
            local = entry.get('local')
            title = entry.get('title')
            if local is not None and title:
                # First occurrence wins when a local is repeated.
                ko_title_by_local.setdefault(local, title)

        mappings: Dict[str, str] = {}
        for entry in self._iter_entries(en_data):
            local = entry.get('local')
            en_title = entry.get('title')
            if local is None or not en_title:
                continue
            ko_title = ko_title_by_local.get(local)
            if ko_title:
                mappings[en_title] = ko_title
        return mappings

    def translate_title(self, en_title: str) -> str:
        """Translate an English title to Korean using the LLM.

        This is best-effort: on any failure, or when the model answers with an
        empty string, the English title is returned unchanged.
        """
        try:
            # Imported lazily, as in the original code, so the class can be
            # loaded without the translator package.
            from translator.content import llm_translate

            prompt = (
                "Translate the following English documentation title to Korean. "
                "Return only the translated title, nothing else.\n"
                "\n"
                f"English title: {en_title}\n"
                "\n"
                "Korean title:"
            )

            _, translated_title = llm_translate(prompt)
            # An empty title would produce an invalid toctree entry.
            return translated_title.strip() or en_title
        except Exception as e:
            print(f"Error translating title '{en_title}': {e}")
            return en_title

    def create_local_toctree(self, en_title: str, local_file_path: str) -> Dict[str, str]:
        """Build a toctree entry for ``local_file_path`` with a Korean title.

        The Korean title comes from the existing English/Korean title mappings
        when available, otherwise from ``translate_title``. If anything fails
        the entry falls back to the English title.

        Returns:
            A dict with the keys ``'local'`` and ``'title'``.
        """
        try:
            en_data = self.get_en_toctree()
            ko_data = self.get_ko_toctree()

            title_mappings = self.extract_title_mappings(en_data, ko_data)
            ko_title = title_mappings.get(en_title)

            if not ko_title:
                ko_title = self.translate_title(en_title)

            return {
                'local': local_file_path,
                'title': ko_title
            }
        except Exception as e:
            print(f"Error creating local toctree: {e}")
            return {
                'local': local_file_path,
                'title': en_title
            }

    def find_and_update_translation_entry(self, ko_toctree_data, target_local: str, english_title: str, korean_title: str):
        """Replace the placeholder entry for ``english_title`` in place.

        The placeholder is the first entry (depth-first) whose title equals
        ``(번역중) <english_title>``. Its ``local`` and ``title`` are overwritten
        with ``target_local`` and ``korean_title``.

        Returns:
            True if an entry was updated, False if no placeholder was found.
        """
        target_title_pattern = f"(번역중) {english_title}"

        for entry in self._iter_entries(ko_toctree_data):
            if entry.get('title') == target_title_pattern:
                entry['local'] = target_local
                entry['title'] = korean_title
                return True
        return False

    def create_updated_toctree_with_replacement(self, ko_toctree: list, target_local: str) -> list:
        """Return a copy of ``ko_toctree`` with the placeholder for ``target_local`` filled in.

        The English title for ``target_local`` is looked up in the English
        toctree, translated, and written into the matching placeholder entry
        of a deep copy of ``ko_toctree``.

        Returns:
            The updated deep copy on success. On any failure (English title
            not found, placeholder not found, or an exception) the original
            ``ko_toctree`` object itself is returned unmodified.
        """
        try:
            en_toctree = self.get_en_toctree()
            english_title = self.find_title_for_local(en_toctree, target_local)

            if not english_title:
                print(f"Could not find English title for local: {target_local}")
                return ko_toctree

            print(f"Found English title: {english_title} for local: {target_local}")

            korean_title = self.translate_title(english_title)
            print(f"Translated Korean title: {korean_title}")

            # Work on a copy so the caller's toctree is never half-modified.
            updated_toctree = copy.deepcopy(ko_toctree)

            updated = self.find_and_update_translation_entry(
                updated_toctree, target_local, english_title, korean_title
            )

            if updated:
                print(f"Successfully updated translation entry: local={target_local}, title={korean_title}")
                return updated_toctree

            print(f"Could not find '(번역중) {english_title}' entry to update")
            return ko_toctree

        except Exception as e:
            print(f"Error creating updated toctree: {e}")
            return ko_toctree

    def find_title_for_local(self, toctree_data, target_local: str) -> Optional[str]:
        """Return the title of the first entry whose ``local`` is ``target_local``.

        Entries are searched depth-first. Returns None when no entry with a
        non-empty title matches, or when ``toctree_data`` is not a list.
        """
        for entry in self._iter_entries(toctree_data):
            if entry.get('local') == target_local:
                title = entry.get('title', '')
                if title:
                    return title
        return None

    def process_pr_commit(self, filepath: str):
        """Prepare the updated Korean toctree for the document at ``filepath``.

        On success the result is stored in ``self.updated_ko_toctree``. When no
        entry could be updated the attribute is left as None, so an unchanged
        toctree is never committed.
        """
        # Drop the result of any previous call so a failure below cannot leave
        # a stale toctree behind.
        self.updated_ko_toctree = None

        filepath_without_prefix = self._local_from_filepath(filepath)

        ko_toctree = self.get_ko_toctree()

        updated_ko_toctree = self.create_updated_toctree_with_replacement(ko_toctree, filepath_without_prefix)

        # create_updated_toctree_with_replacement hands back the very same
        # object when nothing was replaced, so identity signals failure.
        if not updated_ko_toctree or updated_ko_toctree is ko_toctree:
            print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
            return

        print("Successfully updated Korean toctree")

        self.updated_ko_toctree = updated_ko_toctree

    def commit_and_push_toctree(self, pr_agent, owner: str, repo_name: str, branch_name: str):
        """Commit the stored Korean toctree as a separate commit.

        Args:
            pr_agent: Object exposing ``create_or_update_file``; its result is
                treated as successful when it starts with ``"SUCCESS"``.
            owner: Repository owner.
            repo_name: Repository name.
            branch_name: Branch that receives the commit.

        Returns:
            A dict with ``"status"`` (``"success"`` or ``"error"``) and
            ``"message"``; successful results also carry ``"commit_message"``.
        """
        try:
            ko_data = getattr(self, 'updated_ko_toctree', None)
            if not ko_data:
                print("No updated Korean toctree available")
                return {"status": "error", "message": "No updated toctree to commit"}

            toctree_content = yaml.dump(ko_data, allow_unicode=True, default_flow_style=False, sort_keys=False)

            commit_message = "docs: update Korean documentation table of contents"

            file_result = pr_agent.create_or_update_file(
                owner=owner,
                repo_name=repo_name,
                path="docs/source/ko/_toctree.yml",
                message=commit_message,
                content=toctree_content,
                branch_name=branch_name
            )

            if file_result.startswith("SUCCESS"):
                return {
                    "status": "success",
                    "message": f"Toctree committed successfully: {file_result}",
                    "commit_message": commit_message
                }

            return {
                "status": "error",
                "message": f"Toctree commit failed: {file_result}"
            }

        except Exception as e:
            return {
                "status": "error",
                "message": f"Error committing toctree: {str(e)}"
            }

    def update_toctree_after_translation(
        self,
        translation_result: dict,
        filepath: str,
        pr_agent,
        github_config: dict
    ) -> Optional[Dict[str, Any]]:
        """Update toctree after successful translation PR.

        Args:
            translation_result: Result from translation PR workflow; its
                ``"status"`` and ``"branch"`` keys are read.
            filepath: Original file path
            pr_agent: GitHub PR agent instance
            github_config: GitHub configuration dictionary; its ``"owner"``
                and ``"repo_name"`` keys are read.

        Returns:
            None when the translation itself failed. Otherwise a dictionary
            with the toctree update result, whose ``"status"`` is ``"error"``
            when no toctree entry could be updated or the commit failed.
        """
        if translation_result["status"] == "error":
            return None

        try:
            self.process_pr_commit(filepath)

            print("self.updated_ko_toctree:", self.updated_ko_toctree)
            if not self.updated_ko_toctree:
                # Nothing was replaced: report it instead of returning an
                # implicit None or committing an unchanged file.
                return {
                    "status": "error",
                    "message": f"No toctree entry was updated for: {filepath}"
                }

            return self.commit_and_push_toctree(
                pr_agent=pr_agent,
                owner=github_config["owner"],
                repo_name=github_config["repo_name"],
                branch_name=translation_result["branch"]
            )

        except Exception as e:
            return {
                "status": "error",
                "message": f"Error updating toctree: {str(e)}"
            }