Spaces:
Running
Running
import json | |
def get_translate_prompt(article_title, artice_summary, content_format, original_content, target_lang, preference_prompt=None):
    """
    Build the LLM prompt used to translate a piece of Wikipedia content.

    The prompt is assembled as a ``str.format`` template from fixed sections
    (task, article context, instructions, output format, quality guidelines),
    plus two optional sections, and is then filled in with the arguments.

    Args:
        article_title: Title of the Wikipedia article.
        artice_summary: Summary of the Wikipedia article. (The parameter name
            is misspelled; it is kept as-is so keyword callers keep working.)
        content_format: Format of the content (e.g., XML, Text).
        original_content: The content to be translated.
        target_lang: The target language for translation. The values
            "ar-x-extended" and "Arabic-Extended" add an extra section with
            extended-Arabic-alphabet guidelines.
        preference_prompt: Optional additional user preferences. Ignored when
            None, empty, or whitespace-only. The text is inserted verbatim:
            curly braces in it are NOT interpreted as template placeholders.

    Returns:
        The fully formatted prompt string, ending with an opened ```json
        fence for the model to continue.
    """
    # Base template. Single-brace names are str.format placeholders filled in
    # at the end of the function.
    translate_prompt = (
        "# Task\n"
        "You are an expert Wikipedia translator specializing in multilingual content adaptation. "
        "Your task is to translate the provided {content_format} content into {target_lang} while preserving the "
        "academic tone, factual accuracy, and encyclopedic style of Wikipedia.\n\n"
        "# Article Original Title\n"
        "{article_title}\n\n"
        "# Article Summary\n"
        "{article_summary}\n\n"
        "# Article Original Content ({content_format} format)\n"
        "{original_content}\n\n"
        "# Target Language\n"
        "{target_lang}\n\n"
        "# Instructions\n"
        "1. If provided input is XML code, Preserve all XML tags, attributes, and structure exactly as they appear\n"
        "2. If provided input is XML code, Translate only the text content between XML tags\n"
        "3. Maintain Wikipedia's neutral point of view and encyclopedic style\n"
        "4. Preserve proper nouns, scientific terminology, and citations appropriately\n"
        "5. Adapt cultural references or idioms to be understandable in the target language\n"
        "6. Use terminology consistent with the {target_lang} Wikipedia for similar topics\n"
        "7. Maintain the same paragraph structure and information hierarchy\n"
    )

    # Add special instructions for Arabic-Extended
    if target_lang in ("ar-x-extended", "Arabic-Extended"):
        translate_prompt += (
            "\n# Arabic-Extended Alphabet Guidelines\n"
            "When translating to Arabic-Extended, use the extended Arabic alphabet ONLY for entity names "
            "(people, places, brands, foreign terms) that contain sounds not in standard Arabic. Use these special characters:\n\n"
            "- ڤ (V): Use for 'v' sound in foreign names instead of ف\n"
            "- پ (P): Use for 'p' sound in foreign names instead of ب\n"
            "- چ (Ch): Use for 'ch' sound in foreign names instead of تش\n"
            "- گ (G): Use for 'g' sound in foreign names instead of ج/غ/ك\n"
            "- ژ (Zh): Use for 'zh/j' sound in foreign names instead of ز/ج\n"
            "- ڠ (ng): Use for 'ng' sound in foreign names instead of نج/نغ\n\n"
            "Examples:\n"
            "- 'Vancouver' → 'ڤانكوڤر' (using ڤ for V)\n"
            "- 'Pakistan' → 'پاكستان' (using پ for P)\n"
            "- 'Chicago' → 'چيكاغو' (using چ for Ch)\n"
            "- 'Google' → 'گوگل' (using گ for G)\n\n"
            "Important: Use these extended characters ONLY for entity names. Use standard Arabic for all other content.\n"
        )

    # Add user preference prompt if provided
    if preference_prompt and preference_prompt.strip():
        # The preference text becomes part of the template that is passed to
        # str.format() below, so literal braces must be doubled. Otherwise
        # user text such as "{formal}" or a stray "}" raises
        # KeyError/ValueError (or is silently substituted).
        escaped_preference = preference_prompt.replace("{", "{{").replace("}", "}}")
        translate_prompt += (
            "\n# Additional Translation Preferences\n"
            f"{escaped_preference}\n"
        )

    # Add the output format section. Doubled braces render as literal JSON
    # braces after formatting.
    translate_prompt += (
        "\n# Output Format\n"
        "Return a single JSON object with the following structure:\n"
        "```json\n"
        "{{\n"
        " \"output_content\": \"The complete translated {content_format} content with all tags preserved\"\n"
        "}}\n"
        "```\n\n"
        "# Translation Quality Guidelines\n"
        "- Accuracy: Ensure factual information is preserved exactly\n"
        "- Completeness: Translate all content, don't summarize or omit information\n"
        "- Consistency: Use consistent terminology throughout the article\n"
        "- Fluency: Produce natural-sounding text in the target language\n"
        "- Formatting: Preserve all formatting elements, including lists, tables, and emphasis\n"
        "# Output json\n"
        "```json\n"
    )

    # Format the prompt with the provided values. Substituted values are
    # inserted as-is (braces inside them are not re-interpreted).
    formatted_prompt = translate_prompt.format(
        article_title=article_title,
        article_summary=artice_summary,
        original_content=original_content,
        target_lang=target_lang,
        content_format=content_format,
    )
    return formatted_prompt