Spaces:
Running
Running
import json | |
def get_translate_prompt(
    article_title: str,
    artice_summary: str,
    original_content: str,
    target_lang: str,
) -> str:
    """Build the LLM prompt that asks for a translation of Wikipedia XML content.

    The prompt contains the task description, the article title, summary and
    XML content, the target language, translation instructions, a JSON example
    of the expected output, and quality guidelines. It ends with an opened
    ```json fence so the model continues directly with the JSON object.

    Args:
        article_title: Original title of the article.
        artice_summary: Summary of the article. The parameter name is
            misspelled ("artice"); it is kept as-is so existing keyword
            callers keep working.
        original_content: The article content in XML form, inserted verbatim.
        target_lang: Language to translate into; inserted in several places
            of the prompt.

    Returns:
        The fully formatted prompt string.
    """
    # Example of the JSON object the model is asked to return.
    output_example = json.dumps(
        {
            "translated_content": "The complete translated XML content with all tags preserved",
        },
        indent=4,
        ensure_ascii=False,
    )
    # The example contains literal "{" and "}". Without escaping them,
    # str.format() below would treat the JSON body as a replacement field
    # and raise KeyError instead of returning the prompt.
    escaped_example = output_example.replace("{", "{{").replace("}", "}}")

    # Define the prompt template
    translate_prompt = (
        "# Task\n"
        "You are an expert Wikipedia translator specializing in multilingual content adaptation. "
        "Your task is to translate the provided XML content into {target_lang} while preserving the "
        "academic tone, factual accuracy, and encyclopedic style of Wikipedia.\n\n"
        "# Article Original Title\n"
        "{article_title}\n\n"
        "# Article Summary\n"
        "{article_summary}\n\n"
        "# Article Original Content (XML format)\n"
        "{original_content}\n\n"
        "# Target Language\n"
        "{target_lang}\n\n"
        "# Instructions\n"
        "1. Preserve all XML tags, attributes, and structure exactly as they appear\n"
        "2. Translate only the text content between XML tags\n"
        "3. Maintain Wikipedia's neutral point of view and encyclopedic style\n"
        "4. Preserve proper nouns, scientific terminology, and citations appropriately\n"
        "5. Adapt cultural references or idioms to be understandable in the target language\n"
        "6. Use terminology consistent with the {target_lang} Wikipedia for similar topics\n"
        "7. Maintain the same paragraph structure and information hierarchy\n\n"
        "# Output Format\n"
        "Return a single JSON object with the following structure:\n"
        "```json\n" +
        escaped_example +
        "\n```\n\n"
        "# Translation Quality Guidelines\n"
        "- Accuracy: Ensure factual information is preserved exactly\n"
        "- Completeness: Translate all content, don't summarize or omit information\n"
        "- Consistency: Use consistent terminology throughout the article\n"
        "- Fluency: Produce natural-sounding text in the target language\n"
        "- Formatting: Preserve all formatting elements, including lists, tables, and emphasis\n"
        "# Output json\n"
        "```json\n"
    )

    # Substituted values are inserted verbatim: str.format() does not
    # re-parse them, so braces inside the article content are safe.
    return translate_prompt.format(
        article_title=article_title,
        article_summary=artice_summary,
        original_content=original_content,
        target_lang=target_lang,
    )