File size: 2,677 Bytes
c25ce6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json

def get_translate_prompt(article_title, artice_summary, original_content, target_lang):
    """Build the LLM prompt for translating a Wikipedia article's XML content.

    The prompt is a Markdown-style document with the task description, the
    article title, summary and original XML content, the target language,
    translation instructions, an example of the expected JSON output and a
    trailing opened ``json`` code fence for the model to continue from.

    Args:
        article_title: Original title of the article.
        artice_summary: Summary of the article. The parameter name keeps its
            historical spelling so existing keyword callers keep working.
        original_content: Article content in XML format to be translated.
        target_lang: Language the content should be translated into.

    Returns:
        The fully formatted prompt string.
    """
    # Example of the JSON object the model is asked to return.
    output_example = json.dumps(
        {
            "translated_content": "The complete translated XML content with all tags preserved",
        },
        indent=4,
        ensure_ascii=False,
    )

    # Values are interpolated directly rather than by calling ``str.format``
    # on the assembled text: the JSON example contains literal braces, which
    # ``str.format`` would parse as a replacement field and fail on with a
    # KeyError. Interpolated values are inserted verbatim, so braces inside
    # the article content are safe as well.
    return (
        "# Task\n"
        "You are an expert Wikipedia translator specializing in multilingual content adaptation. "
        f"Your task is to translate the provided XML content into {target_lang} while preserving the "
        "academic tone, factual accuracy, and encyclopedic style of Wikipedia.\n\n"

        "# Article Original Title\n"
        f"{article_title}\n\n"

        "# Article Summary\n"
        f"{artice_summary}\n\n"

        "# Article Original Content (XML format)\n"
        f"{original_content}\n\n"

        "# Target Language\n"
        f"{target_lang}\n\n"

        "# Instructions\n"
        "1. Preserve all XML tags, attributes, and structure exactly as they appear\n"
        "2. Translate only the text content between XML tags\n"
        "3. Maintain Wikipedia's neutral point of view and encyclopedic style\n"
        "4. Preserve proper nouns, scientific terminology, and citations appropriately\n"
        "5. Adapt cultural references or idioms to be understandable in the target language\n"
        f"6. Use terminology consistent with the {target_lang} Wikipedia for similar topics\n"
        "7. Maintain the same paragraph structure and information hierarchy\n\n"

        "# Output Format\n"
        "Return a single JSON object with the following structure:\n"
        f"```json\n{output_example}\n```\n\n"

        "# Translation Quality Guidelines\n"
        "- Accuracy: Ensure factual information is preserved exactly\n"
        "- Completeness: Translate all content, don't summarize or omit information\n"
        "- Consistency: Use consistent terminology throughout the article\n"
        "- Fluency: Produce natural-sounding text in the target language\n"
        "- Formatting: Preserve all formatting elements, including lists, tables, and emphasis\n"

        # The prompt deliberately ends inside an opened code fence so the
        # model's completion starts with the JSON object itself.
        "# Output json\n"
        "```json\n"
    )