bakrianoo committed on
Commit
c25ce6b
·
1 Parent(s): e424603

setup llm parser

Browse files
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
- from utils import extract_wiki_id, get_wiki_details, split_content_into_sections
 
 
3
  import json
4
 
5
  # Define language options for translation
@@ -39,6 +41,27 @@ def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang
39
  content_sections
40
  )
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def update_ui_with_sections(sections_dict):
43
  """
44
  Creates a list of components to display in the sections area
 
1
  import gradio as gr
2
+ from utils import (extract_wiki_id, get_wiki_details,
3
+ init_llm_client, split_content_into_sections,
4
+ get_translate_prompt)
5
  import json
6
 
7
  # Define language options for translation
 
41
  content_sections
42
  )
43
 
44
def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url,
                      max_tokens=2000, temperature=0.5):
    """
    Translate a piece of article content by sending a translation prompt to an LLM.

    Args:
        content: The original content to translate; passed to the prompt
            builder as ``original_content``.
        article_title: Title of the source article, included in the prompt.
        artice_summary: Summary of the source article, included in the prompt.
            (The spelling is kept because ``get_translate_prompt`` takes this
            keyword under the same name.)
        target_lang: Language to translate into.
        api_key: API key forwarded to ``init_llm_client``.
        model_id: Model identifier, used both to build the client and as the
            ``model`` of the completion request.
        base_url: Optional API base URL forwarded to ``init_llm_client``.
        max_tokens: Upper bound on generated tokens (defaults to the value
            that was previously hard-coded).
        temperature: Sampling temperature (defaults to the value that was
            previously hard-coded).

    Returns:
        The text of the first choice returned by the model. It is returned
        unparsed: callers are responsible for extracting any JSON payload the
        prompt asked the model to produce.
    """
    llm_client = init_llm_client(api_key, model_id, base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
    )

    # `messages` / `max_tokens` are Chat Completions parameters; the Responses
    # endpoint used previously takes `input` / `max_output_tokens` and rejects
    # these keyword arguments.
    response = llm_client.chat.completions.create(
        messages=[{"role": "user", "content": translation_prompt}],
        model=model_id,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Previously the response was dropped and the function returned None.
    return response.choices[0].message.content
64
+
65
  def update_ui_with_sections(sections_dict):
66
  """
67
  Creates a list of components to display in the sections area
utils/__init__.py CHANGED
@@ -1 +1,3 @@
1
  from .wikipedia_extractor import (extract_wiki_id, get_wiki_details, split_content_into_sections)
 
 
 
1
  from .wikipedia_extractor import (extract_wiki_id, get_wiki_details, split_content_into_sections)
2
+ from .llm_parser import init_llm_client
3
+ from .llm_prompts import get_translate_prompt
utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ
 
utils/__pycache__/llm_parser.cpython-310.pyc ADDED
Binary file (560 Bytes). View file
 
utils/__pycache__/llm_prompts.cpython-310.pyc ADDED
Binary file (2.24 kB). View file
 
utils/llm_parser.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from openai import OpenAI
3
+
4
+
5
def init_llm_client(api_key, model_id, base_url=None):
    """
    Initialize an OpenAI client for the given credentials and endpoint.

    Args:
        api_key: API key passed to the client.
        model_id: Model identifier. It is not a client setting (the model is
            chosen per request), so it is only mirrored to the environment.
        base_url: Optional API base URL. A falsy value (``None`` or ``""``)
            lets the client fall back to its default endpoint.

    Returns:
        A configured ``OpenAI`` client instance.
    """
    # Mirror the settings into the process environment, as before, but skip
    # missing values: assigning None to os.environ raises TypeError.
    # NOTE(review): these variables are process-wide, so concurrent callers
    # with different keys overwrite each other - confirm anything still reads them.
    if base_url:
        os.environ["OPENAI_API_BASE"] = base_url
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
    if model_id:
        os.environ["OPENAI_MODEL_ID"] = model_id

    # The OpenAI constructor has no `model_id` parameter; passing it raised
    # TypeError before a client could be created.
    return OpenAI(api_key=api_key, base_url=base_url or None)
15
+
utils/llm_prompts.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
def get_translate_prompt(article_title, artice_summary, original_content, target_lang):
    """
    Build the prompt asking an LLM to translate Wikipedia XML content.

    Values are interpolated with f-strings instead of ``str.format``. The
    prompt embeds a literal JSON example, and ``str.format`` treated that
    example's braces as a replacement field, raising ``KeyError`` on every call.

    Args:
        article_title: Original title of the article.
        artice_summary: Summary of the article. The parameter keeps its
            existing spelling because callers pass it by keyword.
        original_content: The XML content to be translated.
        target_lang: The language to translate into.

    Returns:
        The rendered prompt string. It ends with an opening ``json`` code
        fence so the model's reply continues directly with the JSON object.
    """
    # Example of the expected response shape, shown verbatim to the model.
    output_example = json.dumps(
        {
            "translated_content": "The complete translated XML content with all tags preserved",
        },
        indent=4,
        ensure_ascii=False,
    )

    return (
        "# Task\n"
        "You are an expert Wikipedia translator specializing in multilingual content adaptation. "
        f"Your task is to translate the provided XML content into {target_lang} while preserving the "
        "academic tone, factual accuracy, and encyclopedic style of Wikipedia.\n\n"

        "# Article Original Title\n"
        f"{article_title}\n\n"

        "# Article Summary\n"
        f"{artice_summary}\n\n"

        "# Article Original Content (XML format)\n"
        f"{original_content}\n\n"

        "# Target Language\n"
        f"{target_lang}\n\n"

        "# Instructions\n"
        "1. Preserve all XML tags, attributes, and structure exactly as they appear\n"
        "2. Translate only the text content between XML tags\n"
        "3. Maintain Wikipedia's neutral point of view and encyclopedic style\n"
        "4. Preserve proper nouns, scientific terminology, and citations appropriately\n"
        "5. Adapt cultural references or idioms to be understandable in the target language\n"
        f"6. Use terminology consistent with the {target_lang} Wikipedia for similar topics\n"
        "7. Maintain the same paragraph structure and information hierarchy\n\n"

        "# Output Format\n"
        "Return a single JSON object with the following structure:\n"
        "```json\n"
        # Inserted as a value, so its braces are never parsed as fields.
        f"{output_example}"
        "\n```\n\n"

        "# Translation Quality Guidelines\n"
        "- Accuracy: Ensure factual information is preserved exactly\n"
        "- Completeness: Translate all content, don't summarize or omit information\n"
        "- Consistency: Use consistent terminology throughout the article\n"
        "- Fluency: Produce natural-sounding text in the target language\n"
        "- Formatting: Preserve all formatting elements, including lists, tables, and emphasis\n"

        "# Output json\n"
        "```json\n"
    )