Spaces:

bakrianoo
/

wikipedia-translator

Sleeping

App Files Files Community

bakrianoo committed on May 16

Commit

c065ba1

1 Parent(s): e39d0f6

translate xml from wikipedia

Browse files

Files changed (6) hide show

app.py +77 -15
utils/__pycache__/llm_parser.cpython-310.pyc +0 -0
utils/__pycache__/llm_prompts.cpython-310.pyc +0 -0
utils/__pycache__/wikipedia_extractor.cpython-310.pyc +0 -0
utils/llm_parser.py +8 -5
utils/llm_prompts.py +5 -5

app.py CHANGED Viewed

@@ -44,7 +44,7 @@ def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang
 def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url):
-    llm_client = init_llm_client(api_key, model_id, base_url)
     translation_prompt = get_translate_prompt(
         article_title=article_title,
@@ -53,22 +53,39 @@ def translate_content(content, article_title, artice_summary, target_lang, api_k
         target_lang=target_lang
     )
-    # Call the LLM to get the translation
-    response = llm_client.responses.create(
         messages=[
             {"role": "user", "content": translation_prompt}
         ],
-        model=model_id,
         max_tokens=2000,
         temperature=0.5
     )
-    decoded_object = json_repair.loads(response.choices[0].message['content'])
     if 'output_content' in decoded_object:
         return decoded_object['output_content']
     return "Error: Translation output not found in the response."
 def update_ui_with_sections(sections_dict):
     """
     Creates a list of components to display in the sections area
@@ -76,20 +93,46 @@ def update_ui_with_sections(sections_dict):
     components = []
     if not sections_dict:
-        return [gr.update(visible=False) for _ in range(10)]  # Assuming max 10 sections
     # Create visible components for available sections
     for section_name, section_content in sections_dict.items():
         components.append(gr.update(
             value=section_content,
             label=f"Section: {section_name}",
             visible=True
         ))
     # Hide any unused components
-    remaining = 100 - len(components)  # Assuming max 100 sections
     for _ in range(remaining):
-        components.append(gr.update(visible=False))
     return components
@@ -115,6 +158,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
                 model_id = gr.Textbox(
                     label="OpenAI Model ID",
                     placeholder="gpt-4.1-mini",
                 )
                 base_url = gr.Textbox(
@@ -125,7 +169,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
                 target_language = gr.Dropdown(
                     choices=list(LANGUAGES.keys()),
-                    value="Spanish",
                     label="Target Language",
                 )
@@ -178,13 +222,31 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
                 visible=False  # Hidden by default as it's usually large
             )
-            # Pre-define section textboxes (limit to 100 for simplicity)
             gr.Markdown("### Article Sections")
             with gr.Column() as sections_container:
-                section_textboxes = [
-                    gr.Textbox(visible=False, lines=4)
-                    for _ in range(100)  # Support up to 100 sections
-                ]
     # Connect the extract button to the function
     extract_button.click(
@@ -201,7 +263,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     ).then(
         fn=update_ui_with_sections,
         inputs=[sections_state],
-        outputs=section_textboxes
     )
 # Launch the app

 def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url):
+    llm_client = init_llm_client(api_key, base_url=base_url)
     translation_prompt = get_translate_prompt(
         article_title=article_title,
         target_lang=target_lang
     )
+    # Call the LLM to get the translation - updating params to match OpenAI's requirements
+    response = llm_client.chat.completions.create(
+        model=model_id,
         messages=[
             {"role": "user", "content": translation_prompt}
         ],
         max_tokens=2000,
         temperature=0.5
     )
+    decoded_object = json_repair.loads(response.choices[0].message.content)
     if 'output_content' in decoded_object:
         return decoded_object['output_content']
     return "Error: Translation output not found in the response."
+def translate_section(section_content, article_title, article_summary, target_lang, api_key, model_id, base_url):
+    """
+    Translates a single section of the Wikipedia article
+    """
+    if not section_content or not api_key:
+        return "Please provide content and API key for translation."
+    return translate_content(
+        content=section_content,
+        article_title=article_title,
+        artice_summary=article_summary,
+        target_lang=target_lang,
+        api_key=api_key,
+        model_id=model_id,
+        base_url=base_url
+    )
 def update_ui_with_sections(sections_dict):
     """
     Creates a list of components to display in the sections area
     components = []
     if not sections_dict:
+        # Return updates for all components (input, button, output)
+        empty_updates = []
+        for _ in range(100):  # Assuming max 100 sections
+            empty_updates.extend([
+                gr.update(visible=False),  # section textbox
+                gr.update(visible=False),  # translate button
+                gr.update(visible=False)   # translation output
+            ])
+        return empty_updates
     # Create visible components for available sections
     for section_name, section_content in sections_dict.items():
+        # Update for section content textbox
         components.append(gr.update(
             value=section_content,
             label=f"Section: {section_name}",
             visible=True
         ))
+        # Update for translate button
+        components.append(gr.update(
+            visible=True,
+            value=f"Translate {section_name}"
+        ))
+        # Update for translation output
+        components.append(gr.update(
+            visible=True,
+            value="",
+            label=f"Translation: {section_name}"
+        ))
     # Hide any unused components
+    remaining = 100 - len(sections_dict)  # Assuming max 100 sections
     for _ in range(remaining):
+        components.extend([
+            gr.update(visible=False),  # section textbox
+            gr.update(visible=False),  # translate button
+            gr.update(visible=False)   # translation output
+        ])
     return components
                 model_id = gr.Textbox(
                     label="OpenAI Model ID",
                     placeholder="gpt-4.1-mini",
+                    value="gpt-4.1-mini",
                 )
                 base_url = gr.Textbox(
                 target_language = gr.Dropdown(
                     choices=list(LANGUAGES.keys()),
+                    value="Arabic",
                     label="Target Language",
                 )
                 visible=False  # Hidden by default as it's usually large
             )
+            # Pre-define section textboxes and related components
             gr.Markdown("### Article Sections")
             with gr.Column() as sections_container:
+                section_components = []
+                for i in range(100):  # Support up to 100 sections
+                    with gr.Row():
+                        section_textbox = gr.Textbox(visible=False, lines=4)
+                        translate_btn = gr.Button("Translate", visible=False)
+                        translation_output = gr.Textbox(visible=False, lines=4)
+                        section_components.extend([section_textbox, translate_btn, translation_output])
+                        # Connect the translate button to the translation function
+                        translate_btn.click(
+                            fn=translate_section,
+                            inputs=[
+                                section_textbox,
+                                article_title,
+                                aticle_summary,
+                                target_language,
+                                api_key,
+                                model_id,
+                                base_url
+                            ],
+                            outputs=translation_output
+                        )
     # Connect the extract button to the function
     extract_button.click(
     ).then(
         fn=update_ui_with_sections,
         inputs=[sections_state],
+        outputs=section_components
     )
 # Launch the app

utils/__pycache__/llm_parser.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/llm_parser.cpython-310.pyc and b/utils/__pycache__/llm_parser.cpython-310.pyc differ

utils/__pycache__/llm_prompts.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/llm_prompts.cpython-310.pyc and b/utils/__pycache__/llm_prompts.cpython-310.pyc differ

utils/__pycache__/wikipedia_extractor.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/wikipedia_extractor.cpython-310.pyc and b/utils/__pycache__/wikipedia_extractor.cpython-310.pyc differ

utils/llm_parser.py CHANGED Viewed

@@ -1,15 +1,18 @@
 import os
 from openai import OpenAI
-def init_llm_client(api_key, model_id, base_url=None):
     """
     Initialize the OpenAI client with the provided API key and model ID.
     """
-    if base_url:
         os.environ["OPENAI_API_BASE"] = base_url
     os.environ["OPENAI_API_KEY"] = api_key
-    os.environ["OPENAI_MODEL_ID"] = model_id
-    return OpenAI(api_key=api_key, model_id=model_id, base_url=base_url)

 import os
 from openai import OpenAI
+import urllib
+def init_llm_client(api_key, base_url=None):
     """
     Initialize the OpenAI client with the provided API key and model ID.
     """
+    print("base_url", base_url)
+    if base_url and len(base_url) > 0 and base_url.startswith("http"):
         os.environ["OPENAI_API_BASE"] = base_url
+    else:
+        base_url = None
     os.environ["OPENAI_API_KEY"] = api_key
+    return OpenAI(api_key=api_key, base_url=base_url)

utils/llm_prompts.py CHANGED Viewed

@@ -35,11 +35,11 @@ def get_translate_prompt(article_title, artice_summary, original_content, target
         "# Output Format\n"
         "Return a single JSON object with the following structure:\n"
-        "```json\n" +
-        json.dumps({
-            "output_content": "The complete translated XML content with all tags preserved",
-        }, indent=4, ensure_ascii=False) +
-        "\n```\n\n"
         "# Translation Quality Guidelines\n"
         "- Accuracy: Ensure factual information is preserved exactly\n"

         "# Output Format\n"
         "Return a single JSON object with the following structure:\n"
+        "```json\n"
+        "{{\n"
+        "    \"output_content\": \"The complete translated XML content with all tags preserved\"\n"
+        "}}\n"
+        "```\n\n"
         "# Translation Quality Guidelines\n"
         "- Accuracy: Ensure factual information is preserved exactly\n"