bakrianoo's picture
specify the translated format
94260c3
raw
history blame
10 kB
import gradio as gr
from utils import (extract_wiki_id, get_wiki_details,
init_llm_client, split_content_into_sections,
get_translate_prompt)
import json
import json_repair
# Languages offered for translation: display name -> two-letter language code.
# Insertion order is the order in which the names are listed.
LANGUAGES = dict(
    Arabic="ar",
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Russian="ru",
    Japanese="ja",
    Chinese="zh",
    Hindi="hi",
    Korean="ko",
)
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, content_format):
    """Fetch a Wikipedia article and split it into sections.

    Args:
        wiki_url: URL of the Wikipedia article to process.
        api_key, model_id, base_url, target_lang: Accepted so the signature
            matches the inputs wired to the extract button; not used here.
        content_format: "XML" splits the article's ``wiki_xml`` payload; any
            other value splits its ``content`` payload.

    Returns:
        A 6-tuple ``(status, pageid, title, summary, wiki_xml, sections)``.
        On failure the status carries an error message, the four article
        fields are ``None`` and ``sections`` is an empty dict.
    """
    wiki_id = extract_wiki_id(wiki_url)
    if not wiki_id:
        return "Invalid Wikipedia URL. Please check the URL and try again.", None, None, None, None, {}

    # Get the details of the Wikipedia article
    wiki_details = get_wiki_details(wiki_id)
    if not wiki_details:
        # Defensive: the helper's failure behaviour is not visible from this
        # file; an empty/None result would otherwise crash on subscripting.
        return "Could not retrieve the Wikipedia article. Please check the URL and try again.", None, None, None, None, {}

    # Pick the payload that matches the requested format.
    source_key = 'wiki_xml' if content_format == "XML" else 'content'
    content_sections = split_content_into_sections(wiki_details[source_key], content_format)

    return (
        f"Extraction complete! Sections: {len(content_sections)}",
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_details['wiki_xml'],
        content_sections,
    )
def translate_content(content, article_title, artice_summary, content_format,
                      target_lang, api_key, model_id, base_url):
    """Translate a piece of article content with a chat-completion model.

    Args:
        content: The text (or XML) to translate.
        article_title: Title of the source article, passed to the prompt.
        artice_summary: Summary of the source article, passed to the prompt
            (parameter name kept as-is for caller compatibility).
        content_format: Format label forwarded to the prompt builder.
        target_lang: Target language forwarded to the prompt builder.
        api_key: API key used to create the LLM client.
        model_id: Model identifier for the completion request.
        base_url: Base URL forwarded to the LLM client factory.

    Returns:
        The value stored under ``output_content`` in the model's JSON reply,
        or an error message string when the reply is empty or does not
        contain that key.
    """
    not_found = "Error: Translation output not found in the response."

    # NOTE(review): base_url arrives as "" when the UI field is left blank;
    # confirm init_llm_client treats that as "use the default endpoint".
    llm_client = init_llm_client(api_key, base_url=base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
        content_format=content_format
    )

    # Call the LLM to get the translation - updating params to match OpenAI's requirements
    # NOTE(review): max_tokens=2000 may cut off long sections - confirm.
    response = llm_client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        max_tokens=2000,
        temperature=0.5
    )

    # The message content can be None/empty; nothing to parse in that case.
    raw_reply = response.choices[0].message.content
    if not raw_reply:
        return not_found

    decoded_object = json_repair.loads(raw_reply)

    # The repaired value is not guaranteed to be a JSON object: on a str,
    # `in` would be a substring test and the subscript would raise TypeError.
    if isinstance(decoded_object, dict) and 'output_content' in decoded_object:
        return decoded_object['output_content']

    return not_found
def translate_section(section_content, article_title, article_summary, content_format, target_lang, api_key, model_id, base_url):
    """Translate one article section via ``translate_content``.

    Returns a prompt-to-fill-in message instead of calling the model when
    either the section text or the API key is missing/empty; otherwise
    returns whatever ``translate_content`` returns.
    """
    has_required_inputs = bool(section_content) and bool(api_key)
    if not has_required_inputs:
        return "Please provide content and API key for translation."

    # Note: the callee spells its summary keyword "artice_summary".
    request = dict(
        content=section_content,
        article_title=article_title,
        artice_summary=article_summary,
        content_format=content_format,
        target_lang=target_lang,
        api_key=api_key,
        model_id=model_id,
        base_url=base_url,
    )
    return translate_content(**request)
def update_ui_with_sections(sections_dict, max_sections=100):
    """Build the component updates for the pre-built section rows.

    Each section row consists of three components, in this order: the
    section textbox, its translate button and the translation output.

    Args:
        sections_dict: Mapping of section name -> section content. May be
            empty or None, in which case every row is hidden.
        max_sections: Number of pre-built rows in the UI. Must match the
            number of rows created when the interface is built (100).

    Returns:
        A flat list of exactly ``3 * max_sections`` ``gr.update`` objects:
        visible updates for each available section followed by hidden
        updates for the unused rows. Sections beyond ``max_sections`` are
        dropped so the list always matches the number of output components.
    """
    # Cap at the number of rows that exist; returning more updates than
    # there are output components would break the event handler.
    shown_sections = list((sections_dict or {}).items())[:max_sections]

    components = []

    # Create visible components for available sections
    for section_name, section_content in shown_sections:
        components.extend([
            # Update for section content textbox
            gr.update(
                value=section_content,
                label=f"Section: {section_name}",
                visible=True
            ),
            # Update for translate button
            gr.update(
                visible=True,
                value=f"Translate {section_name}"
            ),
            # Update for translation output
            gr.update(
                visible=True,
                value="",
                label=f"Translation: {section_name}"
            ),
        ])

    # Hide any unused components (textbox, button, output per unused row)
    unused_rows = max_sections - len(shown_sections)
    components.extend(gr.update(visible=False) for _ in range(3 * unused_rows))

    return components
# Create Gradio app
# Layout: a two-column row (configuration sidebar + main article area).
# NOTE(review): nesting below is reconstructed from the statement order and
# the section comments - confirm "About" sits in the sidebar outside the Group.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Wikipedia Translator")
    # State variable to store sections
    # (filled by extract_wikipedia_content, read by update_ui_with_sections)
    sections_state = gr.State({})
    with gr.Row():
        # Sidebar for configuration
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")
            with gr.Group():
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="sk-...",
                    type="password",
                )
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                    value="gpt-4.1-mini",
                )
                # No default value is set here; only a placeholder is shown.
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )
                # The dropdown offers the display names (keys of LANGUAGES).
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Arabic",
                    label="Target Language",
                )
                content_format = gr.Radio(
                    choices=["Text", "XML"],
                    value="XML",
                    label="Content Format",
                    info="Choose how to display article content"
                )
            gr.Markdown("### About")
            gr.Markdown("""
This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
1. Configure your API settings
2. Enter a Wikipedia URL
3. Click Extract to process the article
""")
        # Main content area
        with gr.Column(scale=2):
            gr.Markdown("### Wikipedia Article")
            wiki_url = gr.Textbox(
                label="Wikipedia URL",
                placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                info="Enter the full URL of the Wikipedia article"
            )
            extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
            # Receives the status message (first value returned by the extract handler)
            output = gr.Markdown(label="Status")
            # Results area (will expand in the future)
            article_pageid = gr.Textbox(
                label="Article Page ID",
                placeholder="Page ID will appear here after extraction",
                interactive=False
            )
            article_title = gr.Textbox(
                label="Article Title",
                placeholder="Title will appear here after extraction",
                interactive=False
            )
            # Variable name spelled "aticle_summary" (sic); used consistently below.
            aticle_summary = gr.Textbox(
                label="Article Summary",
                placeholder="Summary will appear here after extraction",
                interactive=False
            )
            article_xml = gr.Textbox(
                label="Article XML",
                placeholder="XML will appear here after extraction",
                interactive=False,
                visible=False # Hidden by default as it's usually large
            )
            # Pre-define section textboxes and related components
            # All rows are created hidden up front; update_ui_with_sections
            # later reveals one row per extracted section.
            gr.Markdown("### Article Sections")
            with gr.Column() as sections_container:
                # Flat list in (textbox, button, output) order per row; this is
                # the order update_ui_with_sections emits its updates in.
                section_components = []
                # The row count (100) must stay in sync with the maximum
                # assumed by update_ui_with_sections.
                for i in range(100): # Support up to 100 sections
                    with gr.Row():
                        section_textbox = gr.Textbox(visible=False, lines=4)
                        translate_btn = gr.Button("Translate", visible=False)
                        translation_output = gr.Textbox(visible=False, lines=4)
                    section_components.extend([section_textbox, translate_btn, translation_output])
                    # Connect the translate button to the translation function
                    # Inputs are listed in translate_section's parameter order.
                    translate_btn.click(
                        fn=translate_section,
                        inputs=[
                            section_textbox,
                            article_title,
                            aticle_summary,
                            content_format,
                            target_language,
                            api_key,
                            model_id,
                            base_url
                        ],
                        outputs=translation_output
                    )
    # Connect the extract button to the function
    # The six outputs map one-to-one onto the 6-tuple returned by
    # extract_wikipedia_content; once it finishes, the chained step feeds the
    # stored sections to update_ui_with_sections to refresh the section rows.
    extract_button.click(
        fn=extract_wikipedia_content,
        inputs=[wiki_url, api_key, model_id, base_url, target_language, content_format],
        outputs=[
            output,
            article_pageid,
            article_title,
            aticle_summary,
            article_xml,
            sections_state,
        ]
    ).then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_components
    )
# Launch the app
# Only when run as a script, not when the module is imported.
if __name__ == "__main__":
    demo.launch()