# bakrianoo - commit 06c3d9b: "set custom Arabic extended language option" (17.4 kB)
import gradio as gr
from utils import (extract_wiki_id, get_wiki_details,
init_llm_client, split_content_into_sections,
get_translate_prompt)
import json
import json_repair
# Define language options for translation
LANGUAGES = {
"Arabic": "ar",
"Arabic-Extended": "ar-x-extended",
"English": "en",
"Spanish": "es",
"French": "fr",
"German": "de",
"Italian": "it",
"Portuguese": "pt",
"Russian": "ru",
"Japanese": "ja",
"Chinese": "zh",
"Hindi": "hi",
"Korean": "ko",
"Custom": "custom" # Add custom option
}
debug_display = None
debug_header = None
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format):
    """
    Fetch a Wikipedia article and split it into sections ready for translation.

    Args:
        wiki_url: URL of the article; the article id is derived from it with
            ``extract_wiki_id``.
        api_key, model_id, base_url, target_lang, custom_lang: accepted because
            the UI passes them, but unused here - extraction makes no LLM call.
        content_format: ``"XML"`` splits the article's ``wiki_xml`` field; any
            other value splits its ``content`` field.

    Returns:
        A 6-tuple ``(status, page id, title, summary, article XML, sections)``.
        When the URL cannot be resolved or no article details come back, the
        status carries an error message, the four middle items are ``None``
        and sections is an empty dict.
    """
    wiki_id = extract_wiki_id(wiki_url)
    if not wiki_id:
        return (
            "Invalid Wikipedia URL. Please check the URL and try again.",
            None, None, None, None, {},
        )

    # Get the details of the Wikipedia article
    wiki_details = get_wiki_details(wiki_id)
    if not wiki_details:
        # Defensive: subscripting a missing/empty result below would raise and
        # surface as an opaque error in the UI instead of a readable status.
        return (
            "Could not retrieve the Wikipedia article. Please check the URL and try again.",
            None, None, None, None, {},
        )

    # The chosen format decides which representation of the article is split
    source_key = 'wiki_xml' if content_format == "XML" else 'content'
    content_sections = split_content_into_sections(wiki_details[source_key], content_format)

    return (
        "Extraction complete! Sections: " + str(len(content_sections)),
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_details['wiki_xml'],
        content_sections,
    )
def translate_content(content, article_title, artice_summary, content_format,
                      target_lang, api_key, model_id, base_url, preference_prompt=None, debug_mode=False):
    """
    Translate one piece of article content with a chat-completion call.

    Args:
        content: The text (or XML) to translate.
        article_title: Title of the article, forwarded to the prompt builder.
        artice_summary: Summary of the article, forwarded to the prompt builder
            (the parameter name is kept as-is for existing callers).
        content_format: Format label forwarded to the prompt builder.
        target_lang: Target language, used exactly as given; the caller is
            expected to have resolved any custom language already.
        api_key: API key passed to ``init_llm_client``.
        model_id: Model identifier used for the completion request.
        base_url: Base URL passed to ``init_llm_client``.
        preference_prompt: Optional extra instructions for the prompt builder.
        debug_mode: When true, also return a dict describing the LLM call.

    Returns:
        The translated text, or an error message when the reply has no
        ``output_content`` field. With ``debug_mode`` the return value is a
        ``(translation, debug_info)`` tuple, where ``debug_info`` holds the
        prompt, the raw reply, token usage and the model id.
    """
    llm_client = init_llm_client(api_key, base_url=base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
        content_format=content_format,
        preference_prompt=preference_prompt
    )

    # Call the LLM to get the translation
    response = llm_client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        max_tokens=2000,
        temperature=0.5
    )
    raw_reply = response.choices[0].message.content

    # The reply may be empty, and a repaired-but-malformed reply is not
    # guaranteed to decode to a dict; only a dict can carry 'output_content'.
    decoded_object = json_repair.loads(raw_reply) if raw_reply else None
    if isinstance(decoded_object, dict) and 'output_content' in decoded_object:
        translation = decoded_object['output_content']
    else:
        translation = "Error: Translation output not found in the response."

    if not debug_mode:
        return translation

    # Some OpenAI-compatible endpoints omit usage; report None rather than fail
    usage = response.usage
    debug_info = {
        "prompt": translation_prompt,
        "response": raw_reply,
        "usage": {
            "prompt_tokens": getattr(usage, "prompt_tokens", None),
            "completion_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None)
        },
        "model": model_id
    }
    return translation, debug_info
def translate_section(section_content, article_title, article_summary, content_format,
                      target_lang, custom_lang, api_key, model_id, base_url, preference_prompt=None, debug_mode=False):
    """
    Translate a single section of the Wikipedia article.

    Args:
        section_content: Text of the section to translate.
        article_title: Title of the article the section belongs to.
        article_summary: Summary of the article.
        content_format: Format label forwarded to ``translate_content``.
        target_lang: Selected target language; ``"Custom"`` means the language
            is taken from ``custom_lang`` instead.
        custom_lang: Free-text language name, used only with ``"Custom"``.
        api_key, model_id, base_url: LLM connection settings.
        preference_prompt: Optional extra translation instructions.
        debug_mode: When true, the second returned item carries debug details.

    Returns:
        A ``(translation, debug_info)`` pair. ``debug_info`` is ``None`` unless
        ``debug_mode`` is on and a translation was actually attempted. When
        required input is missing, the first item is a message for the user.
    """
    if not section_content or not api_key:
        return "Please provide content and API key for translation.", None

    actual_lang = target_lang
    if target_lang == "Custom":
        # "Custom" is only a dropdown sentinel; without a real language name
        # the model would be asked to translate into the word "Custom".
        actual_lang = (custom_lang or "").strip()
        if not actual_lang:
            return "Please enter a custom language name for translation.", None

    result = translate_content(
        content=section_content,
        article_title=article_title,
        artice_summary=article_summary,
        content_format=content_format,
        target_lang=actual_lang,
        api_key=api_key,
        model_id=model_id,
        base_url=base_url,
        preference_prompt=preference_prompt,
        debug_mode=debug_mode
    )

    # translate_content returns a (translation, debug_info) pair only in debug mode
    if debug_mode:
        translation, debug_info = result
        return translation, debug_info
    return result, None
def _backtick_fence(text):
    """Return a Markdown code fence longer than any backtick run in *text*."""
    longest = run = 0
    for char in text:
        run = run + 1 if char == "`" else 0
        longest = max(longest, run)
    return "`" * max(3, longest + 1)


def format_debug_info(debug_info):
    """
    Format debug information as markdown for display in the UI.

    Args:
        debug_info: Dict with the keys ``model``, ``usage`` (holding
            ``prompt_tokens``, ``completion_tokens``, ``total_tokens``),
            ``prompt`` and ``response``; may be ``None`` or empty.

    Returns:
        A markdown string, or a short notice when there is nothing to show.
    """
    if not debug_info:
        return "No debug information available."

    usage = debug_info['usage']
    # Triple backticks are dropped from the prompt so it cannot close its fence
    prompt_text = debug_info['prompt'].replace('```', '')
    # The raw reply is shown untouched; LLM replies often contain their own
    # ``` fences, so the surrounding fence is made longer than any run inside.
    response_text = str(debug_info['response'])
    fence = _backtick_fence(response_text)

    parts = [
        "## LLM Debug Information\n\n",
        # Model and usage info
        f"### Model: {debug_info['model']}\n\n",
        "### Usage\n",
        f"- Prompt tokens: {usage['prompt_tokens']}\n",
        f"- Completion tokens: {usage['completion_tokens']}\n",
        f"- Total tokens: {usage['total_tokens']}\n\n",
        # Prompt
        "### Prompt\n",
        f"```\n{prompt_text}\n```\n\n",
        # Raw response
        "### Raw Response\n",
        f"{fence}json\n{response_text}\n{fence}\n",
    ]
    return "".join(parts)
# Build the component updates that show the extracted sections in the UI
def update_ui_with_sections(sections, max_sections=100):
    """
    Build the updates that display the article's sections in the UI.

    Args:
        sections: Dictionary of section titles and content; ``None`` is
            treated as no sections.
        max_sections: Number of pre-built section slots in the UI. Sections
            beyond this count are not shown. Defaults to 100.

    Returns:
        A flat list of ``4 * max_sections`` updates, four per slot in the
        order: section textbox, translate button, translation textbox,
        debug button.
    """
    # Materialise the items once instead of rebuilding the key list per slot
    visible_items = list((sections or {}).items())[:max_sections]

    results = []
    for section_title, section_content in visible_items:
        results.extend([
            gr.update(visible=True, value=section_content, label=f"Section: {section_title}"),
            gr.update(visible=True),  # Translate button
            gr.update(visible=True, value="", label=f"Translation: {section_title}"),  # Translation output
            gr.update(visible=False)  # Debug button (hidden by default)
        ])

    # Hide every slot that has no section to show
    for _ in range(max_sections - len(visible_items)):
        results.extend([
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False)
        ])
    return results
# Create Gradio app; the components and handlers below are declared inside this Blocks context
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
gr.Markdown("# Wikipedia Translator")
# State variables
# sections_state receives the sections returned by extract_wikipedia_content
# and is the input of update_ui_with_sections
sections_state = gr.State({})
sidebar_expanded = gr.State(True) # Track sidebar state, default is expanded
def toggle_sidebar(expanded):
    """Flip the sidebar state and return the matching component updates.

    Returns, in order: the new expanded flag, the sidebar visibility update,
    the main-column scale update, and the visibility update for the
    "show sidebar" button (visible only while the sidebar is hidden).
    """
    now_expanded = not expanded
    # The main column is given a larger scale while the sidebar is hidden
    if now_expanded:
        main_scale = 2
    else:
        main_scale = 3
    sidebar_update = gr.update(visible=now_expanded)
    main_update = gr.update(scale=main_scale)
    show_button_update = gr.update(visible=not now_expanded)
    return now_expanded, sidebar_update, main_update, show_button_update
# Reveal the custom language input only while "Custom" is the selected language
def toggle_custom_language(target_lang):
    """Return a visibility update that is true only for the "Custom" choice."""
    is_custom = target_lang == "Custom"
    return gr.update(visible=is_custom)
# Top-level row holding the configuration sidebar and the main content column
with gr.Row() as main_layout:
# Sidebar for configuration
with gr.Column(scale=1, visible=True) as sidebar:
# Add a toggle button at the top of the sidebar with updated icon
sidebar_toggle = gr.Button("« Hide Sidebar", scale=0)
gr.Markdown("### Configuration")
with gr.Group():
# LLM connection settings; these components are passed as inputs to the handlers
api_key = gr.Textbox(
label="OpenAI API Key",
placeholder="sk-...",
type="password",
)
model_id = gr.Textbox(
label="OpenAI Model ID",
placeholder="gpt-4.1-mini",
value="gpt-4.1-mini",
)
# NOTE(review): no default value is set here, only a placeholder - confirm
# how init_llm_client treats a blank base URL
base_url = gr.Textbox(
label="OpenAI API Base URL (Optional)",
placeholder="https://api.openai.com/v1",
info="Leave default unless using a proxy"
)
# Choices are the display names (keys) of LANGUAGES
target_language = gr.Dropdown(
choices=list(LANGUAGES.keys()),
value="Arabic",
label="Target Language",
)
# Hidden until "Custom" is selected in the dropdown (see toggle_custom_language)
custom_language = gr.Textbox(
label="Custom Language",
placeholder="Enter language name (e.g., Swedish, Dutch, etc.)",
visible=False,
info="Specify your desired language if not in the list above"
)
# Connect the dropdown to show/hide custom language input
target_language.change(
fn=toggle_custom_language,
inputs=[target_language],
outputs=[custom_language]
)
# The selected format is passed to both extraction and translation handlers
content_format = gr.Radio(
choices=["Text", "XML"],
value="XML",
label="Content Format",
info="Choose how to display article content"
)
# Debug mode toggle
debug_mode = gr.Checkbox(
label="Debug Mode",
value=False,
info="Show detailed information about LLM calls"
)
# Add preference prompt section
gr.Markdown("### Translation Preferences")
preference_prompt = gr.Textbox(
label="Additional Translation Preferences",
placeholder="Enter any specific translation preferences or instructions...",
lines=5,
info="Optional: Add specific preferences for how the translation should be performed"
)
# Static help text shown in the sidebar
gr.Markdown("### About")
gr.Markdown("""
This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
1. Configure your API settings
2. Enter a Wikipedia URL
3. Click Extract to process the article
""")
# Main content area
with gr.Column(scale=2) as main_content:
# Show sidebar toggle button when sidebar is hidden (updated icon)
with gr.Row():
sidebar_show_btn = gr.Button("» Show Sidebar", visible=False, scale=0)
with gr.Column(scale=1):
gr.Markdown("### Wikipedia Article")
wiki_url = gr.Textbox(
label="Wikipedia URL",
placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
info="Enter the full URL of the Wikipedia article"
)
extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
# Receives the status message returned by extract_wikipedia_content
output = gr.Markdown(label="Status")
# Results area (will expand in the future)
# Read-only fields filled by the extract handler
article_pageid = gr.Textbox(
label="Article Page ID",
placeholder="Page ID will appear here after extraction",
interactive=False,
show_copy_button=True
)
article_title = gr.Textbox(
label="Article Title",
placeholder="Title will appear here after extraction",
interactive=False,
show_copy_button=True
)
# NOTE: the variable name is spelled "aticle_summary" throughout this file
aticle_summary = gr.Textbox(
label="Article Summary",
placeholder="Summary will appear here after extraction",
interactive=False,
show_copy_button=True
)
article_xml = gr.Textbox(
label="Article XML",
placeholder="XML will appear here after extraction",
interactive=False,
visible=False, # Hidden by default as it's usually large
show_copy_button=True
)
# Debug info state and modal components
# NOTE(review): this single state is the output of every section's translate
# button below, so it holds the debug info of the most recent translation only
debug_info_state = gr.State(None)
# Remove the debug_markdown from the main area as we'll only use the sidebar for debug info
# Pre-define section textboxes and related components
gr.Markdown("### Article Sections")
with gr.Column() as sections_container:
# Flat list, four components per section: textbox, translate button,
# translation output, debug button (same order update_ui_with_sections returns)
section_components = []
for i in range(100): # Support up to 100 sections
with gr.Row():
# All four start hidden; update_ui_with_sections reveals the slots in use
section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
translate_btn = gr.Button("Translate", visible=False)
translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
debug_btn = gr.Button("View Debug Info", visible=False)
section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
# Connect the translate button to the translation function
# Input order matches the positional parameters of translate_section
result = translate_btn.click(
fn=translate_section,
inputs=[
section_textbox,
article_title,
aticle_summary,
content_format,
target_language,
custom_language,
api_key,
model_id,
base_url,
preference_prompt,
debug_mode
],
outputs=[translation_output, debug_info_state]
)
# Show debug button only when debug mode is on and after translation
result.then(
fn=lambda debug_info, debug_mode: gr.update(visible=debug_mode and debug_info is not None),
inputs=[debug_info_state, debug_mode],
outputs=[debug_btn]
)
# Update this to only show the debug info in the sidebar
# We'll reconnect this later in the code
# Connect the extract button to the function
# The six outputs match the 6-tuple returned by extract_wikipedia_content;
# the chained step then turns sections_state into per-slot component updates
extract_button.click(
fn=extract_wikipedia_content,
inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format],
outputs=[
output,
article_pageid,
article_title,
aticle_summary,
article_xml,
sections_state,
]
).then(
fn=update_ui_with_sections,
inputs=[sections_state],
outputs=section_components
)
# Connect the sidebar toggle buttons
# Both buttons call the same handler; it flips sidebar_expanded each time
sidebar_toggle.click(
fn=toggle_sidebar,
inputs=[sidebar_expanded],
outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
)
sidebar_show_btn.click(
fn=toggle_sidebar,
inputs=[sidebar_expanded],
outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
)
# Add debug info display to the bottom of the sidebar
# Rebinds the module-level debug_header / debug_display placeholders
with sidebar:
debug_header = gr.Markdown("### Debug Information", visible=False)
debug_display = gr.Markdown(visible=False)
# Update the debug button click handler to show debug info in the sidebar
# section_components is a flat list with four entries per section, hence the step of 4
for i in range(0, len(section_components), 4):
debug_btn = section_components[i+3] # The debug button is the 4th component
# Connect debug button directly to show debug info only in the sidebar
# Every debug button reads the same debug_info_state
debug_btn.click(
fn=format_debug_info,
inputs=[debug_info_state],
outputs=[debug_display]
).then(
# Takes no inputs; unconditionally reveals the header and the display
fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
outputs=[debug_header, debug_display]
)
# Launch the app only when this file is run as a script, not when it is imported
if __name__ == "__main__":
demo.launch()