# Page-chrome residue from the hosting site ("Spaces: Running"); not part of the program.
import gradio as gr | |
from utils import (extract_wiki_id, get_wiki_details, | |
init_llm_client, split_content_into_sections, | |
get_translate_prompt) | |
import json | |
import json_repair | |
# Translation targets offered in the UI, as (display name, code) pairs.
# The display names populate the "Target Language" dropdown in the order
# listed here; the "Custom" entry is the one that reveals the free-text
# language box.
_LANGUAGE_PAIRS = (
    ("Arabic", "ar"),
    ("Arabic-Extended", "ar-x-extended"),
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
    ("Italian", "it"),
    ("Portuguese", "pt"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Chinese", "zh"),
    ("Hindi", "hi"),
    ("Korean", "ko"),
    ("Custom", "custom"),  # Add custom option
)
LANGUAGES = dict(_LANGUAGE_PAIRS)

# Placeholders for the sidebar debug widgets; both names are rebound to
# Markdown components when the UI is built further down in this file.
debug_display = debug_header = None
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, custom_lang, content_format):
    """
    Fetch a Wikipedia article and split it into sections for the UI.

    Args:
        wiki_url: URL of the Wikipedia article.
        api_key, model_id, base_url, target_lang, custom_lang: Accepted because
            the UI passes them to this handler; extraction does not use them.
        content_format: "XML" splits the article's ``wiki_xml``; any other
            value splits its ``content``.

    Returns:
        A 6-tuple ``(status, pageid, title, summary, wiki_xml, sections)``.
        When the URL cannot be resolved or no details come back, the status
        carries the error text, the four article fields are ``None`` and
        ``sections`` is an empty dict.
    """
    wiki_id = extract_wiki_id(wiki_url)
    if not wiki_id:
        return "Invalid Wikipedia URL. Please check the URL and try again.", None, None, None, None, {}

    # Get the details of the Wikipedia article
    wiki_details = get_wiki_details(wiki_id)
    # NOTE(review): assumes a failed lookup yields a falsy value (None / {});
    # confirm against get_wiki_details. Without this guard such a value would
    # crash on the subscripts below instead of reporting a status message.
    if not wiki_details:
        return "Could not retrieve the article details. Please check the URL and try again.", None, None, None, None, {}

    # Only the source field differs between the two formats.
    source_key = 'wiki_xml' if content_format == "XML" else 'content'
    content_sections = split_content_into_sections(wiki_details[source_key], content_format)

    return (
        f"Extraction complete! Sections: {len(content_sections)}",
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_details['wiki_xml'],
        content_sections,
    )
def translate_content(content, article_title, artice_summary, content_format,
                      target_lang, api_key, model_id, base_url, preference_prompt=None,
                      debug_mode=False, max_tokens=2000, temperature=0.5):
    """
    Translate one piece of article content with a chat-completion call.

    Args:
        content: Text (or XML) to translate.
        article_title: Title of the article, passed to the prompt builder.
        artice_summary: Article summary for the prompt builder. The spelling
            is kept because callers and ``get_translate_prompt`` use this
            keyword name.
        content_format: Format label forwarded to the prompt builder.
        target_lang: Target language, used as given (already resolved by the
            caller).
        api_key: API key for the LLM client.
        model_id: Model identifier sent with the request.
        base_url: Base URL forwarded to ``init_llm_client``.
        preference_prompt: Optional extra translation instructions.
        debug_mode: When true, also return a debug dictionary.
        max_tokens: Completion token limit for the request (default 2000).
        temperature: Sampling temperature for the request (default 0.5).

    Returns:
        The translated text, or an error string when the response has no
        ``output_content`` field. With ``debug_mode`` enabled, a tuple
        ``(text, debug_info)`` where ``debug_info`` holds the prompt, the raw
        response, token usage and the model id.
    """
    llm_client = init_llm_client(api_key, base_url=base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
        content_format=content_format,
        preference_prompt=preference_prompt
    )

    # Call the LLM to get the translation
    response = llm_client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )
    raw_response = response.choices[0].message.content

    # Skip parsing when the model returned no text at all.
    decoded_object = json_repair.loads(raw_response) if raw_response else None

    # The repaired payload is not guaranteed to be a JSON object; on a string
    # or list, `in` would be a substring/element test and the subscript would
    # raise, so require a dict before reading the field.
    if isinstance(decoded_object, dict) and 'output_content' in decoded_object:
        translation = decoded_object['output_content']
    else:
        translation = "Error: Translation output not found in the response."

    if not debug_mode:
        return translation

    # NOTE(review): some OpenAI-compatible backends may omit usage data;
    # missing counters are reported as None instead of raising.
    usage = getattr(response, "usage", None)
    debug_info = {
        "prompt": translation_prompt,
        "response": raw_response,
        "usage": {
            "prompt_tokens": getattr(usage, "prompt_tokens", None),
            "completion_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None)
        },
        "model": model_id
    }
    return translation, debug_info
def translate_section(section_content, article_title, article_summary, content_format,
                      target_lang, custom_lang, api_key, model_id, base_url, preference_prompt=None, debug_mode=False):
    """
    Translate a single section of the Wikipedia article.

    Args:
        section_content: Text of the section to translate.
        article_title: Title of the article the section belongs to.
        article_summary: Summary of the article.
        content_format: Format label forwarded to the translation call.
        target_lang: Language chosen in the dropdown; "Custom" means the
            language comes from ``custom_lang``.
        custom_lang: Free-text language name used when "Custom" is chosen.
        api_key, model_id, base_url: LLM connection settings.
        preference_prompt: Optional extra translation instructions.
        debug_mode: When true, the second returned value carries debug info.

    Returns:
        A pair ``(translation_or_message, debug_info)``; ``debug_info`` is
        ``None`` unless ``debug_mode`` is enabled and a translation was run.
    """
    if not section_content or not api_key:
        return "Please provide content and API key for translation.", None

    # Use custom language if selected
    if target_lang == "Custom":
        actual_lang = (custom_lang or "").strip()
        if not actual_lang:
            # Without this guard the literal word "Custom" would be sent to
            # the model as the target language.
            return "Please enter a custom language name for translation.", None
    else:
        actual_lang = target_lang

    result = translate_content(
        content=section_content,
        article_title=article_title,
        artice_summary=article_summary,
        content_format=content_format,
        target_lang=actual_lang,
        api_key=api_key,
        model_id=model_id,
        base_url=base_url,
        preference_prompt=preference_prompt,
        debug_mode=debug_mode
    )

    if debug_mode:
        # In debug mode translate_content returns (translation, debug_info).
        translation, debug_info = result
        return translation, debug_info
    return result, None
def format_debug_info(debug_info):
    """
    Format debug information as markdown for display.

    Args:
        debug_info: Dictionary with ``model``, ``usage`` (containing
            ``prompt_tokens``, ``completion_tokens``, ``total_tokens``),
            ``prompt`` and ``response`` entries, or a falsy value.

    Returns:
        A markdown string; a fixed placeholder message when ``debug_info``
        is falsy.
    """
    if not debug_info:
        return "No debug information available."

    usage = debug_info['usage']
    # Triple backticks inside the payload would close the surrounding code
    # fence early, so they are removed from both the prompt and the response.
    prompt_text = str(debug_info['prompt']).replace('```', '')
    response_text = str(debug_info['response']).replace('```', '')

    parts = [
        "## LLM Debug Information\n\n",
        # Model and usage info
        f"### Model: {debug_info['model']}\n\n",
        "### Usage\n",
        f"- Prompt tokens: {usage['prompt_tokens']}\n",
        f"- Completion tokens: {usage['completion_tokens']}\n",
        f"- Total tokens: {usage['total_tokens']}\n\n",
        # Prompt
        "### Prompt\n",
        f"```\n{prompt_text}\n```\n\n",
        # Raw response
        "### Raw Response\n",
        f"```json\n{response_text}\n```\n",
    ]
    return "".join(parts)
# Update the UI with sections from Wikipedia content
def update_ui_with_sections(sections):
    """
    Build the component updates that display the article's sections.

    Args:
        sections: Dictionary of section titles and content. ``None`` is
            treated as no sections.

    Returns:
        A flat list of 400 updates (100 section slots, 4 components each) in
        the order: section textbox, translate button, translation textbox,
        debug button. Slots beyond the available sections are hidden.
    """
    section_slots = 100  # the UI pre-builds this many section rows
    components_per_slot = 4

    # Materialize the entries once instead of rebuilding the key list on
    # every loop iteration; sections past the last slot are not shown.
    entries = list((sections or {}).items())[:section_slots]

    results = []
    for section_title, section_content in entries:
        results.extend([
            # Section textbox, visible with content and label
            gr.update(visible=True, value=section_content, label=f"Section: {section_title}"),
            gr.update(visible=True),  # Translate button
            gr.update(visible=True, value="", label=f"Translation: {section_title}"),  # Translation output
            gr.update(visible=False)  # Debug button (hidden by default)
        ])

    # Hide the components of every unused slot
    unused_components = (section_slots - len(entries)) * components_per_slot
    results.extend(gr.update(visible=False) for _ in range(unused_components))
    return results
# Create Gradio app
# NOTE(review): the container nesting below (Group / Row / Column boundaries)
# was reconstructed from a flattened copy of this file — confirm it against
# the intended layout.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Wikipedia Translator")

    # State variables
    sections_state = gr.State({})
    sidebar_expanded = gr.State(True)  # Track sidebar state, default is expanded

    def toggle_sidebar(expanded):
        """Flip the sidebar state.

        Returns the new state plus updates for the sidebar (visibility), the
        main column (scale 2 when the sidebar is shown, 3 when hidden) and
        the "Show Sidebar" button (visible only while the sidebar is hidden).
        """
        new_expanded = not expanded
        return (
            new_expanded,
            gr.update(visible=new_expanded),
            gr.update(scale=2 if new_expanded else 3),
            gr.update(visible=not new_expanded),
        )

    def toggle_custom_language(target_lang):
        """Show the custom language input only when "Custom" is selected."""
        return gr.update(visible=target_lang == "Custom")

    def debug_button_visibility(debug_info, debug_enabled):
        """Show a section's debug button only in debug mode and once it has debug info."""
        return gr.update(visible=bool(debug_enabled) and debug_info is not None)

    def show_debug_info(debug_info):
        """Reveal the sidebar debug header and fill the debug display."""
        return (
            gr.update(visible=True),
            gr.update(visible=True, value=format_debug_info(debug_info)),
        )

    with gr.Row() as main_layout:
        # Sidebar for configuration
        with gr.Column(scale=1, visible=True) as sidebar:
            # Toggle button at the top of the sidebar
            sidebar_toggle = gr.Button("« Hide Sidebar", scale=0)
            gr.Markdown("### Configuration")
            with gr.Group():
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="sk-...",
                    type="password",
                )
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                    value="gpt-4.1-mini",
                )
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Arabic",
                    label="Target Language",
                )
                custom_language = gr.Textbox(
                    label="Custom Language",
                    placeholder="Enter language name (e.g., Swedish, Dutch, etc.)",
                    visible=False,
                    info="Specify your desired language if not in the list above"
                )
                # Connect the dropdown to show/hide custom language input
                target_language.change(
                    fn=toggle_custom_language,
                    inputs=[target_language],
                    outputs=[custom_language]
                )
                content_format = gr.Radio(
                    choices=["Text", "XML"],
                    value="XML",
                    label="Content Format",
                    info="Choose how to display article content"
                )
                # Debug mode toggle
                debug_mode = gr.Checkbox(
                    label="Debug Mode",
                    value=False,
                    info="Show detailed information about LLM calls"
                )

            # Preference prompt section
            gr.Markdown("### Translation Preferences")
            preference_prompt = gr.Textbox(
                label="Additional Translation Preferences",
                placeholder="Enter any specific translation preferences or instructions...",
                lines=5,
                info="Optional: Add specific preferences for how the translation should be performed"
            )

            gr.Markdown("### About")
            gr.Markdown("""
            This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
            1. Configure your API settings
            2. Enter a Wikipedia URL
            3. Click Extract to process the article
            """)

        # Main content area
        with gr.Column(scale=2) as main_content:
            # "Show Sidebar" button, visible only while the sidebar is hidden
            with gr.Row():
                sidebar_show_btn = gr.Button("» Show Sidebar", visible=False, scale=0)
                with gr.Column(scale=1):
                    gr.Markdown("### Wikipedia Article")

            wiki_url = gr.Textbox(
                label="Wikipedia URL",
                placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                info="Enter the full URL of the Wikipedia article"
            )
            extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
            output = gr.Markdown(label="Status")

            # Results area
            article_pageid = gr.Textbox(
                label="Article Page ID",
                placeholder="Page ID will appear here after extraction",
                interactive=False,
                show_copy_button=True
            )
            article_title = gr.Textbox(
                label="Article Title",
                placeholder="Title will appear here after extraction",
                interactive=False,
                show_copy_button=True
            )
            article_summary = gr.Textbox(
                label="Article Summary",
                placeholder="Summary will appear here after extraction",
                interactive=False,
                show_copy_button=True
            )
            article_xml = gr.Textbox(
                label="Article XML",
                placeholder="XML will appear here after extraction",
                interactive=False,
                visible=False,  # Hidden by default as it's usually large
                show_copy_button=True
            )

            # Pre-define section textboxes and related components
            gr.Markdown("### Article Sections")
            with gr.Column() as sections_container:
                section_components = []
                # One debug state per section: with a single shared state,
                # every "View Debug Info" button would show the info of
                # whichever section was translated most recently.
                section_debug_states = []
                # 100 slots, matching the number update_ui_with_sections fills
                for _ in range(100):
                    section_debug_state = gr.State(None)
                    with gr.Row():
                        section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)
                        translate_btn = gr.Button("Translate", visible=False)
                        translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
                        debug_btn = gr.Button("View Debug Info", visible=False)
                    section_components.extend([section_textbox, translate_btn, translation_output, debug_btn])
                    section_debug_states.append(section_debug_state)

                    # Translate this section, then decide whether its debug
                    # button should be shown.
                    translate_btn.click(
                        fn=translate_section,
                        inputs=[
                            section_textbox,
                            article_title,
                            article_summary,
                            content_format,
                            target_language,
                            custom_language,
                            api_key,
                            model_id,
                            base_url,
                            preference_prompt,
                            debug_mode
                        ],
                        outputs=[translation_output, section_debug_state]
                    ).then(
                        fn=debug_button_visibility,
                        inputs=[section_debug_state, debug_mode],
                        outputs=[debug_btn]
                    )

    # Connect the extract button: fetch the article, then populate the section rows
    extract_button.click(
        fn=extract_wikipedia_content,
        inputs=[wiki_url, api_key, model_id, base_url, target_language, custom_language, content_format],
        outputs=[
            output,
            article_pageid,
            article_title,
            article_summary,
            article_xml,
            sections_state,
        ]
    ).then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_components
    )

    # Connect the sidebar toggle buttons (both flip the same state)
    sidebar_toggle.click(
        fn=toggle_sidebar,
        inputs=[sidebar_expanded],
        outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
    )
    sidebar_show_btn.click(
        fn=toggle_sidebar,
        inputs=[sidebar_expanded],
        outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
    )

    # Add debug info display to the bottom of the sidebar
    with sidebar:
        debug_header = gr.Markdown("### Debug Information", visible=False)
        debug_display = gr.Markdown(visible=False)

    # Wire each section's debug button (every 4th component) to its own state
    for section_debug_state, debug_btn in zip(section_debug_states, section_components[3::4]):
        debug_btn.click(
            fn=show_debug_info,
            inputs=[section_debug_state],
            outputs=[debug_header, debug_display]
        )
# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()