import json
import os
import tempfile

import gradio as gr
import json_repair

from utils import (
    extract_wiki_id,
    get_wiki_details,
    init_llm_client,
    split_content_into_sections,
    get_translate_prompt,
)

# Choices offered in the "Target Language" dropdown (display name -> code).
# Only the display names are used by the code in this file: the selected name
# is passed straight to the translation prompt. "Custom" switches the UI to a
# free-text language box.
LANGUAGES = {
    "Arabic": "ar",
    "Arabic-Extended": "ar-x-extended",
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Russian": "ru",
    "Japanese": "ja",
    "Chinese": "zh",
    "Hindi": "hi",
    "Korean": "ko",
    "Custom": "custom",
}

# Placeholders; rebound to Gradio Markdown components when the UI is built.
debug_display = None
debug_header = None


def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang,
                              custom_lang, content_format, chunking):
    """Fetch a Wikipedia article and split it into translatable sections.

    Args:
        wiki_url: URL of the Wikipedia article.
        api_key, model_id, base_url, target_lang, custom_lang: Not used by the
            extraction itself; they are accepted because the UI wires these
            inputs to this handler positionally.
        content_format: "XML" to work on the article's wiki XML, anything else
            to work on its plain-text content.
        chunking: When truthy, split the article with
            ``split_content_into_sections``; otherwise return the whole
            article as one section named "Full Article".

    Returns:
        A 6-tuple ``(status_message, pageid, title, summary, wiki_xml,
        sections)`` where ``sections`` maps section title -> section content.
        On failure the middle four items are ``None`` and ``sections`` is
        ``{}``.
    """
    wiki_id = extract_wiki_id(wiki_url)
    if not wiki_id:
        return ("Invalid Wikipedia URL. Please check the URL and try again.",
                None, None, None, None, {})

    wiki_details = get_wiki_details(wiki_id)
    # NOTE(review): get_wiki_details lives in utils and its failure mode is not
    # visible here; an empty/None result is reported instead of crashing on
    # the subscripts below.
    if not wiki_details:
        return ("Could not retrieve the article. Please check the URL and try again.",
                None, None, None, None, {})

    # One format test shared by the chunked and unchunked paths so both always
    # operate on the same representation of the article.
    if content_format == "XML":
        source_text = wiki_details['wiki_xml']
    else:
        source_text = wiki_details['content']

    if chunking:
        content_sections = split_content_into_sections(source_text, content_format)
    else:
        content_sections = {"Full Article": source_text}

    return (
        f"Extraction complete! \nSections: {len(content_sections)}",
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_details['wiki_xml'],
        content_sections,
    )


def translate_content(content, article_title, artice_summary, content_format,
                      target_lang, api_key, model_id, base_url, preference_prompt=None,
                      debug_mode=False, max_tokens=2000, temperature=0.5):
    """Translate one piece of article content with the configured LLM.

    Args:
        content: Text (or XML) to translate.
        article_title: Title of the article, passed to the prompt builder.
        artice_summary: Article summary, passed to the prompt builder. The
            misspelled name is kept because callers and
            ``get_translate_prompt`` use it as a keyword.
        content_format: Format label forwarded to the prompt builder.
        target_lang: Already-resolved target language (dropdown name or the
            user's custom value).
        api_key, base_url: Forwarded to ``init_llm_client``.
        model_id: Model name sent with the chat-completion request.
        preference_prompt: Optional extra instructions for the translation.
        debug_mode: When true, also return a dict describing the LLM call.
        max_tokens: Completion token limit for the request.
        temperature: Sampling temperature for the request.

    Returns:
        The translated text, or an ``"Error: ..."`` message when the model
        reply carries no ``output_content`` field. With ``debug_mode`` the
        return value is a ``(translation, debug_info)`` tuple instead.
    """
    llm_client = init_llm_client(api_key, base_url=base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
        content_format=content_format,
        preference_prompt=preference_prompt,
    )

    response = llm_client.chat.completions.create(
        model=model_id,
        messages=[{"role": "user", "content": translation_prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # The message content can be None (e.g. a refused or empty completion).
    raw_response = response.choices[0].message.content or ""
    decoded_object = json_repair.loads(raw_response) if raw_response else None

    # json_repair may hand back a str or list when the reply is not a JSON
    # object; only a dict can carry the expected key.
    if isinstance(decoded_object, dict) and 'output_content' in decoded_object:
        translation = decoded_object['output_content']
    else:
        translation = "Error: Translation output not found in the response."

    if not debug_mode:
        return translation

    # Some OpenAI-compatible backends omit usage accounting.
    usage = getattr(response, "usage", None)
    debug_info = {
        "prompt": translation_prompt,
        "response": raw_response,
        "usage": {
            "prompt_tokens": getattr(usage, "prompt_tokens", None),
            "completion_tokens": getattr(usage, "completion_tokens", None),
            "total_tokens": getattr(usage, "total_tokens", None),
        },
        "model": model_id,
    }
    return translation, debug_info

# Imported here (not only inside clean_section_title) because several helpers
# in this part of the file need them.
import html
import re

# Number of section rows pre-built (hidden) in the UI and the number of
# components each row contributes to the outputs of update_ui_with_sections.
MAX_SECTIONS = 100
COMPONENTS_PER_SECTION = 4

_HTML_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')


def translate_section(section_content, article_title, article_summary, content_format,
                      target_lang, custom_lang, api_key, model_id, base_url,
                      preference_prompt=None, debug_mode=False):
    """Translate a single section of the Wikipedia article.

    Resolves the "Custom" language choice to the user's free-text value and
    delegates to ``translate_content``.

    Returns:
        ``(translation, debug_info)``. ``debug_info`` is ``None`` unless
        ``debug_mode`` is on. When the content or API key is missing, the
        first item is a message asking for them.
    """
    if not section_content or not api_key:
        return "Please provide content and API key for translation.", None

    if target_lang == "Custom" and custom_lang:
        actual_lang = custom_lang
    else:
        actual_lang = target_lang

    result = translate_content(
        content=section_content,
        article_title=article_title,
        artice_summary=article_summary,
        content_format=content_format,
        target_lang=actual_lang,
        api_key=api_key,
        model_id=model_id,
        base_url=base_url,
        preference_prompt=preference_prompt,
        debug_mode=debug_mode,
    )

    # translate_content returns a (translation, debug_info) pair only in
    # debug mode; normalise both shapes to a pair for the two UI outputs.
    if debug_mode:
        translation, debug_info = result
        return translation, debug_info
    return result, None


def format_debug_info(debug_info):
    """Format the debug dict of one LLM call as Markdown.

    Args:
        debug_info: Dict with ``model``, ``usage``, ``prompt`` and
            ``response`` keys, or a falsy value when nothing was recorded.

    Returns:
        A Markdown string; a short notice when ``debug_info`` is empty.
    """
    if not debug_info:
        return "No debug information available."

    usage = debug_info.get('usage') or {}
    # Triple backticks inside the payload would close the fenced blocks early.
    prompt = str(debug_info.get('prompt') or '').replace('```', '')
    response = str(debug_info.get('response') or '').replace('```', '')

    return (
        "## LLM Debug Information\n\n"
        f"### Model: {debug_info.get('model')}\n\n"
        "### Usage\n"
        f"- Prompt tokens: {usage.get('prompt_tokens')}\n"
        f"- Completion tokens: {usage.get('completion_tokens')}\n"
        f"- Total tokens: {usage.get('total_tokens')}\n\n"
        "### Prompt\n"
        f"```\n{prompt}\n```\n\n"
        "### Raw Response\n"
        f"```json\n{response}\n```\n"
    )


def _escape(value):
    """Escape a value for use in HTML/XML text or a quoted attribute."""
    return html.escape('' if value is None else str(value), quote=True)


def _safe_filename_stem(title, fallback='article'):
    """Turn an article title into a file-name stem without path separators."""
    stem = _UNSAFE_FILENAME_CHARS_RE.sub('_', str(title or '')).strip(' ._')
    return stem[:150] or fallback


def _render_download(download_format, article_title, article_summary, sections):
    """Render the HTML, JSON or plain-text body; return (content, extension)."""
    if download_format == "HTML":
        # Titles and bodies come from Wikipedia / the LLM, so they are escaped
        # rather than interpolated as raw markup.
        parts = [
            '<meta charset="utf-8">',
            f"<h1>{_escape(article_title)}</h1>",
            f"<p>{_escape(article_summary)}</p>",
        ]
        for title, text in sections.items():
            parts.append(f"<h2>{_escape(title)}</h2>")
            parts.append(f"<p>{_escape(text)}</p>")
        return "\n".join(parts), "html"

    if download_format == "JSON":
        obj = {"title": article_title, "summary": article_summary, "sections": sections}
        return json.dumps(obj, ensure_ascii=False, indent=2), "json"

    # Anything else is treated as plain text.
    parts = [article_title or 'Article', article_summary or '']
    parts.extend(f"## {title}\n{text}" for title, text in sections.items())
    return "\n\n".join(parts), "txt"


def _write_download(content, filename):
    """Write *content* as UTF-8 into the system temp directory; return the path."""
    temp_path = os.path.join(tempfile.gettempdir(), filename)
    with open(temp_path, 'w', encoding='utf-8') as f:
        f.write(content)
    return temp_path


# Functions to generate downloadable content for original and translated articles
def generate_download_original(download_format, article_title, article_summary,
                               article_xml, sections):
    """Write the original article to a temp file in the requested format.

    Args:
        download_format: "Wikipedia XML", "HTML", "JSON"; any other value
            produces plain text.
        article_title: Article title; also used (sanitised) as the file name.
        article_summary: Article summary.
        article_xml: Raw wiki XML, used verbatim for the "Wikipedia XML" format.
        sections: Mapping of section title -> section content.

    Returns:
        Path of the written file.
    """
    sections = sections or {}
    stem = _safe_filename_stem(article_title)

    if download_format == "Wikipedia XML":
        content, extension = article_xml or "", "xml"
    else:
        content, extension = _render_download(
            download_format, article_title, article_summary, sections)

    return _write_download(content, f"{stem}.{extension}")


def generate_download_translated(download_format, article_title, article_summary,
                                 sections, *translations_values):
    """Write the current translations to a temp file in the requested format.

    Args:
        download_format: "Wikipedia XML", "HTML", "JSON"; any other value
            produces plain text.
        article_title: Article title; also used (sanitised) as the file name.
        article_summary: Article summary.
        sections: Mapping of section title -> original content; only its keys
            and their order are used.
        *translations_values: Translation textbox values, positionally matched
            to the section titles. Extra values on either side are ignored.

    Returns:
        Path of the written file.
    """
    sections = sections or {}
    # zip stops at the shorter side, which pairs each section with its box
    # and drops the values of unused (hidden) boxes.
    translations = {
        title: (text or "") for title, text in zip(sections, translations_values)
    }
    stem = f"{_safe_filename_stem(article_title)}_translated"

    if download_format == "Wikipedia XML":
        # NOTE(review): wrapper element names are a reconstruction. Section
        # bodies are written as-is because XML-format translations are
        # themselves markup; only attribute values are escaped.
        parts = [f'<article title="{_escape(article_title)}">']
        for title, text in translations.items():
            parts.append(f'<section title="{_escape(title)}">\n{text}\n</section>')
        parts.append("</article>")
        content, extension = "\n".join(parts), "xml"
    else:
        content, extension = _render_download(
            download_format, article_title, article_summary, translations)

    return _write_download(content, f"{stem}.{extension}")


def clean_section_title(title):
    """Return *title* without HTML comments, with entities decoded and
    runs of whitespace collapsed to single spaces."""
    title = _HTML_COMMENT_RE.sub('', title)
    title = html.unescape(title)
    # split()/join both collapses inner whitespace and trims the ends.
    return ' '.join(title.split())


def update_ui_with_sections(sections, max_sections=MAX_SECTIONS):
    """Build the component updates that display the extracted sections.

    Args:
        sections: Mapping of section title -> section content (may be empty
            or None).
        max_sections: Number of pre-built section rows in the UI.

    Returns:
        A flat list of ``max_sections * 4`` ``gr.update`` objects, four per
        row in the order: section textbox, translate button, translation
        textbox, debug button. Rows beyond the available sections are hidden.
    """
    visible_items = list((sections or {}).items())[:max_sections]

    results = []
    for section_title, section_content in visible_items:
        clean_title = clean_section_title(section_title)
        results.extend([
            gr.update(visible=True, value=section_content, label=f"Section: {clean_title}"),
            gr.update(visible=True),  # Translate button
            gr.update(visible=True, value="", label=f"Translation: {clean_title}"),
            gr.update(visible=False),  # Debug button stays hidden until a debug run
        ])

    unused_rows = max_sections - len(visible_items)
    results.extend(
        gr.update(visible=False) for _ in range(unused_rows * COMPONENTS_PER_SECTION)
    )
    return results


# Create Gradio app
with gr.Blocks(theme=gr.themes.Monochrome(), css="""
.odd-section {
    background-color: rgb(228 213 213);
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
""") as demo:
    gr.Markdown("# Wikipedia Translator")
    gr.Markdown("""
    **Translate Wikipedia articles into any language using AI**

    This tool helps you:
    - Extract content from any Wikipedia article
    - Translate it into your chosen language using OpenAI's models
    - Maintain the article's structure and formatting
    - Download the translated content in various formats

    Start by configuring your API settings in the sidebar and entering a Wikipedia URL below.

    ---

    Developed by [Abu Bakr Soliman](https://www.linkedin.com/in/bakrianoo/)
    """)

    # State variables
    sections_state = gr.State({})
    sidebar_expanded = gr.State(True)  # Sidebar starts expanded

    def toggle_sidebar(expanded):
        """Flip the sidebar state.

        Returns updates for: the state itself, the sidebar column, the main
        column (wider while the sidebar is hidden) and the "show" button
        (visible only while the sidebar is hidden).
        """
        new_expanded = not expanded
        return (
            new_expanded,
            gr.update(visible=new_expanded),
            gr.update(scale=2 if new_expanded else 3),
            gr.update(visible=not new_expanded),
        )

    def toggle_custom_language(target_lang):
        """Show the custom-language box only when "Custom" is selected."""
        return gr.update(visible=target_lang == "Custom")

    # NOTE(review): the nesting of the layout containers below (Group and the
    # inner Column) was reconstructed; confirm against the intended design.
    with gr.Row() as main_layout:
        # Sidebar for configuration
        with gr.Column(scale=1, visible=True) as sidebar:
            sidebar_toggle = gr.Button("« Hide Sidebar", scale=0)

            gr.Markdown("### Configuration")

            with gr.Group():
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="sk-...",
                    type="password",
                )
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                    value="gpt-4.1-mini",
                )
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Arabic",
                    label="Target Language",
                )
                custom_language = gr.Textbox(
                    label="Custom Language",
                    placeholder="Enter language name (e.g., Swedish, Dutch, etc.)",
                    visible=False,
                    info="Specify your desired language if not in the list above"
                )

                # Show/hide the custom language input with the dropdown choice
                target_language.change(
                    fn=toggle_custom_language,
                    inputs=[target_language],
                    outputs=[custom_language]
                )

                chunking = gr.Checkbox(
                    label="Enable Content Chunking",
                    value=False,
                    info="Split content into sections for individual translation"
                )
                content_format = gr.Radio(
                    choices=["Text", "XML"],
                    value="XML",
                    label="Content Format",
                    info="Choose how to display article content"
                )
                debug_mode = gr.Checkbox(
                    label="Debug Mode",
                    value=False,
                    info="Show detailed information about LLM calls"
                )

            gr.Markdown("### Translation Preferences")
            preference_prompt = gr.Textbox(
                label="Additional Translation Preferences",
                placeholder="Enter any specific translation preferences or instructions...",
                lines=5,
                info="Optional: Add specific preferences for how the translation should be performed"
            )

            with gr.Accordion("About", open=False):
                gr.Markdown("""
                This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.

                1. Configure your API settings
                2. Enter a Wikipedia URL
                3. Click Extract to process the article

                ---

                Developed by [Abu Bakr Soliman](https://www.linkedin.com/in/bakrianoo/)
                """)

        # Main content area
        with gr.Column(scale=2) as main_content:
            # Button that brings the sidebar back while it is hidden
            with gr.Row():
                sidebar_show_btn = gr.Button("» Show Sidebar", visible=False, scale=0)

            with gr.Column(scale=1):
                gr.Markdown("### Wikipedia Article")
                wiki_url = gr.Textbox(
                    label="Wikipedia URL",
                    placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                    info="Enter the full URL of the Wikipedia article"
                )
                extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
                output = gr.Markdown(label="Status")

                # Extraction results
                article_pageid = gr.Textbox(
                    label="Article Page ID",
                    placeholder="Page ID will appear here after extraction",
                    interactive=False,
                    show_copy_button=True
                )
                article_title = gr.Textbox(
                    label="Article Title",
                    placeholder="Title will appear here after extraction",
                    interactive=False,
                    show_copy_button=True
                )
                article_summary = gr.Textbox(
                    label="Article Summary",
                    placeholder="Summary will appear here after extraction",
                    interactive=False,
                    show_copy_button=True
                )
                article_xml = gr.Textbox(
                    label="Article XML",
                    placeholder="XML will appear here after extraction",
                    interactive=False,
                    visible=False,  # Hidden by default as it's usually large
                    show_copy_button=True
                )

                # A fixed pool of hidden section rows is created up front and
                # revealed by update_ui_with_sections after extraction.
                gr.Markdown("### Article Sections")
                with gr.Column() as sections_container:
                    section_components = []
                    # One debug state per section, so each "View Debug Info"
                    # button shows the call made for its own section.
                    section_debug_states = []

                    for i in range(MAX_SECTIONS):
                        # 1st, 3rd, 5th... rows get the tinted background
                        with gr.Column(elem_classes=["odd-section"] if i % 2 == 0 else []) as section:
                            section_textbox = gr.Textbox(visible=False, lines=4, show_copy_button=True)

                            with gr.Row():
                                translate_btn = gr.Button("Translate", visible=False)
                                debug_btn = gr.Button("View Debug Info", visible=False)

                            translation_output = gr.Textbox(visible=False, lines=4, show_copy_button=True)
                            gr.Markdown("---", visible=False)

                        section_debug_state = gr.State(None)
                        section_debug_states.append(section_debug_state)
                        section_components.extend(
                            [section_textbox, translate_btn, translation_output, debug_btn]
                        )

                        result = translate_btn.click(
                            fn=translate_section,
                            inputs=[
                                section_textbox, article_title, article_summary, content_format,
                                target_language, custom_language, api_key, model_id, base_url,
                                preference_prompt, debug_mode
                            ],
                            outputs=[translation_output, section_debug_state]
                        )

                        # Reveal the debug button only when debug mode is on
                        # and the translation produced debug info.
                        result.then(
                            fn=lambda debug_info, debug_on: gr.update(
                                visible=bool(debug_on and debug_info is not None)
                            ),
                            inputs=[section_debug_state, debug_mode],
                            outputs=[debug_btn]
                        )

    # Extract the article, then populate the section rows
    extract_button.click(
        fn=extract_wikipedia_content,
        inputs=[wiki_url, api_key, model_id, base_url, target_language,
                custom_language, content_format, chunking],
        outputs=[
            output,
            article_pageid,
            article_title,
            article_summary,
            article_xml,
            sections_state,
        ]
    ).then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_components
    )

    # Both sidebar buttons flip the same state
    sidebar_toggle.click(
        fn=toggle_sidebar,
        inputs=[sidebar_expanded],
        outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
    )
    sidebar_show_btn.click(
        fn=toggle_sidebar,
        inputs=[sidebar_expanded],
        outputs=[sidebar_expanded, sidebar, main_content, sidebar_show_btn]
    )

    # Download options and the debug panel live at the bottom of the sidebar
    with sidebar:
        download_format = gr.Dropdown(
            choices=["Wikipedia XML", "HTML", "JSON", "Plain Text"],
            value="Wikipedia XML",
            label="Download Format"
        )
        download_original_btn = gr.Button("Download Original")
        download_original_file = gr.File(label="Original Article")
        download_translated_btn = gr.Button("Download Translated")
        download_translated_file = gr.File(label="Translated Article")

        debug_header = gr.Markdown("### Debug Information", visible=False)
        debug_display = gr.Markdown(visible=False)

    # The debug panel is created after the section rows, so the debug buttons
    # are wired here: render the section's debug info, then reveal the panel.
    section_debug_buttons = section_components[3::COMPONENTS_PER_SECTION]
    for debug_btn, section_debug_state in zip(section_debug_buttons, section_debug_states):
        debug_btn.click(
            fn=format_debug_info,
            inputs=[section_debug_state],
            outputs=[debug_display]
        ).then(
            fn=lambda: (gr.update(visible=True), gr.update(visible=True)),
            outputs=[debug_header, debug_display]
        )

    download_original_btn.click(
        fn=generate_download_original,
        inputs=[download_format, article_title, article_summary, article_xml, sections_state],
        outputs=[download_original_file]
    )

    # Every translation textbox is passed so the handler can pair them with
    # the section titles by position.
    translation_outputs = section_components[2::COMPONENTS_PER_SECTION]
    download_translated_btn.click(
        fn=generate_download_translated,
        inputs=[download_format, article_title, article_summary, sections_state] + translation_outputs,
        outputs=[download_translated_file]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()