Spaces:

bakrianoo
/

wikipedia-translator

Sleeping

File size: 6,632 Bytes

import gradio as gr
from utils import (extract_wiki_id, get_wiki_details,
                   init_llm_client, split_content_into_sections,
                   get_translate_prompt)
import json

# Target languages offered in the UI: display name -> short language code.
# Insertion order is the order in which the names are listed to the user.
LANGUAGES = dict(
    Arabic="ar",
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Russian="ru",
    Japanese="ja",
    Chinese="zh",
    Hindi="hi",
    Korean="ko",
)

def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang):
    """
    Fetch a Wikipedia article from its URL and split it into sections.

    Only ``wiki_url`` is read here; ``api_key``, ``model_id``, ``base_url``
    and ``target_lang`` are accepted so the signature matches the UI inputs
    but are not used by this function.

    Returns a 6-tuple:
        (status message, page id, title, summary, article XML, sections)
    When no article id can be extracted from the URL, the status carries an
    error message, the four article fields are None and sections is ``{}``.
    """
    article_id = extract_wiki_id(wiki_url)

    # Guard clause: nothing to fetch without a usable article id.
    if not article_id:
        invalid_url_status = "Invalid Wikipedia URL. Please check the URL and try again."
        return invalid_url_status, None, None, None, None, {}

    # Look the article up, then break its XML into sections.
    details = get_wiki_details(article_id)
    article_xml = details['wiki_xml']
    sections = split_content_into_sections(article_xml)

    status = f"Extraction complete! Sections: {len(sections)}"
    return (
        status,
        details['pageid'],
        details['title'],
        details['summary'],
        article_xml,
        sections,
    )

def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url):
    """
    Translate a piece of article content into the target language via the LLM.

    Args:
        content: Original text to translate.
        article_title: Title of the article, forwarded to the prompt builder.
        artice_summary: Summary of the article, forwarded to the prompt
            builder (the spelling matches the keyword used by
            ``get_translate_prompt``).
        target_lang: Language to translate into, forwarded to the prompt builder.
        api_key: API key passed to ``init_llm_client``.
        model_id: Model identifier, used both to build the client and for the request.
        base_url: API base URL passed to ``init_llm_client``.

    Returns:
        The text of the first completion choice returned by the model.
    """
    llm_client = init_llm_client(api_key, model_id, base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang
    )

    # The request is expressed with Chat Completions keywords (`messages`,
    # `max_tokens`), so it is sent to the chat completions endpoint.
    # NOTE(review): assumes init_llm_client returns an OpenAI-SDK-compatible
    # client -- confirm against utils.
    response = llm_client.chat.completions.create(
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        model=model_id,
        max_tokens=2000,
        temperature=0.5
    )

    # Hand the translated text back to the caller instead of discarding it.
    return response.choices[0].message.content

def update_ui_with_sections(sections_dict, max_sections=100):
    """
    Build one component update per pre-created section textbox.

    Args:
        sections_dict: Mapping of section name -> section content. May be
            empty or None, in which case every textbox is hidden.
        max_sections: Number of section textboxes that exist in the UI
            (the layout pre-creates 100). The returned list always has
            exactly this many entries so it lines up with the outputs.

    Returns:
        A list of ``gr.update(...)`` values of length ``max_sections``:
        visible, labelled updates for the available sections first,
        followed by hidden updates for the unused textboxes.
    """
    if not sections_dict:
        # Nothing to show: hide every textbox, using the same count as the
        # populated path so the number of outputs never varies.
        return [gr.update(visible=False) for _ in range(max_sections)]

    # Sections beyond the number of available textboxes cannot be displayed;
    # drop them rather than returning more updates than there are outputs.
    shown_sections = list(sections_dict.items())[:max_sections]

    components = [
        gr.update(
            value=section_content,
            label=f"Section: {section_name}",
            visible=True
        )
        for section_name, section_content in shown_sections
    ]

    # Hide any unused components
    components.extend(
        gr.update(visible=False)
        for _ in range(max_sections - len(components))
    )

    return components

# Create Gradio app
# Layout: a configuration sidebar (API settings + target language) next to a
# main column holding the URL input, the extract button, read-only result
# fields and a pool of initially hidden textboxes used to show the sections.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Wikipedia Translator")
    
    # State variable to store sections (starts as an empty dict; filled by
    # the last output of extract_wikipedia_content)
    sections_state = gr.State({})
    
    with gr.Row():
        # Sidebar for configuration
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")
            
            with gr.Group():
                api_key = gr.Textbox(
                    label="OpenAI API Key", 
                    placeholder="sk-...",
                    type="password",
                )
                
                # Only a placeholder is set (no `value`), so the field is
                # empty unless the user types a model id.
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                )
                
                # NOTE(review): the info text says "Leave default", but no
                # default value is set here -- only a placeholder.
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )
                
                # Choices are the display names (keys of LANGUAGES), so the
                # handler receives e.g. "Spanish", not the language code.
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Spanish",
                    label="Target Language",
                )
            
            gr.Markdown("### About")
            gr.Markdown("""
            This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
            
            1. Configure your API settings
            2. Enter a Wikipedia URL
            3. Click Extract to process the article
            """)
        
        # Main content area
        with gr.Column(scale=2):
            gr.Markdown("### Wikipedia Article")
            
            wiki_url = gr.Textbox(
                label="Wikipedia URL",
                placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                info="Enter the full URL of the Wikipedia article"
            )
            
            extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
            
            # Receives the status message (first value returned by the
            # extract handler).
            output = gr.Markdown(label="Status")
            
            # Results area (will expand in the future)
            # All result fields are read-only; they are filled by the
            # extract handler.
            article_pageid = gr.Textbox(
                label="Article Page ID",
                placeholder="Page ID will appear here after extraction",
                interactive=False
            )

            article_title = gr.Textbox(
                label="Article Title",
                placeholder="Title will appear here after extraction",
                interactive=False
            )

            # NOTE(review): the variable name is misspelled ("aticle"); it is
            # referenced under the same spelling in the click outputs below.
            aticle_summary = gr.Textbox(
                label="Article Summary",
                placeholder="Summary will appear here after extraction",
                interactive=False
            )

            article_xml = gr.Textbox(
                label="Article XML",
                placeholder="XML will appear here after extraction",
                interactive=False,
                visible=False  # Hidden by default as it's usually large
            )
            
            # Pre-define section textboxes (limit to 100 for simplicity)
            # They are created hidden; update_ui_with_sections reveals and
            # fills them, so it is expected to return one update per textbox.
            gr.Markdown("### Article Sections")
            # `sections_container` is bound here but not referenced anywhere
            # else in this file.
            with gr.Column() as sections_container:
                section_textboxes = [
                    gr.Textbox(visible=False, lines=4)
                    for _ in range(100)  # Support up to 100 sections
                ]
    
    # Connect the extract button to the function
    # Step 1: extract_wikipedia_content fills the status, the four article
    # fields and sections_state (six outputs, in this order).
    # Step 2 (chained with .then): update_ui_with_sections reads the stored
    # sections and updates the pool of section textboxes.
    extract_button.click(
        fn=extract_wikipedia_content,
        inputs=[wiki_url, api_key, model_id, base_url, target_language],
        outputs=[
            output,
            article_pageid,
            article_title,
            aticle_summary,
            article_xml,
            sections_state,
        ]
    ).then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_textboxes
    )

# Launch the app
# Only start the server when the file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()