Spaces:
Sleeping
Sleeping
import gradio as gr | |
from utils import (extract_wiki_id, get_wiki_details, | |
init_llm_client, split_content_into_sections, | |
get_translate_prompt) | |
import json | |
# Translation targets offered to the user.
# Keys are the display names listed in the "Target Language" dropdown;
# values are the matching two-letter language codes (not read anywhere in
# the code shown here).
LANGUAGES = dict(
    Arabic="ar",
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Russian="ru",
    Japanese="ja",
    Chinese="zh",
    Hindi="hi",
    Korean="ko",
)
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang):
    """Fetch a Wikipedia article and split its content into sections.

    Args:
        wiki_url: URL of the article; handed to ``extract_wiki_id``.
        api_key: Unused here; accepted because the Extract button passes it.
        model_id: Unused here; accepted because the Extract button passes it.
        base_url: Unused here; accepted because the Extract button passes it.
        target_lang: Unused here; accepted because the Extract button passes it.

    Returns:
        A 6-tuple ``(status, pageid, title, summary, wiki_xml, sections)``.
        When the URL cannot be resolved or no article data comes back, the
        four article fields are ``None`` and ``sections`` is an empty dict.
    """
    wiki_id = extract_wiki_id(wiki_url)
    if not wiki_id:
        return "Invalid Wikipedia URL. Please check the URL and try again.", None, None, None, None, {}

    # Get the details of the Wikipedia article
    wiki_details = get_wiki_details(wiki_id)
    if not wiki_details:
        # NOTE(review): get_wiki_details is defined elsewhere; this guard
        # assumes it signals a failed lookup with a falsy value - confirm.
        # Without it, the subscripts below would raise TypeError on None.
        return "Could not retrieve the Wikipedia article. Please check the URL and try again.", None, None, None, None, {}

    wiki_xml = wiki_details['wiki_xml']
    content_sections = split_content_into_sections(wiki_xml)

    return (
        f"Extraction complete! Sections: {len(content_sections)}",
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_xml,
        content_sections,
    )
def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url):
    """Translate a piece of article content with the configured LLM.

    Args:
        content: Text to translate; passed to the prompt as ``original_content``.
        article_title: Title of the source article, included in the prompt.
        artice_summary: Summary of the source article, included in the prompt.
            The spelling is kept because it is both this function's public
            parameter name and the keyword ``get_translate_prompt`` is called with.
        target_lang: Language to translate into, included in the prompt.
        api_key: Credential forwarded to ``init_llm_client``.
        model_id: Model name forwarded to ``init_llm_client`` and to the request.
        base_url: API base URL forwarded to ``init_llm_client``.

    Returns:
        The text of the first completion choice returned by the model.
    """
    llm_client = init_llm_client(api_key, model_id, base_url)
    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang
    )

    # Call the LLM to get the translation.
    # `messages` and `max_tokens` are Chat Completions arguments; the
    # Responses endpoint expects `input` / `max_output_tokens` instead, so the
    # request is sent through chat.completions.
    # NOTE(review): assumes init_llm_client returns an OpenAI-SDK-compatible
    # client - confirm against utils.
    response = llm_client.chat.completions.create(
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        model=model_id,
        max_tokens=2000,
        temperature=0.5
    )

    # Hand the translated text back to the caller instead of discarding it.
    return response.choices[0].message.content
def update_ui_with_sections(sections_dict, max_sections=100):
    """Build one Gradio update per pre-created section textbox.

    Args:
        sections_dict: Mapping of section name to section content. May be
            empty or ``None``, in which case every textbox is hidden.
        max_sections: Number of updates to return. It must equal the number
            of output components this callback is wired to; the default of
            100 matches the size of the ``section_textboxes`` list.

    Returns:
        A list of exactly ``max_sections`` ``gr.update`` objects: a visible,
        labelled update for each section (in dict order) followed by hidden
        updates for the unused textboxes. Sections beyond ``max_sections``
        are dropped so the list never outgrows the available outputs.
    """
    if not sections_dict:
        # Same length as the populated case; a shorter list would not match
        # the number of output components.
        return [gr.update(visible=False) for _ in range(max_sections)]

    # Create visible components for available sections, capped at the number
    # of textboxes that exist.
    shown = list(sections_dict.items())[:max_sections]
    components = [
        gr.update(
            value=section_content,
            label=f"Section: {section_name}",
            visible=True
        )
        for section_name, section_content in shown
    ]

    # Hide any unused components
    components.extend(
        gr.update(visible=False) for _ in range(max_sections - len(components))
    )
    return components
# Create Gradio app
# NOTE(review): the indentation of this block was lost in the copy it was
# recovered from; the nesting below is reconstructed from the `with`
# statements and the original comments - confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Wikipedia Translator")

    # State variable to store sections
    # (written by extract_wikipedia_content, read by update_ui_with_sections)
    sections_state = gr.State({})

    with gr.Row():
        # Sidebar for configuration
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")

            with gr.Group():
                # Rendered as a password field so the key is not displayed.
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="sk-...",
                    type="password",
                )

                # Only a placeholder is set; no default value is supplied.
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                )

                # Only a placeholder is set; no default value is supplied.
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )

                # Choices are the display names (keys) of LANGUAGES.
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Spanish",
                    label="Target Language",
                )

            gr.Markdown("### About")
            gr.Markdown("""
                This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
                1. Configure your API settings
                2. Enter a Wikipedia URL
                3. Click Extract to process the article
                """)

        # Main content area
        with gr.Column(scale=2):
            gr.Markdown("### Wikipedia Article")

            wiki_url = gr.Textbox(
                label="Wikipedia URL",
                placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                info="Enter the full URL of the Wikipedia article"
            )

            extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")

            # Receives the status string returned first by extract_wikipedia_content.
            output = gr.Markdown(label="Status")

            # Results area (will expand in the future)
            # These fields are read-only; they are filled by the extract handler.
            article_pageid = gr.Textbox(
                label="Article Page ID",
                placeholder="Page ID will appear here after extraction",
                interactive=False
            )

            article_title = gr.Textbox(
                label="Article Title",
                placeholder="Title will appear here after extraction",
                interactive=False
            )

            # Variable name is spelled "aticle_summary"; the same spelling is
            # used in the click handler's outputs below.
            aticle_summary = gr.Textbox(
                label="Article Summary",
                placeholder="Summary will appear here after extraction",
                interactive=False
            )

            article_xml = gr.Textbox(
                label="Article XML",
                placeholder="XML will appear here after extraction",
                interactive=False,
                visible=False  # Hidden by default as it's usually large
            )

            # Pre-define section textboxes (limit to 100 for simplicity)
            # All start hidden; update_ui_with_sections reveals and labels the
            # ones that have content. The callback must return one update per
            # textbox in this list.
            gr.Markdown("### Article Sections")
            with gr.Column() as sections_container:
                section_textboxes = [
                    gr.Textbox(visible=False, lines=4)
                    for _ in range(100)  # Support up to 100 sections
                ]

    # Connect the extract button to the function
    # Step 1 fills the status/article fields and stores the sections in
    # sections_state; step 2 (.then) runs afterwards and pushes those sections
    # into the pre-created textboxes.
    # api_key, model_id, base_url and target_language are passed through to
    # extract_wikipedia_content. No event in the code shown here calls
    # translate_content.
    extract_button.click(
        fn=extract_wikipedia_content,
        inputs=[wiki_url, api_key, model_id, base_url, target_language],
        outputs=[
            output,
            article_pageid,
            article_title,
            aticle_summary,
            article_xml,
            sections_state,
        ]
    ).then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_textboxes
    )

# Launch the app
# Only when executed as a script, so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()