Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from utils import (extract_wiki_id, get_wiki_details, | |
| init_llm_client, split_content_into_sections, | |
| get_translate_prompt) | |
| import json | |
| import json_repair | |
# Target languages offered for translation: display name -> two-letter code.
# Insertion order is preserved, so the names are listed in this order.
LANGUAGES = dict(
    Arabic="ar",
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Russian="ru",
    Japanese="ja",
    Chinese="zh",
    Hindi="hi",
    Korean="ko",
)
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang, content_format):
    """
    Fetch a Wikipedia article and split it into sections ready for translation.

    Args:
        wiki_url: URL of the Wikipedia article. Surrounding whitespace is ignored.
        api_key, model_id, base_url, target_lang: Not used by the extraction
            itself; accepted because the extract button passes them as inputs.
        content_format: "XML" splits the article's XML; any other value splits
            the article's plain content.

    Returns:
        A 6-tuple ``(status, page_id, title, summary, article_xml, sections)``.
        On failure ``status`` carries the error message, the four article
        fields are ``None`` and ``sections`` is an empty dict.
    """
    def failure(message):
        # Keep the shape identical to the success tuple so every output
        # component still receives a value.
        return message, None, None, None, None, {}

    # Tolerate a missing value or a URL pasted with stray whitespace.
    wiki_id = extract_wiki_id((wiki_url or "").strip())
    if not wiki_id:
        return failure("Invalid Wikipedia URL. Please check the URL and try again.")

    # Get the details of the Wikipedia article
    wiki_details = get_wiki_details(wiki_id)
    if not wiki_details:
        # NOTE(review): assumes a failed lookup yields a falsy value - confirm
        # against get_wiki_details.
        return failure("Could not retrieve the article details. Please try again.")

    source_key = 'wiki_xml' if content_format == "XML" else 'content'
    content_sections = split_content_into_sections(wiki_details[source_key], content_format)

    return (
        f"Extraction complete! Sections: {len(content_sections)}",
        wiki_details['pageid'],
        wiki_details['title'],
        wiki_details['summary'],
        wiki_details['wiki_xml'],
        content_sections,
    )
def translate_content(content, article_title, artice_summary, content_format,
                      target_lang, api_key, model_id, base_url,
                      max_tokens=2000, temperature=0.5):
    """
    Translate a piece of article content with the configured chat model.

    The prompt is built by ``get_translate_prompt`` and the model's reply is
    parsed with ``json_repair``; the translation is read from the
    ``output_content`` key of the decoded object.

    Args:
        content: The text (or XML) to translate.
        article_title: Title of the article, passed to the prompt builder.
        artice_summary: Summary of the article, passed to the prompt builder
            (the misspelt name is kept because callers pass it by keyword).
        content_format: Content format label forwarded to the prompt builder.
        target_lang: Target language forwarded to the prompt builder.
        api_key: API key used to create the LLM client.
        model_id: Model identifier sent with the request.
        base_url: Base URL forwarded to ``init_llm_client``.
        max_tokens: Completion token limit sent with the request.
        temperature: Sampling temperature sent with the request.

    Returns:
        The value stored under ``output_content`` in the model's reply, or an
        error string when the reply is empty or does not contain that key.
    """
    not_found = "Error: Translation output not found in the response."

    llm_client = init_llm_client(api_key, base_url=base_url)
    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang,
        content_format=content_format
    )

    # Call the LLM to get the translation
    response = llm_client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )

    # The reply text may be absent; do not hand None to the parser.
    if not response.choices:
        return not_found
    raw_reply = response.choices[0].message.content
    if not raw_reply:
        return not_found

    decoded_object = json_repair.loads(raw_reply)
    # Only a dict can carry the key. On a str, ``in`` would be a substring
    # test and the lookup below would raise TypeError.
    if isinstance(decoded_object, dict) and 'output_content' in decoded_object:
        return decoded_object['output_content']
    return not_found
def translate_section(section_content, article_title, article_summary, content_format, target_lang, api_key, model_id, base_url):
    """
    Translate one section of the article.

    Returns a prompt-for-input message instead of calling the model when
    either the section content or the API key is missing; otherwise returns
    whatever ``translate_content`` returns for the section.
    """
    has_required_inputs = bool(section_content) and bool(api_key)
    if has_required_inputs:
        request = {
            "content": section_content,
            "article_title": article_title,
            "artice_summary": article_summary,
            "content_format": content_format,
            "target_lang": target_lang,
            "api_key": api_key,
            "model_id": model_id,
            "base_url": base_url,
        }
        return translate_content(**request)
    return "Please provide content and API key for translation."
def update_ui_with_sections(sections_dict, max_sections=100):
    """
    Build the Gradio updates for the pre-created section rows.

    Every row is made of three components, in this order: section textbox,
    translate button, translation output. The returned list always holds
    exactly ``3 * max_sections`` updates: one visible row per section,
    followed by hidden rows for the unused slots.

    Args:
        sections_dict: Mapping of section name to section content. May be
            empty or None, in which case every row is hidden.
        max_sections: Number of section rows that exist in the UI. Sections
            beyond this count are not shown.

    Returns:
        A flat list of ``gr.update(...)`` objects.
    """
    def hidden_row():
        return [
            gr.update(visible=False),  # section textbox
            gr.update(visible=False),  # translate button
            gr.update(visible=False),  # translation output
        ]

    # Sections past max_sections have no row to land in. Dropping them keeps
    # the number of updates equal to the number of output components.
    shown_sections = list((sections_dict or {}).items())[:max_sections]

    components = []
    for section_name, section_content in shown_sections:
        components.extend([
            # Section content textbox
            gr.update(
                value=section_content,
                label=f"Section: {section_name}",
                visible=True,
            ),
            # Translate button
            gr.update(
                visible=True,
                value=f"Translate {section_name}",
            ),
            # Translation output, cleared for the new article
            gr.update(
                visible=True,
                value="",
                label=f"Translation: {section_name}",
            ),
        ])

    # Hide any unused rows
    for _ in range(max_sections - len(shown_sections)):
        components.extend(hidden_row())
    return components
| # Build the Gradio UI: a configuration sidebar, the article fields, and 100 pre-created (initially hidden) section rows | |
| with gr.Blocks(theme=gr.themes.Monochrome()) as demo: | |
| gr.Markdown("# Wikipedia Translator") | |
| # State holding the sections mapping (section name -> content) produced by extract_wikipedia_content; starts as an empty dict | |
| sections_state = gr.State({}) | |
| with gr.Row(): | |
| # Sidebar: API key, model ID, base URL, target language and content format; all of them are inputs to translate_section | |
| with gr.Column(scale=1): | |
| gr.Markdown("### Configuration") | |
| with gr.Group(): | |
| api_key = gr.Textbox( | |
| label="OpenAI API Key", | |
| placeholder="sk-...", | |
| type="password", | |
| ) | |
| model_id = gr.Textbox( | |
| label="OpenAI Model ID", | |
| placeholder="gpt-4.1-mini", | |
| value="gpt-4.1-mini", | |
| ) | |
| base_url = gr.Textbox( | |
| label="OpenAI API Base URL (Optional)", | |
| placeholder="https://api.openai.com/v1", | |
| info="Leave default unless using a proxy" | |
| ) | |
| target_language = gr.Dropdown( | |
| choices=list(LANGUAGES.keys()), | |
| value="Arabic", | |
| label="Target Language", | |
| ) | |
| content_format = gr.Radio( | |
| choices=["Text", "XML"], | |
| value="XML", | |
| label="Content Format", | |
| info="Choose how to display article content" | |
| ) | |
| gr.Markdown("### About") | |
| gr.Markdown(""" | |
| This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models. | |
| 1. Configure your API settings | |
| 2. Enter a Wikipedia URL | |
| 3. Click Extract to process the article | |
| """) | |
| # Main content area: URL input, extract button, status message, then the extracted article fields and section rows | |
| with gr.Column(scale=2): | |
| gr.Markdown("### Wikipedia Article") | |
| wiki_url = gr.Textbox( | |
| label="Wikipedia URL", | |
| placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence", | |
| info="Enter the full URL of the Wikipedia article" | |
| ) | |
| extract_button = gr.Button("Extract and Prepare for Translation", variant="primary") | |
| output = gr.Markdown(label="Status") | |
| # Read-only fields filled by the extract step (page ID, title, summary, XML). NOTE: the summary textbox variable is spelled `aticle_summary` throughout this block | |
| article_pageid = gr.Textbox( | |
| label="Article Page ID", | |
| placeholder="Page ID will appear here after extraction", | |
| interactive=False | |
| ) | |
| article_title = gr.Textbox( | |
| label="Article Title", | |
| placeholder="Title will appear here after extraction", | |
| interactive=False | |
| ) | |
| aticle_summary = gr.Textbox( | |
| label="Article Summary", | |
| placeholder="Summary will appear here after extraction", | |
| interactive=False | |
| ) | |
| article_xml = gr.Textbox( | |
| label="Article XML", | |
| placeholder="XML will appear here after extraction", | |
| interactive=False, | |
| visible=False # Kept hidden because the XML is usually large; it is still listed as an output of the extract step | |
| ) | |
| # Pre-create every section row (textbox, translate button, translation output) as hidden; update_ui_with_sections later makes one row visible per section | |
| gr.Markdown("### Article Sections") | |
| with gr.Column() as sections_container: | |
| section_components = [] | |
| for i in range(100): # Row count; must stay equal to the 100 rows assumed by update_ui_with_sections | |
| with gr.Row(): | |
| section_textbox = gr.Textbox(visible=False, lines=4) | |
| translate_btn = gr.Button("Translate", visible=False) | |
| translation_output = gr.Textbox(visible=False, lines=4) | |
| section_components.extend([section_textbox, translate_btn, translation_output]) | |
| # Wire this row's button: translate_section reads this row's textbox plus the article/sidebar fields and writes to this row's output | |
| translate_btn.click( | |
| fn=translate_section, | |
| inputs=[ | |
| section_textbox, | |
| article_title, | |
| aticle_summary, | |
| content_format, | |
| target_language, | |
| api_key, | |
| model_id, | |
| base_url | |
| ], | |
| outputs=translation_output | |
| ) | |
| # On extract: fill the status, article fields and sections_state, then rebuild the section rows from sections_state | |
| extract_button.click( | |
| fn=extract_wikipedia_content, | |
| inputs=[wiki_url, api_key, model_id, base_url, target_language, content_format], | |
| outputs=[ | |
| output, | |
| article_pageid, | |
| article_title, | |
| aticle_summary, | |
| article_xml, | |
| sections_state, | |
| ] | |
| ).then( | |
| fn=update_ui_with_sections, | |
| inputs=[sections_state], | |
| outputs=section_components | |
| ) | |
| # Launch the app only when this file is run as a script | |
| if __name__ == "__main__": | |
| demo.launch() |