# Hugging Face file-view header (page residue, not part of the program):
# author: bakrianoo | commit: c25ce6b "setup llm parser" | size: 6.63 kB
import gradio as gr
from utils import (extract_wiki_id, get_wiki_details,
init_llm_client, split_content_into_sections,
get_translate_prompt)
import json
# Target languages offered for translation: display name -> language code.
# Insertion order is the order the names are listed in the UI dropdown.
LANGUAGES = {
    "Arabic": "ar",
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Russian": "ru",
    "Japanese": "ja",
    "Chinese": "zh",
    "Hindi": "hi",
    "Korean": "ko",
}
def extract_wikipedia_content(wiki_url, api_key, model_id, base_url, target_lang):
    """
    Fetch a Wikipedia article and split its content into sections.

    The article id is parsed from ``wiki_url``; the article details are then
    loaded and the article XML is split into sections.

    Only ``wiki_url`` is used here. ``api_key``, ``model_id``, ``base_url``
    and ``target_lang`` are accepted so the signature matches the inputs
    wired to the extract button, but they are not read by this function.

    Returns a 6-tuple:
    (status message, page id, title, summary, article XML, sections).
    When no article id can be parsed from the URL, the status is an error
    message, the four article fields are None and the sections value is an
    empty dict.
    """
    article_id = extract_wiki_id(wiki_url)

    # Guard clause: nothing to fetch without an article id
    if not article_id:
        error_message = "Invalid Wikipedia URL. Please check the URL and try again."
        return error_message, None, None, None, None, {}

    # Get the details of the Wikipedia article
    details = get_wiki_details(article_id)
    article_xml = details['wiki_xml']

    # NOTE(review): the consumer of this value iterates it with .items(),
    # so this is presumably a dict of section name -> text; confirm in utils
    sections = split_content_into_sections(article_xml)
    status_message = "Extraction complete! Sections: " + str(len(sections))

    return (
        status_message,
        details['pageid'],
        details['title'],
        details['summary'],
        article_xml,
        sections
    )
def translate_content(content, article_title, artice_summary, target_lang, api_key, model_id, base_url):
    """
    Translate a piece of article content with the configured LLM.

    Builds the translation prompt from the article title, the article summary
    and the content to translate, sends it to the model as a single user
    message and returns the text of the model's reply.

    Parameters:
        content: the text to translate.
        article_title: title of the article the content belongs to.
        artice_summary: summary of the article (the spelling matches the
            keyword expected by ``get_translate_prompt``).
        target_lang: language to translate into.
        api_key, model_id, base_url: settings passed to ``init_llm_client``;
            ``model_id`` is also the model used for the request.

    Returns:
        The content of the first choice in the model response.
    """
    # NOTE(review): assumes init_llm_client returns an OpenAI-compatible
    # client (the UI labels refer to OpenAI) -- confirm in utils
    llm_client = init_llm_client(api_key, model_id, base_url)

    translation_prompt = get_translate_prompt(
        article_title=article_title,
        artice_summary=artice_summary,
        original_content=content,
        target_lang=target_lang
    )

    # Call the LLM to get the translation.
    # `messages` and `max_tokens` are Chat Completions parameters, so the
    # request goes through chat.completions rather than the Responses API,
    # which takes `input` / `max_output_tokens` instead.
    response = llm_client.chat.completions.create(
        messages=[
            {"role": "user", "content": translation_prompt}
        ],
        model=model_id,
        max_tokens=2000,
        temperature=0.5
    )

    # Hand the translated text back to the caller (previously the response
    # was discarded and the function returned None)
    return response.choices[0].message.content
def update_ui_with_sections(sections_dict):
    """
    Build the component updates that show the extracted sections.

    One update is produced per section textbox slot: the first slots are made
    visible and filled with a section's content (labelled with its name), and
    every remaining slot is hidden.

    The returned list always has exactly 100 entries so that it matches the
    100 textboxes wired as outputs of this callback. This holds both when
    there are no sections (empty or None input) and when there are more than
    100 sections, in which case the extra sections are not displayed.
    """
    # Must equal the number of section textboxes pre-created in the UI
    max_sections = 100

    # Treat None / empty input as "no sections" so every slot gets hidden
    section_items = list((sections_dict or {}).items())[:max_sections]

    # Create visible components for available sections
    components = [
        gr.update(
            value=section_content,
            label=f"Section: {section_name}",
            visible=True
        )
        for section_name, section_content in section_items
    ]

    # Hide any unused components
    components.extend(
        gr.update(visible=False)
        for _ in range(max_sections - len(components))
    )

    return components
# Build the Gradio interface.
# NOTE(review): the nesting below is reconstructed from the layout comments;
# confirm it against the intended page layout.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Wikipedia Translator")

    # State variable holding the sections produced by the last extraction
    sections_state = gr.State({})

    with gr.Row():
        # Sidebar: LLM and translation settings
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")

            with gr.Group():
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="sk-...",
                    type="password",
                )
                model_id = gr.Textbox(
                    label="OpenAI Model ID",
                    placeholder="gpt-4.1-mini",
                )
                base_url = gr.Textbox(
                    label="OpenAI API Base URL (Optional)",
                    placeholder="https://api.openai.com/v1",
                    info="Leave default unless using a proxy"
                )
                target_language = gr.Dropdown(
                    choices=list(LANGUAGES.keys()),
                    value="Spanish",
                    label="Target Language",
                )

            gr.Markdown("### About")
            gr.Markdown("""
This tool extracts content from Wikipedia articles and translates them into your selected language using OpenAI's language models.
1. Configure your API settings
2. Enter a Wikipedia URL
3. Click Extract to process the article
""")

        # Main area: article URL, extraction status and results
        with gr.Column(scale=2):
            gr.Markdown("### Wikipedia Article")

            wiki_url = gr.Textbox(
                label="Wikipedia URL",
                placeholder="https://en.wikipedia.org/wiki/Artificial_intelligence",
                info="Enter the full URL of the Wikipedia article"
            )
            extract_button = gr.Button("Extract and Prepare for Translation", variant="primary")
            output = gr.Markdown(label="Status")

            # Read-only fields filled in after a successful extraction
            article_pageid = gr.Textbox(
                label="Article Page ID",
                placeholder="Page ID will appear here after extraction",
                interactive=False
            )
            article_title = gr.Textbox(
                label="Article Title",
                placeholder="Title will appear here after extraction",
                interactive=False
            )
            aticle_summary = gr.Textbox(
                label="Article Summary",
                placeholder="Summary will appear here after extraction",
                interactive=False
            )
            article_xml = gr.Textbox(
                label="Article XML",
                placeholder="XML will appear here after extraction",
                interactive=False,
                visible=False  # Hidden by default as it's usually large
            )

            # Section textboxes are created up front, hidden, and revealed
            # once sections are available (up to 100 sections)
            gr.Markdown("### Article Sections")
            with gr.Column() as sections_container:
                section_textboxes = []
                for _ in range(100):
                    section_textboxes.append(gr.Textbox(visible=False, lines=4))

    # Step 1: clicking the button runs the extraction and fills the result
    # fields plus the sections state
    extract_inputs = [wiki_url, api_key, model_id, base_url, target_language]
    extract_outputs = [
        output,
        article_pageid,
        article_title,
        aticle_summary,
        article_xml,
        sections_state,
    ]
    extraction_event = extract_button.click(
        fn=extract_wikipedia_content,
        inputs=extract_inputs,
        outputs=extract_outputs
    )

    # Step 2: once extraction finishes, render the stored sections into the
    # pre-created textboxes
    extraction_event.then(
        fn=update_ui_with_sections,
        inputs=[sections_state],
        outputs=section_textboxes
    )

# Launch the app only when executed as a script
if __name__ == "__main__":
    demo.launch()