def extract_wiki_id(wiki_url):
    """Extract the Wikipedia page identifier from a URL.

    The identifier is everything that follows the first ``wiki/`` in the
    URL, up to (but not including) a ``#`` fragment or ``?`` query string.

    Args:
        wiki_url: The URL to inspect. ``None`` or an empty string is
            accepted and treated as "no identifier".

    Returns:
        The identifier as a string, or ``None`` when the URL is empty or
        contains no ``wiki/<id>`` part.
    """
    import re

    # Guard first: re.search() raises TypeError on None.
    if not wiki_url:
        return None
    match = re.search(r'wiki/([^#?]+)', wiki_url)
    return match.group(1) if match else None


def get_wiki_details(wiki_id):
    """Return details for a Wikipedia article (placeholder implementation).

    This is a stub: ``wiki_id`` is currently ignored and the same dummy
    record is returned for every call. It should eventually be replaced by
    a call to the Wikipedia API or a backend service.

    Args:
        wiki_id: Identifier of the article (unused for now).

    Returns:
        A dict with the keys ``pageid``, ``title``, ``summary``,
        ``wiki_xml`` and ``sections`` (a dict mapping section name to
        section content).
    """
    # This should interact with the Wikipedia API or your backend service.
    # For now, returning dummy data.
    return {
        "pageid": 123456,
        "title": "Artificial Intelligence",
        "summary": "AI is the simulation of human intelligence in machines.",
        "wiki_xml": "...",
        "sections": {
            "Introduction": "AI Introduction content...",
            "History": "AI History content...",
            "Applications": "AI Applications content...",
        },
    }


def init_llm_client(api_key, base_url="https://api.openai.com/v1"):
    """Configure the ``openai`` module and return it as the client.

    Sets the module-level ``api_key`` and ``api_base`` attributes, so the
    configuration is global to the process rather than per client.

    Args:
        api_key: API key to assign to ``openai.api_key``.
        base_url: Endpoint to assign to ``openai.api_base``.

    Returns:
        The configured ``openai`` module.
    """
    import openai

    openai.api_key = api_key
    # NOTE(review): `api_base` looks like the pre-1.0 SDK attribute; newer
    # SDK versions appear to use `base_url` -- confirm against the version
    # this project pins.
    openai.api_base = base_url
    return openai


def split_content_into_sections(wiki_xml, content_format="Plain Text"):
    """Split the Wikipedia content into logical sections.

    Every direct child of the XML root is treated as one section, keyed by
    its tag name. If several children share a tag, the last one wins.

    Args:
        wiki_xml (str): The XML content of the Wikipedia article.
        content_format (str): ``"XML"`` returns each section serialized as
            XML; any other value (default ``"Plain Text"``) returns the
            section's text content only.

    Returns:
        dict: A dictionary mapping section names to their content.

    Raises:
        xml.etree.ElementTree.ParseError: If ``wiki_xml`` is not
            well-formed XML.
    """
    import copy
    from xml.etree import ElementTree as ET

    root = ET.fromstring(wiki_xml)
    as_xml = content_format == "XML"

    sections = {}
    for child in root:
        # Assuming each child of the root is a section.
        if as_xml:
            # tostring() also emits the element's tail (the text that
            # follows its closing tag), which is not part of the section.
            # Serialize a shallow copy without the tail so the parsed tree
            # itself is left untouched.
            section = copy.copy(child)
            section.tail = None
            sections[child.tag] = ET.tostring(section, encoding='unicode')
        else:
            # itertext() yields the inner text in document order and never
            # includes the section's own tail. Pieces are space-separated
            # so text from adjacent elements does not run together.
            sections[child.tag] = " ".join(child.itertext()).strip()
    return sections


def get_translate_prompt(article_title, artice_summary, original_content, target_lang):
    """Construct the translation prompt for the LLM.

    Args:
        article_title: Title of the article being translated.
        artice_summary: Summary of the article (the parameter name keeps
            its historical spelling so keyword callers continue to work).
        original_content: The content to translate.
        target_lang: Language to translate the content into.

    Returns:
        str: The prompt text: the instruction, then the title and summary
        as context, then the content itself.
    """
    prompt_lines = [
        "",
        f"You are a professional translator. Translate the following content to {target_lang}.",
        "",
        f"Title: {article_title}",
        f"Summary: {artice_summary}",
        "",
        "Content:",
        f"{original_content}",
        "",
    ]
    return "\n".join(prompt_lines)