Spaces:
Running
Running
File size: 2,952 Bytes
94260c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
def extract_wiki_id(wiki_url):
    """
    Extracts the Wikipedia ID from the given URL.

    The ID is everything that follows the first ``wiki/`` in the URL, up to
    (but not including) the first ``#`` fragment or ``?`` query marker.

    Args:
        wiki_url (str): URL of a Wikipedia article. Leading and trailing
            whitespace is ignored.

    Returns:
        str | None: The extracted ID, or None when the URL is empty/None or
        contains no ``wiki/`` segment.
    """
    import re

    # Empty or missing input cannot contain an ID; bail out instead of
    # letting re.search raise TypeError on None.
    if not wiki_url:
        return None

    # Pasted URLs often carry stray spaces or a trailing newline, which the
    # character class below would otherwise capture as part of the ID.
    match = re.search(r'wiki/([^#?]+)', wiki_url.strip())
    return match.group(1) if match else None
def get_wiki_details(wiki_id):
    """
    Placeholder lookup of Wikipedia article details.

    Args:
        wiki_id: Identifier of the article. Currently ignored: the same
            hard-coded sample record is returned for every value.

    Returns:
        dict: A fresh dictionary with the keys "pageid", "title", "summary",
        "wiki_xml" and "sections" (a mapping of section name to content).
    """
    # TODO: replace the dummy payload with a real call to the Wikipedia API
    # or the backend service.
    sample_sections = {
        "Introduction": "AI Introduction content...",
        "History": "AI History content...",
        "Applications": "AI Applications content...",
    }
    details = dict(
        pageid=123456,
        title="Artificial Intelligence",
        summary="AI is the simulation of human intelligence in machines.",
        wiki_xml="<xml>...</xml>",
        sections=sample_sections,
    )
    return details
def init_llm_client(api_key, base_url="https://api.openai.com/v1"):
    """
    Configure the ``openai`` module and hand it back as the LLM client.

    The credentials are stored as attributes on the imported module itself,
    so the configuration is process-wide rather than per-client.

    Args:
        api_key (str): API key assigned to ``openai.api_key``.
        base_url (str): Endpoint assigned to ``openai.api_base``.

    Returns:
        module: The configured ``openai`` module.
    """
    import openai as llm_client

    # NOTE(review): `api_base` looks like the pre-1.0 openai SDK setting;
    # confirm it matches the SDK version this project installs.
    llm_client.api_base = base_url
    llm_client.api_key = api_key
    return llm_client
def split_content_into_sections(wiki_xml, content_format="Plain Text"):
    """
    Split the Wikipedia content into logical sections.

    Every direct child of the XML root is treated as one section, keyed by
    its tag name. If several children share the same tag, the last one
    overwrites the earlier ones.

    Args:
        wiki_xml (str): The XML content of the Wikipedia article
        content_format (str): The format to return the content in. "XML"
            returns each section serialized as XML; any other value
            (default "Plain Text") returns the section's text content.

    Returns:
        dict: A dictionary mapping section names to their content. Empty
        when the input is empty/blank or the root has no children.

    Raises:
        xml.etree.ElementTree.ParseError: If wiki_xml is non-empty but is
            not well-formed XML.
    """
    from xml.etree import ElementTree as ET

    # An empty document has no sections; ET.fromstring would raise on it.
    if not wiki_xml or not wiki_xml.strip():
        return {}

    root = ET.fromstring(wiki_xml)
    want_xml = content_format == "XML"

    sections = {}
    for child in root:
        # The tail is the text that follows the section's closing tag, so it
        # belongs to the parent, not to the section. ET.tostring would
        # serialize it along with the element. The tree is local to this
        # call, so dropping the tail in place is safe.
        child.tail = None

        if want_xml:
            sections[child.tag] = ET.tostring(child, encoding='unicode')
        else:
            # itertext() yields every text fragment in document order
            # (element text, then descendants, then their tails), which a
            # manual walk over iter() gets wrong for nested markup.
            sections[child.tag] = " ".join(child.itertext()).strip()
    return sections
def get_translate_prompt(article_title, artice_summary, original_content, target_lang):
    """
    Build the instruction text that asks the LLM to translate an article.

    Args:
        article_title: Title of the article, inserted after "Title:".
        artice_summary: Summary of the article, inserted after "Summary:".
        original_content: Text to translate, inserted after "Content:".
        target_lang: Name of the language to translate into.

    Returns:
        str: The prompt, starting and ending with a newline.
    """
    prompt = f"""
You are a professional translator. Translate the following content to {target_lang}.
Title: {article_title}
Summary: {artice_summary}
Content: {original_content}
"""
    return prompt