AGAZO_Final_Assignment

Sleeping

File size: 8,551 Bytes

f66d8b7

import requests
import wikipediaapi
from langchain_core.tools import tool

@tool
def wikipedia_revision_by_year_keyword(keyword: str, year: int) -> dict:
    """
    Find the best-matching English Wikipedia page for a keyword and return
    the last revision of that page made on or before 31 December of `year`.

    Args:
        keyword: Free-text search query; only the top search hit is used.
        year: Calendar year whose end (23:59:59 UTC, 31 December) is the cutoff.

    Returns:
        A dict with "title", "timestamp", "user", "comment" and "content"
        (the revision text), or a dict with a single "error" key when no page
        or no revision is found. The revision can be older than `year` if the
        page was not edited during that year.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout or
            an HTTP error status.
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    # Identify the client, consistent with the other Wikipedia helpers in this file.
    headers = {"User-Agent": USER_AGENT}
    timeout_seconds = 30  # never block the calling agent indefinitely

    # Step 1: resolve the keyword to a page title via full-text search.
    # Passing `params` lets requests do the URL encoding.
    search_http = requests.get(
        api_url,
        params={
            "action": "query",
            "list": "search",
            "format": "json",
            "srsearch": keyword,
            "srlimit": 1,
        },
        headers=headers,
        timeout=timeout_seconds,
    )
    search_http.raise_for_status()
    search_results = search_http.json().get("query", {}).get("search", [])
    if not search_results:
        return {"error": f"No Wikipedia page found for '{keyword}'."}
    title = search_results[0]["title"]

    # Step 2: walk the history backwards from the end of the requested year
    # and take the first revision encountered.
    timestamp = f"{year}-12-31T23:59:59Z"
    rev_http = requests.get(
        api_url,
        params={
            "action": "query",
            "format": "json",
            "prop": "revisions",
            "titles": title,
            "rvlimit": 1,
            "rvprop": "timestamp|user|comment|content",
            "rvdir": "older",
            "rvstart": timestamp,
        },
        headers=headers,
        timeout=timeout_seconds,
    )
    rev_http.raise_for_status()
    pages = rev_http.json().get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    if not page.get("revisions"):
        return {"error": f"No revision found for page '{title}' before {timestamp}."}

    rev = page["revisions"][0]
    return {
        "title": title,
        "timestamp": rev["timestamp"],
        # The API omits "user" when the author has been hidden.
        "user": rev.get("user", "[User hidden]"),
        "comment": rev.get("comment", ""),
        # Without `rvslots`, the API returns the revision text under the "*" key.
        "content": rev.get("*", "[Content omitted]"),
    }


import requests

# User-Agent header value sent with the Wikipedia API requests in this file.
# NOTE(review): the contact address in the string looks like a redacted
# placeholder — confirm and substitute a real contact before running.
USER_AGENT = "MyGenericTool/1.0 ([email protected])"  # Replace with your info

def get_all_featured_articles():
    """
    Retrieves a list of titles of all Featured Articles on English Wikipedia.

    The category listing is paginated by the API, so this follows the
    `continue` token until every batch has been collected.

    Returns:
        list: Page titles in "Category:Featured articles", or an empty list if
              any request fails (partial results are discarded so callers
              never mistake a truncated list for the full one).
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': 'Category:Featured articles',
        'cmtype': 'page',
        'cmlimit': 'max'
    }
    headers = {'User-Agent': USER_AGENT}

    featured_articles = []
    try:
        while True:
            response = requests.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
            featured_articles.extend(
                cm['title'] for cm in data.get('query', {}).get('categorymembers', [])
            )

            # The API signals further batches with a top-level "continue"
            # object whose key/value pairs must be merged into the next request.
            continuation = data.get('continue')
            if not continuation:
                break
            params.update(continuation)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching featured articles: {e}")
        return []

    return featured_articles

def get_article_promotion_date(title):
    """
    Retrieves the date (YYYY-MM) when an article was promoted to Featured Article status.

    Heuristic: scans the edit summaries of the 50 most recent revisions of the
    article's talk page for one that mentions
    "Wikipedia:Featured article candidates" together with the word "promoted".

    Args:
        title (str): Article title (without the "Talk:" prefix).

    Returns:
        str | None: The matching revision's timestamp truncated to 'YYYY-MM',
                    or None if no such revision is found in that window or
                    the request fails.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': f"Talk:{title}",
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '50',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}

    try:
        # TLS verification is left at the default (enabled) and a timeout is
        # set so one stalled request cannot hang a long scan.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None

    page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
    for rev in page_data.get('revisions', []):
        comment = rev.get('comment', '')  # absent when the summary is hidden
        timestamp = rev.get('timestamp')
        if (
            timestamp
            and "Wikipedia:Featured article candidates" in comment
            and "promoted" in comment.lower()
        ):
            return timestamp[:7]  # Return YYYY-MM
    return None

def get_nomination_user(title, promotion_month_year):
    """
    Retrieves the user presumed to have nominated a Featured Article.

    Heuristic: fetches up to 500 of the most recent talk-page revisions and
    returns the author of the oldest one whose edit summary mentions
    "Wikipedia:Featured article candidates/<title>".

    Args:
        title (str): Article title (without the "Talk:" prefix).
        promotion_month_year (str): Promotion month in 'YYYY-MM' format.
            Accepted for interface compatibility; not currently used to
            narrow the search.

    Returns:
        str | None: The username, or None if no matching revision is found,
                    the author is hidden, or the request fails.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': f"Talk:{title}",
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '500',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None

    page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
    revisions = page_data.get('revisions', [])
    fac_marker = f"Wikipedia:Featured article candidates/{title}"

    # With rvdir=older the API lists newest first, so reversed() walks the
    # window from oldest to newest and stops at the earliest matching edit.
    for rev in reversed(revisions):
        if fac_marker in rev.get('comment', ''):
            # The author is already part of this revision record. Re-querying
            # the history starting at this revision's own timestamp would only
            # return the same revision, so no second request is made.
            return rev.get('user')

    return None

def find_nominator_of_fa_by_promotion_date(month_year):
    """
    Identify the one Featured Article promoted in a given month and its nominator.

    Every Featured Article title is checked in turn: its promotion month is
    looked up and compared with `month_year`. A result is only produced when
    exactly one article matches.

    Args:
        month_year (str): The promotion month and year in 'YYYY-MM' format.

    Returns:
        tuple: (article title, nominator username) when exactly one article
               matches — the username may be None if it cannot be determined —
               otherwise (None, None) after printing an explanatory message.
    """
    matches = [
        candidate
        for candidate in get_all_featured_articles()
        if get_article_promotion_date(candidate) == month_year
    ]

    # Guard clauses: zero or several matches both yield the empty result.
    if not matches:
        print(f"No Featured Article was found to be promoted in {month_year}.")
        return None, None

    if len(matches) > 1:
        print(f"More than one Featured Article was promoted in {month_year}. Please be more specific.")
        return None, None

    (only_match,) = matches
    return only_match, get_nomination_user(only_match, month_year)

if __name__ == "__main__":
    import sys
    import io

    # Re-wrap stdout as UTF-8 so titles with non-ASCII characters can be
    # printed regardless of the console's default encoding.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    # Part 1: dump the Featured Article titles, if any were retrieved.
    listed_titles = get_all_featured_articles()
    if listed_titles:
        print("All Featured Articles:")
        for listed_title in listed_titles:
            print(listed_title)

    # Part 2: look up the nominator for a sample month.
    target_month_year = "2016-11"  # Example: November 2016
    article_title, nominator_username = find_nominator_of_fa_by_promotion_date(target_month_year)

    # When no single article was identified, the lookup function has already
    # printed the reason, so nothing more is reported here.
    if article_title:
        if nominator_username:
            print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year} and was nominated by: {nominator_username}")
        else:
            print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year}, but the nominator could not be determined.")