import requests
import wikipediaapi
from langchain_core.tools import tool


@tool
def wikipedia_revision_by_year_keyword(keyword: str, year: int) -> dict:
    """
    Search Wikipedia for a page matching the keyword and return that page's
    latest revision made on or before the end of the given year, including its
    timestamp, author, edit comment, and wikitext content.
    """
    # Step 1: find the best-matching page title for the keyword.
    search_url = (
        f"https://en.wikipedia.org/w/api.php"
        f"?action=query"
        f"&list=search"
        f"&format=json"
        f"&srsearch={requests.utils.quote(keyword)}"
        f"&srlimit=1"
    )
    search_response = requests.get(search_url, verify=False).json()
    search_results = search_response.get("query", {}).get("search", [])
    if not search_results:
        return {"error": f"No Wikipedia page found for '{keyword}'."}
    title = search_results[0]["title"]

    # Step 2: fetch the newest revision at or before the end of that year.
    timestamp = f"{year}-12-31T23:59:59Z"
    rev_url = (
        f"https://en.wikipedia.org/w/api.php"
        f"?action=query"
        f"&format=json"
        f"&prop=revisions"
        f"&titles={requests.utils.quote(title)}"
        f"&rvlimit=1"
        f"&rvprop=timestamp|user|comment|content"
        f"&rvdir=older"
        f"&rvstart={timestamp}"
    )
    rev_response = requests.get(rev_url, verify=False).json()
    pages = rev_response.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    if "revisions" not in page:
        return {"error": f"No revision found for page '{title}' before {timestamp}."}
    rev = page["revisions"][0]
    return {
        "title": title,
        "timestamp": rev["timestamp"],
        "user": rev["user"],
        "comment": rev.get("comment", ""),
        "content": rev.get("*", "[Content omitted]")
    }
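

# A minimal usage sketch (an assumption, not part of the original file): with
# the @tool decorator above, LangChain wraps the function as a StructuredTool,
# so it is called via .invoke() with a dict of arguments. The keyword and year
# below are illustrative only.
#
#     result = wikipedia_revision_by_year_keyword.invoke(
#         {"keyword": "Python (programming language)", "year": 2015}
#     )
#     print(result["title"], result["timestamp"], result["user"])
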
import requests

USER_AGENT = "MyGenericTool/1.0 ([email protected])"  # Replace with your info


def get_all_featured_articles():
    """
    Retrieves a list of titles of all Featured Articles on English Wikipedia.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': 'Category:Featured articles',
        'cmtype': 'page',
        'cmlimit': 'max'
    }
    headers = {'User-Agent': USER_AGENT}
    try:
        response = requests.get(url, params=params, headers=headers, verify=False)
        response.raise_for_status()
        data = response.json()
        featured_articles = [cm['title'] for cm in data.get('query', {}).get('categorymembers', [])]
        return featured_articles
    except requests.exceptions.RequestException as e:
        print(f"Error fetching featured articles: {e}")
        return []
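

# Note (an addition, not in the original): with 'cmlimit': 'max' a single request
# returns at most 500 category members for regular clients, while
# Category:Featured articles contains several thousand pages. A rough sketch of
# the same query with MediaWiki API continuation, under those assumptions:
def get_all_featured_articles_paged():
    """
    Sketch: like get_all_featured_articles(), but follows the API's 'continue'
    block so the full category is collected across multiple requests.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': 'Category:Featured articles',
        'cmtype': 'page',
        'cmlimit': 'max'
    }
    headers = {'User-Agent': USER_AGENT}
    titles = []
    while True:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        data = response.json()
        titles.extend(cm['title'] for cm in data.get('query', {}).get('categorymembers', []))
        if 'continue' not in data:
            break
        # Feed the returned continuation parameters (e.g. 'cmcontinue') back verbatim.
        params.update(data['continue'])
    return titles
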
def get_article_promotion_date(title):
    """
    Retrieves the date (YYYY-MM) when an article was promoted to Featured Article status.
    Looks for the date in the talk page history based on "Wikipedia:Featured article candidates" comments.
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '50',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}
    try:
        response = requests.get(url, params=params, headers=headers, verify=False)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        revisions = page_data.get('revisions', [])
        # Heuristic: a talk-page edit summary that links the FAC page and says
        # "promoted" marks the promotion; its timestamp gives the month.
        for rev in revisions:
            if "Wikipedia:Featured article candidates" in rev.get('comment', ''):
                timestamp = rev.get('timestamp')
                if timestamp and "promoted" in rev.get('comment', '').lower():
                    return timestamp[:7]  # Return YYYY-MM
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None
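

# A rough alternative sketch (an addition, not in the original): many FA talk
# pages carry an {{Article history}} template whose "actionN=FAC",
# "actionNresult=promoted", and "actionNdate=..." parameters record the
# promotion directly. The parameter names and date format are assumptions; the
# wikitext is fetched with the same legacy revisions API used above.
import re

def get_promotion_date_from_article_history(title):
    """
    Sketch: read the talk page wikitext and return the raw date string of the
    FAC action whose result is 'promoted', or None if not found.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': f"Talk:{title}",
        'prop': 'revisions',
        'rvprop': 'content',
        'rvlimit': '1'
    }
    headers = {'User-Agent': USER_AGENT}
    response = requests.get(url, params=params, headers=headers)
    response.raise_for_status()
    page = next(iter(response.json().get('query', {}).get('pages', {}).values()), {})
    revisions = page.get('revisions', [])
    if not revisions:
        return None
    wikitext = revisions[0].get('*', '')
    # Find each actionN=FAC entry and check whether its matching result is 'promoted'.
    for n in re.findall(r'\|\s*action(\d+)\s*=\s*FAC', wikitext):
        result = re.search(rf'\|\s*action{n}result\s*=\s*(\w+)', wikitext)
        if result and result.group(1).lower() == 'promoted':
            date = re.search(rf'\|\s*action{n}date\s*=\s*([^\n|]+)', wikitext)
            return date.group(1).strip() if date else None
    return None
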
def get_nomination_user(title, promotion_month_year):
    """
    Retrieves the user who nominated a Featured Article promoted in a specific month and year.
    Looks for the nomination discussion on the talk page.
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '500',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        revisions = page_data.get('revisions', [])
        nomination_start_comment = None
        # The API returned revisions newest-first (rvdir='older'), so iterate in
        # reverse to scan from oldest to newest and catch the earliest mention
        # of the article's FAC subpage.
        for rev in reversed(revisions):
            if f"Wikipedia:Featured article candidates/{title}" in rev.get('comment', ''):
                nomination_start_comment = rev
                break
        if nomination_start_comment:
            # Now walk forward in history from that point to find who initiated this section
            params_history = {
                'action': 'query',
                'format': 'json',
                'titles': talk_page_title,
                'prop': 'revisions',
                'rvprop': 'timestamp|user|comment',
                'rvlimit': '500',
                'rvdir': 'newer',
                'rvstart': nomination_start_comment['timestamp']
            }
            response_history = requests.get(url, params=params_history, headers=headers)
            response_history.raise_for_status()
            data_history = response_history.json()
            page_data_history = next(iter(data_history.get('query', {}).get('pages', {}).values()), {})
            revisions_history = page_data_history.get('revisions', [])
            if revisions_history:
                return revisions_history[0].get('user')  # The first edit in the section is likely the nominator
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None
def find_nominator_of_fa_by_promotion_date(month_year):
    """
    Finds the nominator of the (presumably single) Featured Article promoted in the given month and year.

    Args:
        month_year (str): The promotion month and year in 'YYYY-MM' format.

    Returns:
        tuple: A tuple containing the title of the Featured Article and the nominator's username,
               or (None, None) if no single FA was found for that month/year.
    """
    all_featured_articles = get_all_featured_articles()
    promoted_in_month = []
    for article in all_featured_articles:
        promotion_date = get_article_promotion_date(article)
        if promotion_date == month_year:
            promoted_in_month.append(article)
    if len(promoted_in_month) == 1:
        target_article = promoted_in_month[0]
        nominator = get_nomination_user(target_article, month_year)
        return target_article, nominator
    elif not promoted_in_month:
        print(f"No Featured Article was found to be promoted in {month_year}.")
        return None, None
    else:
        print(f"More than one Featured Article was promoted in {month_year}. Please be more specific.")
        return None, None
if __name__ == "__main__":
    import sys
    import io

    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    featured_articles = get_all_featured_articles()
    if featured_articles:
        print("All Featured Articles:")
        for article in featured_articles:
            print(article)  # Printing Unicode strings should now work if stdout is UTF-8

    target_month_year = "2016-11"  # Example: November 2016
    article_title, nominator_username = find_nominator_of_fa_by_promotion_date(target_month_year)

    if article_title and nominator_username:
        print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year} and was nominated by: {nominator_username}")
    elif article_title:
        print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year}, but the nominator could not be determined.")
    elif article_title is None:
        pass  # Message already printed by find_nominator_of_fa_by_promotion_date