# Repository page header captured together with this file (not part of the program):
#   AGAZO_Final_Assignment / wikipedia_tool.py
#   Alexandre Gazola
#   codigo agente
#   f66d8b7
#   raw
#   history blame
#   8.55 kB
import requests
import wikipediaapi
from langchain_core.tools import tool
@tool
def wikipedia_revision_by_year_keyword(keyword: str, year: int) -> dict:
    """
    Search for a Wikipedia page and get the latest revision saved up to the end of that year.
    This tool allows you to search within the page content.

    Args:
        keyword: Search terms. Only the top English Wikipedia search hit is used.
        year: The revision returned is the most recent one saved at or before
            31 December 23:59:59 UTC of this year (it may predate the year if
            the page was not edited during it).

    Returns:
        On success, a dict with the keys "title", "timestamp", "user",
        "comment" and "content". On failure (no search hit, no revision
        before the cut-off, or a failed request), a dict with a single
        "error" key describing the problem.
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    # Identify the client the same way the other helpers in this module do.
    headers = {"User-Agent": USER_AGENT}

    # Step 1: resolve the keyword to a page title. Passing `params` lets
    # requests handle URL encoding; TLS verification stays at its secure
    # default and a timeout prevents the tool from hanging indefinitely.
    try:
        search_response = requests.get(
            api_url,
            params={
                "action": "query",
                "list": "search",
                "format": "json",
                "srsearch": keyword,
                "srlimit": 1,
            },
            headers=headers,
            timeout=30,
        )
        search_response.raise_for_status()
        search_data = search_response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        return {"error": f"Wikipedia search request failed for '{keyword}': {exc}"}

    search_results = search_data.get("query", {}).get("search", [])
    if not search_results:
        return {"error": f"No Wikipedia page found for '{keyword}'."}
    title = search_results[0]["title"]

    # Step 2: fetch the single newest revision at or before the cut-off.
    timestamp = f"{year}-12-31T23:59:59Z"
    try:
        rev_response = requests.get(
            api_url,
            params={
                "action": "query",
                "format": "json",
                "prop": "revisions",
                "titles": title,
                "rvlimit": 1,
                "rvprop": "timestamp|user|comment|content",
                "rvdir": "older",  # enumerate backwards in time from rvstart
                "rvstart": timestamp,
            },
            headers=headers,
            timeout=30,
        )
        rev_response.raise_for_status()
        rev_data = rev_response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        return {"error": f"Wikipedia revision request failed for '{title}': {exc}"}

    pages = rev_data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    revisions = page.get("revisions")
    if not revisions:
        return {"error": f"No revision found for page '{title}' before {timestamp}."}

    rev = revisions[0]
    return {
        "title": title,
        "timestamp": rev["timestamp"],
        # The API omits "user" when the username has been hidden.
        "user": rev.get("user", "[User hidden]"),
        "comment": rev.get("comment", ""),
        # In the legacy JSON format the wikitext is stored under the "*" key.
        "content": rev.get("*", "[Content omitted]"),
    }
import requests
# Value sent as the "User-Agent" header on the Wikipedia API requests made by
# the helper functions below. The contact details are a placeholder.
USER_AGENT = "MyGenericTool/1.0 ([email protected])" # Replace with your info
def get_all_featured_articles():
    """
    Retrieves a list of titles of all Featured Articles on English Wikipedia.

    Queries the members of "Category:Featured articles" and follows the API's
    continuation tokens until every page of results has been read (a single
    request returns only one batch of category members).

    Returns:
        list: The page titles, in the order the API returned them. An empty
        list is returned (after printing the error) if any request fails.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': 'Category:Featured articles',
        'cmtype': 'page',
        'cmlimit': 'max'
    }
    headers = {'User-Agent': USER_AGENT}
    featured_articles = []
    try:
        while True:
            # TLS verification is left at its secure default; the timeout
            # keeps a stalled connection from blocking forever.
            response = requests.get(url, params=params, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()
            featured_articles.extend(
                cm['title'] for cm in data.get('query', {}).get('categorymembers', [])
            )
            continuation = data.get('continue')
            if not continuation:
                break
            # Merge the continuation values (e.g. 'cmcontinue') into the next
            # request so it resumes where this batch stopped.
            params.update(continuation)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching featured articles: {e}")
        return []
    return featured_articles
def get_article_promotion_date(title):
    """
    Retrieves the date (YYYY-MM) when an article was promoted to Featured Article status.
    Looks for the date in the talk page history based on "Wikipedia:Featured article candidates" comments.

    Only the edit summaries of the 50 most recent revisions of "Talk:<title>"
    are inspected. The first one (newest first) that mentions
    "Wikipedia:Featured article candidates" and contains "promoted"
    (case-insensitive) determines the result.

    Args:
        title: Article title without the "Talk:" prefix.

    Returns:
        str or None: The first seven characters of the matching revision's
        timestamp ("YYYY-MM"), or None when no revision matches or the
        request fails (the error is printed).
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '50',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}
    try:
        # Certificate verification stays enabled (the requests default), in
        # line with get_nomination_user; the timeout avoids indefinite hangs.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        for rev in page_data.get('revisions', []):
            comment = rev.get('comment', '')
            if "Wikipedia:Featured article candidates" not in comment:
                continue
            timestamp = rev.get('timestamp')
            if timestamp and "promoted" in comment.lower():
                return timestamp[:7]  # Return YYYY-MM
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None
def get_nomination_user(title, promotion_month_year):
    """
    Retrieves the user who nominated a Featured Article promoted in a specific month and year.
    Looks for the nomination discussion on the talk page.

    Heuristic: among the 500 most recent revisions of "Talk:<title>", the
    oldest one whose edit summary mentions
    "Wikipedia:Featured article candidates/<title>" is taken as the start of
    the nomination. The author of the first revision at or after that
    timestamp is then returned.

    Args:
        title: Article title without the "Talk:" prefix.
        promotion_month_year: Accepted for interface compatibility; it is not
            used by the lookup.

    Returns:
        str or None: The username, or None when no matching revision is
        found, the user is not reported, or a request fails (the error is
        printed).
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '500',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}
    fac_marker = f"Wikipedia:Featured article candidates/{title}"
    try:
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        revisions = page_data.get('revisions', [])

        # The API lists revisions newest first ('rvdir': 'older'), so walking
        # the list in reverse goes from oldest to newest and stops at the
        # earliest revision that mentions the candidate page.
        nomination_start_comment = next(
            (rev for rev in reversed(revisions) if fac_marker in rev.get('comment', '')),
            None,
        )
        if not nomination_start_comment:
            return None

        # Ask for the first revision at or after that timestamp. Only the
        # first result is used, so a limit of 1 is sufficient.
        params_history = {
            'action': 'query',
            'format': 'json',
            'titles': talk_page_title,
            'prop': 'revisions',
            'rvprop': 'timestamp|user|comment',
            'rvlimit': '1',
            'rvdir': 'newer',
            'rvstart': nomination_start_comment['timestamp']
        }
        response_history = requests.get(url, params=params_history, headers=headers, timeout=30)
        response_history.raise_for_status()
        data_history = response_history.json()
        page_data_history = next(iter(data_history.get('query', {}).get('pages', {}).values()), {})
        revisions_history = page_data_history.get('revisions', [])
        if revisions_history:
            # The first edit from the nomination timestamp onward is likely the nominator's.
            return revisions_history[0].get('user')
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None
def find_nominator_of_fa_by_promotion_date(month_year):
    """
    Identify the one Featured Article promoted in a given month and who nominated it.

    Every title from get_all_featured_articles() is checked with
    get_article_promotion_date(); a result is produced only when exactly one
    title matches.

    Args:
        month_year (str): The promotion month and year in 'YYYY-MM' format.

    Returns:
        tuple: (article title, nominator username) when exactly one article
        matches; the username comes from get_nomination_user() and may be
        None. (None, None) when zero or several articles match, in which
        case an explanatory message is printed.
    """
    matches = [
        candidate
        for candidate in get_all_featured_articles()
        if get_article_promotion_date(candidate) == month_year
    ]

    # Guard clauses for the two "no unique answer" outcomes.
    if not matches:
        print(f"No Featured Article was found to be promoted in {month_year}.")
        return None, None
    if len(matches) > 1:
        print(f"More than one Featured Article was promoted in {month_year}. Please be more specific.")
        return None, None

    (sole_match,) = matches
    found_nominator = get_nomination_user(sole_match, month_year)
    return sole_match, found_nominator
if __name__ == "__main__":
    import io
    import sys

    # Re-wrap stdout as UTF-8 so titles with non-ASCII characters can be printed.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    # Demo part 1: list every Featured Article title that could be fetched.
    featured_articles = get_all_featured_articles()
    if featured_articles:
        print("All Featured Articles:")
        for article in featured_articles:
            print(article)

    # Demo part 2: look up the nominator for an example month (November 2016).
    target_month_year = "2016-11"
    article_title, nominator_username = find_nominator_of_fa_by_promotion_date(target_month_year)
    if article_title:
        if nominator_username:
            print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year} and was nominated by: {nominator_username}")
        else:
            print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year}, but the nominator could not be determined.")
    # When no title came back, find_nominator_of_fa_by_promotion_date has
    # already printed the reason, so nothing more is reported here.