# Spaces:
# Running
# Running
# Standard-library imports.
import logging
from string import Template
from time import sleep
from urllib.parse import urlparse

# Third-party imports.
import pandas as pd
import requests
from wikibaseintegrator.wbi_helpers import get_user_agent

# Module-level logger, one per module per the logging convention.
log = logging.getLogger(__name__)

# Shared HTTP session so repeated SPARQL requests reuse connections.
helpers_session = requests.Session()
# Flag read elsewhere in the module; its exact purpose is not visible in this
# chunk — presumably gates optional query behaviour (TODO confirm with callers).
queries = False

# MediaWiki Action API endpoint of the target Wikibase instance.
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'

# Central configuration consumed by the SPARQL helpers below.
config = {
    'SPARQL_ENDPOINT_URL': 'https://fashionwiki.wikibase.cloud/query/sparql',
    'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
    'WIKIBASE_URL': wikibase_api_url,
}
# Human-readable property labels -> P-ids on the target Wikibase instance.
# NOTE(review): 'country' and 'country of origin' both map to P30 — looks like
# deliberate aliasing, but confirm against the wiki's property list.
wikibase_properties_id = {
    # Generic statement / reference properties
    "instance of": "P2",
    "reference URL": "P24",
    "start time": "P15",
    "end time": "P16",
    "point in time": "P28",
    "exact match": "P23",
    # Person (designer) biography
    "occupation title": "P25",
    "educated at": "P9",
    "employer": "P10",
    "work location": "P7",
    "award received": "P18",
    "date of birth": "P3",
    "place of birth": "P4",
    "date of death": "P5",
    "country of citizenship": "P6",
    "occupation": "P19",
    "sex or gender": "P8",
    "official website": "P17",
    # Brand / company facts
    "perfumes": "P27",
    "who wears it": "P26",
    "inception": "P11",
    "headquarters location": "P12",
    "parent organization": "P13",
    "founded by": "P14",
    "owned by": "P22",
    "industry": "P20",
    "country": "P30",
    "total revenue": "P21",
    "designer employed": "P29",
    "country of origin": "P30",
    # Fashion collection / show modelling
    "fashion collection": "P31",
    "fashion season": "P32",
    "fashion show location": "P33",
    "description of fashion collection": "P34",
    "image of fashion collection": "P35",
    "editor of fashion collection description": "P36",
    "date of fashion collection": "P37",
    "fashion show category": "P38",
    "fashion house X fashion collection": "P39",
}
# Human-readable class labels -> Q-ids on the target Wikibase instance,
# used as values for 'instance of' (P2) statements.
classes_wikibase = {
    "fashion designer": "Q5",
    "fashion house": "Q1",
    "business": "Q9",
    "academic institution": "Q2",
    "geographic location": "Q4",
    "fashion award": "Q8",
    "gender": "Q6",
    "occupation": "Q7",
    "human": "Q36",
    "organization": "Q3",
    "brand": "Q38",
    "lifestyle brand": "Q3417",
    "privately held company": "Q1729",
    "fashion season": "Q8199",
    "fashion show category": "Q8200",
    "fashion season collection": "Q8201",
    "fashion journalist": "Q8207",
}
def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
    """Execute a SPARQL query, retrying on transient HTTP failures.

    The query is POSTed as the raw request body (Content-Type
    ``application/sparql-query``) to *endpoint*, defaulting to
    ``config['SPARQL_ENDPOINT_URL']``.

    :param query: SPARQL query text.
    :param prefix: Optional prefix block prepended to the query.
    :param endpoint: SPARQL endpoint URL; falls back to the module config.
    :param user_agent: User-Agent string; falls back to the module config.
    :param max_retries: Maximum number of attempts before giving up.
    :param retry_after: Seconds to sleep between retries (a 429 response
        with a numeric Retry-After header overrides this).
    :return: Parsed JSON response (SPARQL results document) as a dict.
    :raises Exception: If no successful response after *max_retries* attempts.
    :raises requests.HTTPError: For non-retryable HTTP error statuses.
    """
    sparql_endpoint_url = str(endpoint or config['SPARQL_ENDPOINT_URL'])
    user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)

    # Wikimedia Foundation endpoints require an identifying User-Agent.
    hostname = urlparse(sparql_endpoint_url).hostname
    if hostname and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None:
        log.warning('WARNING: Please set a user agent if you interact with a Wikimedia Foundation instance.')

    if prefix:
        query = prefix + '\n' + query

    headers = {
        'Accept': 'application/sparql-results+json',
        'User-Agent': get_user_agent(user_agent),
        # Raw query in the body — not form-encoded, hence 'data=' below.
        'Content-Type': 'application/sparql-query'
    }

    for _ in range(max_retries):
        try:
            # 'data' (not 'params') so the query travels as the POST body.
            response = helpers_session.post(sparql_endpoint_url, data=query, headers=headers)
        except requests.exceptions.ConnectionError as e:
            log.exception("Connection error: %s. Sleeping for %d seconds.", e, retry_after)
            sleep(retry_after)
            continue

        # Transient server-side failures: back off and retry.
        if response.status_code in (500, 502, 503, 504):
            log.error("Service unavailable (HTTP Code %d). Sleeping for %d seconds.", response.status_code, retry_after)
            sleep(retry_after)
            continue

        if response.status_code == 429:
            # Honour Retry-After when it is the delta-seconds form. RFC 9110
            # also allows an HTTP-date, which int() cannot parse — previously
            # that raised ValueError and aborted the retry loop; now we fall
            # back to the current delay instead.
            header_value = response.headers.get('retry-after')
            if header_value is not None:
                try:
                    retry_after = int(header_value)
                except ValueError:
                    pass
            log.error("Too Many Requests (429). Sleeping for %d seconds", retry_after)
            sleep(retry_after)
            continue

        response.raise_for_status()  # Raise any non-success status code
        return response.json()  # Return the JSON result if successful

    raise Exception(f"No result after {max_retries} retries.")
def get_results_to_df(query):
    """Run *query* via ``execute_sparql_query`` and return a flat DataFrame.

    Each SPARQL binding cell is a dict like ``{'type': ..., 'value': ...}``;
    only the ``'value'`` entry is kept. Variables that are unbound in a row
    appear as NaN in the raw DataFrame and become ``None``.

    :param query: SPARQL query text.
    :return: ``pd.DataFrame`` with one column per result variable.
    """
    results = execute_sparql_query(query)
    df = pd.DataFrame(results["results"]["bindings"])
    if df.empty:
        # No result rows: nothing to unwrap, return the empty frame as-is.
        return df
    # DataFrame.map is the element-wise API (pandas >= 2.1, successor of
    # applymap); pd.notnull filters the NaN cells of unbound variables.
    return df.map(lambda cell: cell['value'] if pd.notnull(cell) else None)