# Spaces: traopia / Ask-FashionDB (Running)
# File size: 4,472 Bytes



from urllib.parse import urlparse
import logging
log = logging.getLogger(__name__)
from time import sleep
import requests
helpers_session = requests.Session()
from wikibaseintegrator.wbi_helpers import get_user_agent
import pandas as pd
from string import Template
# Module-level flag; nothing in the visible part of this file reads it.
queries = False

# URL of the api.php endpoint on fashionwiki.wikibase.cloud.
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'

# Defaults that execute_sparql_query() falls back to when the caller passes
# no explicit endpoint / user agent.
config = dict(
    SPARQL_ENDPOINT_URL="https://fashionwiki.wikibase.cloud/query/sparql",
    # NOTE(review): this looks like a template placeholder - confirm whether a
    # real bot name / contact URL should be used here.
    USER_AGENT='YourBotName/1.0 (https://yourwebsite.org/bot-info)',
    WIKIBASE_URL=wikibase_api_url,
)

# Lookup table from a human-readable property label to its property ID
# ("P<number>"); presumably the IDs belong to the Wikibase instance configured
# in `config` - verify against that instance.
# NOTE(review): 'country' and 'country of origin' both map to 'P30' - confirm
# that the shared ID is intentional.
wikibase_properties_id = {
    'instance of': 'P2',
    'reference URL': 'P24',
    'start time': 'P15',
    'end time': 'P16',
    'occupation title': 'P25',
    'educated at': 'P9',
    'employer': 'P10',
    'work location': 'P7',
    'award received': 'P18',
    'point in time': 'P28',
    'exact match': 'P23',
    'date of birth': 'P3',
    'place of birth': 'P4',
    'date of death': 'P5',
    'country of citizenship': 'P6',
    'occupation': 'P19',
    'sex or gender': 'P8',
    'official website': 'P17',
    'perfumes': 'P27',
    'who wears it': 'P26',
    'inception': 'P11',
    'headquarters location': 'P12',
    'parent organization': 'P13',
    'founded by': 'P14',
    'owned by': 'P22',
    'industry': 'P20',
    'country': 'P30',
    'total revenue': 'P21',
    'designer employed': 'P29',
    'country of origin': 'P30',
    'fashion collection': 'P31',
    'fashion season': 'P32',
    'fashion show location': 'P33',
    'description of fashion collection': 'P34',
    'image of fashion collection': 'P35',
    'editor of fashion collection description': 'P36',
    'date of fashion collection': 'P37',
    'fashion show category': 'P38',
    'fashion house X fashion collection': 'P39',
}

# Lookup table from a human-readable class label to its item ID ("Q<number>");
# presumably the IDs belong to the Wikibase instance configured in `config` -
# verify against that instance.
classes_wikibase = {
    'fashion designer': 'Q5',
    'fashion house': 'Q1',
    'business': 'Q9',
    'academic institution': 'Q2',
    'geographic location': 'Q4',
    'fashion award': 'Q8',
    'gender': 'Q6',
    'occupation': 'Q7',
    'human': 'Q36',
    'organization': 'Q3',
    'brand': 'Q38',
    'lifestyle brand': 'Q3417',
    'privately held company': 'Q1729',
    'fashion season': 'Q8199',
    'fashion show category': 'Q8200',
    'fashion season collection': 'Q8201',
    'fashion journalist': 'Q8207',
}


def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
    """
    Execute any SPARQL query with the provided parameters.

    The query is sent as the raw body of a POST request (Content-Type
    ``application/sparql-query``) through the shared ``helpers_session`` and the
    JSON-decoded response is returned. Connection errors and HTTP
    500/502/503/504/429 responses are retried after sleeping; any other error
    status raises immediately.

    :param query: The SPARQL query text.
    :param prefix: Optional text prepended to the query, separated by a newline.
    :param endpoint: SPARQL endpoint URL; defaults to ``config['SPARQL_ENDPOINT_URL']``.
    :param user_agent: User agent handed to ``get_user_agent``; defaults to ``config['USER_AGENT']``.
    :param max_retries: Maximum number of request attempts.
    :param retry_after: Seconds to sleep before the next attempt. On a 429 response a
        usable integer ``Retry-After`` header replaces it for that single wait.
    :return: The decoded JSON document returned by the endpoint.
    :raises requests.exceptions.HTTPError: For 4xx/5xx statuses that are not retried.
    :raises Exception: When no attempt produced a result within ``max_retries``.
    """

    sparql_endpoint_url = str(endpoint or config['SPARQL_ENDPOINT_URL'])
    user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)

    hostname = urlparse(sparql_endpoint_url).hostname
    if hostname and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None:
        log.warning('WARNING: Please set a user agent if you interact with a Wikimedia Foundation instance.')

    if prefix:
        query = prefix + '\n' + query

    headers = {
        'Accept': 'application/sparql-results+json',
        'User-Agent': get_user_agent(user_agent),
        'Content-Type': 'application/sparql-query'  # Raw query in the body, not form-encoded
    }

    # Encode explicitly: a ``str`` body leaves the byte encoding to the HTTP
    # stack, which (depending on its version) may use Latin-1 and then fail on,
    # or corrupt, queries containing other characters. The SPARQL protocol
    # expects UTF-8 for a directly POSTed query.
    payload = query.encode('utf-8')

    # Attempt to make the request
    for _ in range(max_retries):
        try:
            # Use 'data' instead of 'params' so the query travels in the request body
            response = helpers_session.post(sparql_endpoint_url, data=payload, headers=headers)
        except requests.exceptions.ConnectionError as e:
            log.exception("Connection error: %s. Sleeping for %d seconds.", e, retry_after)
            sleep(retry_after)
            continue
        if response.status_code in (500, 502, 503, 504):
            log.error("Service unavailable (HTTP Code %d). Sleeping for %d seconds.", response.status_code, retry_after)
            sleep(retry_after)
            continue
        if response.status_code == 429:
            # Honour the server's hint for this wait only, so one short (or long)
            # Retry-After value does not silently change every later back-off.
            wait = retry_after
            header_value = response.headers.get('retry-after')
            if header_value is not None:
                try:
                    # Clamp at zero: sleep() rejects negative durations.
                    wait = max(0, int(header_value))
                except ValueError:
                    # Retry-After may also be an HTTP-date; keep the configured delay then.
                    wait = retry_after
            log.error("Too Many Requests (429). Sleeping for %d seconds", wait)
            sleep(wait)
            continue
        response.raise_for_status()  # Raise for any remaining 4xx/5xx status
        return response.json()  # Return the JSON result if successful

    raise Exception(f"No result after {max_retries} retries.")


def get_results_to_df(query: str) -> pd.DataFrame:
    """
    Run a SPARQL query and return its result bindings as a DataFrame of plain values.

    Each row is one entry of ``results.bindings``; each cell holds the ``value``
    field of the corresponding binding, or ``None`` where the variable is unbound
    in that row. When the response carries a ``head.vars`` list it defines the
    columns, so the frame keeps every selected variable (in query order) even
    when the result set is empty or a variable is never bound.

    :param query: The SPARQL query text, passed unchanged to ``execute_sparql_query``.
    :return: DataFrame with one column per query variable.
    :raises KeyError: If the response has no ``results``/``bindings`` section.
    """
    results = execute_sparql_query(query)
    bindings = results["results"]["bindings"]
    # Fall back to inferring columns from the bindings when the head is missing
    # or lists no variables.
    variables = results.get("head", {}).get("vars") or None
    df = pd.DataFrame(bindings, columns=variables)
    # Unbound variables show up as NaN cells; bound ones are dicts with a 'value' key.
    return df.map(lambda cell: cell['value'] if pd.notnull(cell) else None)