| from lxml import html | |
| import requests | |
| import os | |
# API credentials come from the environment so they never live in the source.
# A missing variable raises KeyError right here, before any request is made.
client_id = os.environ['LV_CLIENT_ID']
client_secret = os.environ['LV_CLIENT_SECRET']

# Hard-coded cookie string, kept byte-for-byte as the default value.
# NOTE(review): it embeds session identifiers (JSESSIONID, ATG_SESSION_ID) and
# timestamps, so it was presumably copied from a browser session and will go
# stale. Set the LV_COOKIE environment variable to supply a fresh one without
# editing this file.
_CAPTURED_COOKIE = (
    'ak_cc=US; '
    'OPTOUTMULTI=0:0%7Cc1:0%7Cc2:1%7Cc4:1%7Cc3:1; '
    'ATGID=anonymous; '
    'SGID=sb.springboot41-prd; '
    'SGID=.springboot41-prd; '
    'prevURL=; '
    'currentURL=https://www.louisvuitton.com/ajax/productsearch; '
    'storeLangCommerceHeader=eng-e1; '
    'geolocUserZone=eng-us; '
    'AKA_A2=A; '
    '_abck=94026E30A1FABEBFA844CBD6388965B8~0~YAAQnr8mF5/FGIeIAQAAjjKPkgpw8D5r3zC2HFECV0EQXRXPCoAQ+gYSqR/Sut2w/03toFhfVT9Yda45yFwqUGKStpIxbZAwPw0ooQplfv9eymgzFxyoYjEcqrD6rV/4OEgdCIov1wE0On3Z71z1v9UoZOisQgAlBEKsV0dYi02t6vutjUwi6f5T6N+h6SWX1l62T/QGwvxI13WHrWAOIsNJ1VJd/N3FiC/cKxVKskc3YPJf8tFZ25jOs2cUin5GUXWA7HuLb7dpffVxE0wp5vcOes47KXi9be6zRpbtfF+aHEjVkgPsOjznAFhN/X6FCHBTuJ5UvY2vHlWEIR8kW/pEW4zHm5bfQyaKKiBWQvHTXkeKRyhCnUSoRx9wZPB2dIcdY2igDmG06NT+NWYDPJtLoIU1I14uXhn+p8/w~-1~-1~-1; '
    'bm_sz=A8530E728A25BB2F984ED31AD5DF00A6~YAAQnr8mF6LFGIeIAQAAjjKPkhQ72plnxNz48wepUXjRYRETUBQ1oWkIiS8E0wS/+9NHmJzh9bagubSRjmSbTkSEEzyIbBsWTowVRBJBCiW6xO9lhJT/vmSLZtosV4g0eTOhRFoFwRrorwWUjGarPRTLozlk+KpZppYbt+EbdcyCdZNb95EUr6Za65FUi8FRSH02djUkhL5XlC3aebrVqtHfnG7uCJsWYBo+fnDJU1+mxQ8e1J+iTdq4ZdXsYuUuXZz3A0OJMy1JP5M/kKV4JKmIO89y3rgqZZB+xxNLbY+Su4yH7c/q2+s=~3556656~3686978; '
    'lv-dispatch=eng-us; '
    'consent_ecom_us=functional:1|analytics:0|customization:0|advertising:0; '
    'qb_dnt=2; '
    'ak_bmsc=589EDDF01ED59E65EA72F7393A895F31~000000000000000000000000000000~YAAQnr8mFxbGGIeIAQAATzaPkhR5K7vu8hiBPm/G/uXQB2vcY/Q4eGJWL+r7GNyViebDR/5XNvUSus0l/5Uhpgq+sHzuQR7L7aYzbFREYRNqvAZ0ngto1AvbYgwL8xA7OJM+D03m8i/BMKZHk++/aC3UFmk/RYNOKIv8p4HjlGM22auYotHx2mlWq8DTZPj1z77noO3WrzpYdEirg6xSV8havW9BHABzNmP3Cfnt3xxVq3y3bMNDcpiXy2+eAJ7eX5UNVa/Rj+P75LGxaPAv4AsXiphTgX6rMKt5T6OAvLMX6vdLLCpZYvDxvtctIZ1o7p1kgBdympPZFvVTPcmDtKrzh9sABV0v2Lls8NzLbbdf2k4B5uyUfy0fQL60zUUZgrlSOoFzgJgQviOHvoVQbqFdXCFyiH+fBjKltnlQXKJ9f0Y5JOjnBIkUHCJ9fUHINiwqt9h2cw5daORuAKdfQpsi3Z3cSVM7cIdb/Dfu0f2vu+K1GOxNv1+NH0KlwbAxMA==; '
    'ATG_SESSION_ID=GCUqx-HVSh3KVpIaIX+L30So.front41-prd; '
    '_dynSessConf=-86995900442046424; '
    'JSESSIONID=GCUqx-HVSh3KVpIaIX+L30So.front41-prd; '
    'PIM-SESSION-ID=HBqQRDXDzoWH4e2V; '
    'bm_sv=CF0A04BDB7486115978F40C364AB5E38~YAAQnr8mF1opHYeIAQAAT1G5khS7D+LGov3Y87+pRE+B0F86Y3tJjyDrwszLeFA06ZC1s/so8vDcDtmB0VrCDU1N+jlXvFNpGfEkhJTiyQrFAkZq2i57xih3Y4Oe7kDnWY6TqYj05c7rqHmBOUw8+XtLNLJAzSPx+0cKcLqujkgpHiI1xPuO1N27mPXK0SMc2DLsF9MJZa0EqlH8YtoHBgDTz/q2R9Q5g28FJHVuw3SqLHdFDaHO2dML6P/eVsRDeYR3XTd8OQ==~1; '
    'lv-dispatch-url=https://us.louisvuitton.com/eng-us/women/handbags/all-handbags/_/N-tfr7qdp; '
    'utag_main=_sn:6$_se:25$_ss:0$_st:1686090605436$dc_visit:2$v_id:01884a527895001827510ebf5e3504075002b06d00a61$ses_id:1686086038612%3Bexp-session$_pn:10%3Bexp-session'
)

# Headers sent with every `requests.get` call in this script: the homepage
# fetch and the paginated product-listing API calls.
headers = {
    'Content-Type': 'application/json',
    'authority': 'api.louisvuitton.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.6',
    'client_id': client_id,
    'client_secret': client_secret,
    # Optional override; falls back to the captured cookie above.
    'cookie': os.environ.get('LV_COOKIE', _CAPTURED_COOKIE),
    'origin': 'https://us.louisvuitton.com',
    'referer': 'https://us.louisvuitton.com/',
    'sec-ch-ua': '"Brave";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'sec-gpc': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0',
}
# Fetch the US homepage and derive the product categories from the links in
# its header navigation.
url = 'https://us.louisvuitton.com/eng-us/homepage'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page and silently
# ending up with no categories.
response.raise_for_status()
print(len(response.text))
tree = html.fromstring(response.text)
# Every href found anywhere under the element with id="header".
sitemap_paths = tree.xpath('//*[@id="header"]//a/@href')

# Maps category slug -> {"code": <listing code>}.
# For a path such as ".../all-handbags/_/N-tfr7qdp" the slug is the segment two
# places before the final one ("all-handbags") and the code is the final
# segment without its "N-" prefix ("tfr7qdp"). A later link with the same slug
# overwrites an earlier one.
categories = {}
for p in sitemap_paths:
    parts = p.split("/")
    # Need at least three segments so that parts[-3] exists.
    if len(parts) >= 3 and parts[-1].startswith("N-"):
        categories[parts[-3]] = {"code": parts[-1][2:]}
| import requests | |
# For every discovered category, page through the product-listing API and keep
# a trimmed record per product. Results are stored back on the category entry:
#   categories[cat_key]["urls"]   -> list of complete records
#   categories[cat_key]["misses"] -> partial records for hits missing a field
for cat_key, cat_info in categories.items():
    print(cat_key)
    code = cat_info["code"]
    url = f'https://api.louisvuitton.com/eco-eu/search-merch-eapi/v1/eng-us/plp/products/{code}-ay1ygzo25?page='
    print(url)

    # Upper bound on pages, used until a response reports the real count.
    nbPages = 100
    page = 0
    all_hits = []
    while page < nbPages:
        try:
            response = requests.get(url + str(page), headers=headers, timeout=30)
            response.raise_for_status()
            res = response.json()
        except (requests.RequestException, ValueError) as err:
            # Network failure, HTTP error status or a non-JSON body: keep what
            # was collected for this category and move on to the next one.
            print(f"{cat_key}: stopping at page {page}: {err}")
            break

        if not isinstance(res, dict):
            print(f"{cat_key}: unexpected payload on page {page}")
            break
        if "nbPages" in res:
            nbPages = res["nbPages"]

        hits = res.get("hits")
        if not hits:
            # A missing or empty "hits" means there is nothing more to read;
            # without this check a payload lacking "nbPages" would trigger
            # 100 requests, and one lacking "hits" would raise KeyError.
            break
        all_hits.extend(hits)
        print(res.get("page", page), len(all_hits))
        page = page + 1

    recs = []
    misses = []
    for hit in all_hits:
        rec = {}
        try:
            for key in ("productId", "name", "url", "disambiguatingDescription"):
                rec[key] = hit[key]
            # Only the first image entry of each hit is kept.
            rec["image"] = hit["image"][0]["contentUrl"]
        except (KeyError, IndexError, TypeError):
            # The hit lacks an expected field: remember the partial record and
            # show the raw hit for inspection.
            misses.append(rec)
            print(hit)
        else:
            recs.append(rec)

    cat_info["urls"] = recs
    cat_info["misses"] = misses
# Flatten the per-category product records into one row per product, tagging
# each row with the category slug and code it was listed under.
flatten = []
for cat, info in categories.items():
    for rec in info["urls"]:
        doc = {
            "productId": rec["productId"],
            "category": cat,
            "category_code": info["code"],
            "name": rec["name"],
            "url": rec["url"],
            "disambiguatingDescription": rec["disambiguatingDescription"],
            # Carry the image URL along; the download step needs it.
            "image": rec["image"],
        }
        flatten.append(doc)

import pandas as pd

# Naming the columns explicitly keeps them present even when no product was
# collected, so the column lookups below cannot raise KeyError.
dat = pd.DataFrame(
    flatten,
    columns=[
        "productId",
        "category",
        "category_code",
        "name",
        "url",
        "disambiguatingDescription",
        "image",
    ],
)

# Requested image edge length in pixels, substituted into the URL template.
size = str(400)
# Fill the size placeholders of the *image* URL. The product-page `url` was
# used here before, which made the download step fetch pages, not pictures.
# regex=False makes the braces literal regardless of the pandas version.
dat['image_fix'] = (
    dat['image']
    .str.replace("{IMG_HEIGHT}", size, regex=False)
    .str.replace("{IMG_WIDTH}", size, regex=False)
)
| import os | |
# Download one image per product with curl into images/<productId>.png.
#
# curl is invoked with an argument list instead of a shell string, because
# the URL and product id come from a remote API: nothing they contain is
# interpreted by a shell, and no line-continuation quirk can glue the URL to
# the first -H option.
import subprocess
from pathlib import Path

_IMAGE_DIR = Path("images")
# curl does not create the output directory on its own.
_IMAGE_DIR.mkdir(parents=True, exist_ok=True)

# Hard-coded cookie string sent with every image request, byte-for-byte.
# NOTE(review): it embeds session identifiers and timestamps, so it was
# presumably copied from a browser session and will go stale.
_IMAGE_COOKIE = (
    'ak_cc=US; '
    'OPTOUTMULTI=0:0%7Cc1:0%7Cc2:1%7Cc4:1%7Cc3:1; '
    'ATGID=anonymous; '
    'SGID=sb.springboot41-prd; '
    'prevURL=; '
    'currentURL=https://www.louisvuitton.com/ajax/productsearch; '
    'storeLangCommerceHeader=eng-e1; '
    'geolocUserZone=eng-us; '
    'AKA_A2=A; '
    '_abck=94026E30A1FABEBFA844CBD6388965B8~0~YAAQnr8mF5/FGIeIAQAAjjKPkgpw8D5r3zC2HFECV0EQXRXPCoAQ+gYSqR/Sut2w/03toFhfVT9Yda45yFwqUGKStpIxbZAwPw0ooQplfv9eymgzFxyoYjEcqrD6rV/4OEgdCIov1wE0On3Z71z1v9UoZOisQgAlBEKsV0dYi02t6vutjUwi6f5T6N+h6SWX1l62T/QGwvxI13WHrWAOIsNJ1VJd/N3FiC/cKxVKskc3YPJf8tFZ25jOs2cUin5GUXWA7HuLb7dpffVxE0wp5vcOes47KXi9be6zRpbtfF+aHEjVkgPsOjznAFhN/X6FCHBTuJ5UvY2vHlWEIR8kW/pEW4zHm5bfQyaKKiBWQvHTXkeKRyhCnUSoRx9wZPB2dIcdY2igDmG06NT+NWYDPJtLoIU1I14uXhn+p8/w~-1~-1~-1; '
    'bm_sz=A8530E728A25BB2F984ED31AD5DF00A6~YAAQnr8mF6LFGIeIAQAAjjKPkhQ72plnxNz48wepUXjRYRETUBQ1oWkIiS8E0wS/+9NHmJzh9bagubSRjmSbTkSEEzyIbBsWTowVRBJBCiW6xO9lhJT/vmSLZtosV4g0eTOhRFoFwRrorwWUjGarPRTLozlk+KpZppYbt+EbdcyCdZNb95EUr6Za65FUi8FRSH02djUkhL5XlC3aebrVqtHfnG7uCJsWYBo+fnDJU1+mxQ8e1J+iTdq4ZdXsYuUuXZz3A0OJMy1JP5M/kKV4JKmIO89y3rgqZZB+xxNLbY+Su4yH7c/q2+s=~3556656~3686978; '
    'lv-dispatch=eng-us; '
    'consent_ecom_us=functional:1|analytics:0|customization:0|advertising:0; '
    'qb_dnt=2; '
    'ak_bmsc=589EDDF01ED59E65EA72F7393A895F31~000000000000000000000000000000~YAAQnr8mFxbGGIeIAQAATzaPkhR5K7vu8hiBPm/G/uXQB2vcY/Q4eGJWL+r7GNyViebDR/5XNvUSus0l/5Uhpgq+sHzuQR7L7aYzbFREYRNqvAZ0ngto1AvbYgwL8xA7OJM+D03m8i/BMKZHk++/aC3UFmk/RYNOKIv8p4HjlGM22auYotHx2mlWq8DTZPj1z77noO3WrzpYdEirg6xSV8havW9BHABzNmP3Cfnt3xxVq3y3bMNDcpiXy2+eAJ7eX5UNVa/Rj+P75LGxaPAv4AsXiphTgX6rMKt5T6OAvLMX6vdLLCpZYvDxvtctIZ1o7p1kgBdympPZFvVTPcmDtKrzh9sABV0v2Lls8NzLbbdf2k4B5uyUfy0fQL60zUUZgrlSOoFzgJgQviOHvoVQbqFdXCFyiH+fBjKltnlQXKJ9f0Y5JOjnBIkUHCJ9fUHINiwqt9h2cw5daORuAKdfQpsi3Z3cSVM7cIdb/Dfu0f2vu+K1GOxNv1+NH0KlwbAxMA==; '
    'PIM-SESSION-ID=HBqQRDXDzoWH4e2V; '
    'lv-dispatch-url=https://us.louisvuitton.com/eng-us/women/handbags/all-handbags/_/N-tfr7qdp; '
    'utag_main=_sn:6$_se:28$_ss:0$_st:1686090718084$dc_visit:2$v_id:01884a527895001827510ebf5e3504075002b06d00a61$ses_id:1686086038612%3Bexp-session$_pn:11%3Bexp-session; '
    'anonymous_session=true; '
    'ATG_SESSION_ID=B-8233ZEyNVjFobJCNa+2INd.front41-prd; '
    '_dynSessConf=2255588875954406228; '
    'JSESSIONID=B-8233ZEyNVjFobJCNa+2INd.front41-prd; '
    'bm_sv=CF0A04BDB7486115978F40C364AB5E38~YAAQnr8mF1xwHYeIAQAAlSm7khS9xE3iX2eAH+anrXfnlQ7v8oUsEA0z/MTkPXZnzBAvGACc64Rw7A1Y5WpGYwhqMubbtv5eueVsxuxlhE/aJNZtgpkk/epZage/P7W27HdoknmpCXGdYaFsYNTqrcCrNXOS/DUkUdvE6OjHGDg6c+05MfuLiHR+zMzGM/mZzxWzV3ruLRv6toIGPskD/LkRrOU0j8B7alLLaAhQKTvZVFPlcgbho2BHD8rHcNL0E7h2pvdGvA==~1'
)

# Request headers, in the order they were given on the curl command line.
_IMAGE_HEADERS = [
    'authority: us.louisvuitton.com',
    'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'accept-language: en-US,en;q=0.9',
    'cache-control: no-cache',
    'cookie: ' + _IMAGE_COOKIE,
    'pragma: no-cache',
    'referer: https://us.louisvuitton.com/eng-us/products/twist-belt-chain-wallet-epi-nvprod1740047v/M68560',
    'sec-ch-ua: "Not.A/Brand";v="8", "Chromium";v="114", "Brave";v="114"',
    'sec-ch-ua-mobile: ?0',
    'sec-ch-ua-platform: "macOS"',
    'sec-fetch-dest: image',
    'sec-fetch-mode: no-cors',
    'sec-fetch-site: same-origin',
    'sec-gpc: 1',
    'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
]

# Expand to the flat "-H <header> -H <header> ..." argument sequence once.
_header_args = []
for _header in _IMAGE_HEADERS:
    _header_args.extend(["-H", _header])

for rec in dat.itertuples():
    target = _IMAGE_DIR / f"{rec.productId}.png"
    command = [
        "curl",
        str(rec.image_fix),
        *_header_args,
        "--compressed",
        "--output",
        str(target),
    ]
    # Best effort, as before: one failed download must not stop the rest, but
    # it is reported now instead of being ignored. A missing curl executable
    # raises FileNotFoundError on the first iteration.
    completed = subprocess.run(command, check=False)
    if completed.returncode != 0:
        print(f"curl exited with {completed.returncode} for {rec.productId}")