Spaces:
Sleeping
Sleeping
traopia
committed on
Commit
·
dfef9ae
1
Parent(s):
06625d2
it is working now
Browse files
- src/__pycache__/generate_queries_alternative.cpython-310.pyc +0 -0
- src/__pycache__/questions_queries.cpython-310.pyc +0 -0
- src/__pycache__/sparql_query_wikibase.cpython-310.pyc +0 -0
- src/__pycache__/use_llm.cpython-310.pyc +0 -0
- src/__pycache__/visual_qa.cpython-310.pyc +0 -0
- src/__pycache__/wikibase_helpers.cpython-310.pyc +0 -0
- src/generate_queries_alternative.py +12 -79
- src/questions_queries.py +44 -51
- src/sparql_query_wikibase.py +64 -109
- src/visual_qa.py +8 -7
- src/wikibase_helpers.py +0 -171
src/__pycache__/generate_queries_alternative.cpython-310.pyc
ADDED
|
Binary file (17 kB). View file
|
|
|
src/__pycache__/questions_queries.cpython-310.pyc
ADDED
|
Binary file (34.7 kB). View file
|
|
|
src/__pycache__/sparql_query_wikibase.cpython-310.pyc
ADDED
|
Binary file (4.42 kB). View file
|
|
|
src/__pycache__/use_llm.cpython-310.pyc
ADDED
|
Binary file (1.43 kB). View file
|
|
|
src/__pycache__/visual_qa.cpython-310.pyc
ADDED
|
Binary file (3.4 kB). View file
|
|
|
src/__pycache__/wikibase_helpers.cpython-310.pyc
ADDED
|
Binary file (5.52 kB). View file
|
|
|
src/generate_queries_alternative.py
CHANGED
|
@@ -7,6 +7,7 @@ import chromadb
|
|
| 7 |
import spacy
|
| 8 |
import numpy as np
|
| 9 |
import os
|
|
|
|
| 10 |
#os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 11 |
|
| 12 |
import spacy
|
|
@@ -20,63 +21,7 @@ def get_nlp():
|
|
| 20 |
return spacy.load("en_core_web_sm")
|
| 21 |
nlp = get_nlp()
|
| 22 |
|
| 23 |
-
|
| 24 |
-
'reference URL': 'P24',
|
| 25 |
-
'start time': 'P15',
|
| 26 |
-
'end time': 'P16',
|
| 27 |
-
'occupation title': 'P25',
|
| 28 |
-
'educated at': 'P9',
|
| 29 |
-
'employer': 'P10',
|
| 30 |
-
'work location': 'P7',
|
| 31 |
-
'award received': 'P18',
|
| 32 |
-
'point in time': 'P28',
|
| 33 |
-
'exact match': 'P23',
|
| 34 |
-
'date of birth': 'P3',
|
| 35 |
-
'place of birth': 'P4',
|
| 36 |
-
'date of death': 'P5',
|
| 37 |
-
'country of citizenship': 'P6',
|
| 38 |
-
'occupation': 'P19',
|
| 39 |
-
'sex or gender': 'P8',
|
| 40 |
-
'official website': 'P17',
|
| 41 |
-
'perfumes': 'P27',
|
| 42 |
-
'who wears it': 'P26',
|
| 43 |
-
'inception': 'P11',
|
| 44 |
-
'headquarters location': 'P12',
|
| 45 |
-
'parent organization': 'P13',
|
| 46 |
-
'founded by': 'P14',
|
| 47 |
-
'owned by': 'P22',
|
| 48 |
-
'industry': 'P20',
|
| 49 |
-
'country': 'P30',
|
| 50 |
-
'total revenue': 'P21',
|
| 51 |
-
'designer employed': 'P29',
|
| 52 |
-
'country of origin': 'P30',
|
| 53 |
-
'fashion collection': 'P31',
|
| 54 |
-
'fashion season': 'P32',
|
| 55 |
-
'fashion show location': 'P33',
|
| 56 |
-
'description of fashion collection': 'P34',
|
| 57 |
-
'image of fashion collection': 'P35',
|
| 58 |
-
'editor of fashion collection description': 'P36',
|
| 59 |
-
'date of fashion collection': 'P37',
|
| 60 |
-
'fashion show category': 'P38',
|
| 61 |
-
'fashion house X fashion collection': 'P39'}
|
| 62 |
-
|
| 63 |
-
classes_wikibase = {'fashion designer': 'Q5',
|
| 64 |
-
'fashion house': 'Q1',
|
| 65 |
-
'business': 'Q9',
|
| 66 |
-
'academic institution': 'Q2',
|
| 67 |
-
'geographic location': 'Q4',
|
| 68 |
-
'fashion award': 'Q8',
|
| 69 |
-
'gender': 'Q6',
|
| 70 |
-
'occupation': 'Q7',
|
| 71 |
-
'human': 'Q36',
|
| 72 |
-
'organization': 'Q3',
|
| 73 |
-
'brand': 'Q38',
|
| 74 |
-
'lifestyle brand': 'Q3417',
|
| 75 |
-
'privately held company': 'Q1729',
|
| 76 |
-
'fashion season': 'Q8199',
|
| 77 |
-
'fashion show category': 'Q8200',
|
| 78 |
-
'fashion season collection': 'Q8201',
|
| 79 |
-
'fashion journalist': 'Q8207'}
|
| 80 |
|
| 81 |
questions_queries_all = [{ "question": education_most_popular_question, "query": education_most_popular_query},
|
| 82 |
{ "question": how_many_designers_per_fashion_house_question, "query": how_many_designers_per_fashion_house_query},
|
|
@@ -91,19 +36,12 @@ questions_queries_all = [{ "question": education_most_popular_question, "query":
|
|
| 91 |
{"question": collections_jeans_question, "query": collections_jeans_query},
|
| 92 |
{"question": creative_directors_school_question, "query": creative_directors_school_query},
|
| 93 |
{"question": fashion_houses_thematic_collection_question, "query": fashion_houses_thematic_collection_query},
|
| 94 |
-
|
| 95 |
-
{ "question": designer_fashion_house_directors_question
|
| 96 |
-
{ "question": country_designer_question
|
| 97 |
-
{ "question": designer_order_fashion_collection_question
|
| 98 |
-
{ "question": designer_fashion_director_question2
|
| 99 |
-
{ "question": year_designers_birth_question
|
| 100 |
-
# {"question": fashion_house_directors_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_house_directors_query.substitute({ "x": f"'{"Chanel"}'"})},
|
| 101 |
-
# { "question": designer_fashion_house_directors_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_house_directors_query.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
| 102 |
-
# {"question": country_designer_question.substitute({ "x": f"{"Italy"}"}), "query": country_designer_query.substitute({ "x": f"'{"Italy"}'"})},
|
| 103 |
-
# { "question": designer_order_fashion_collection_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_order_fashion_collection_query.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
| 104 |
-
# {"question": designer_fashion_director_question2.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_director_query2.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
| 105 |
-
# { "question": year_designers_birth_question.substitute({ "x": 1970}), "query": year_designers_birth_query.substitute({ "x": 1970})},
|
| 106 |
-
# { "question": fashion_collection_images_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_collection_images_query.substitute({ "x": f"'{"Chanel"}'"})}
|
| 107 |
]
|
| 108 |
|
| 109 |
if os.path.exists("web_app/query_log.json"):
|
|
@@ -206,8 +144,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
|
|
| 206 |
|
| 207 |
# Store each unique document in the vector embedding database
|
| 208 |
for i, d in enumerate(masked_documents):
|
| 209 |
-
|
| 210 |
-
embedding = response[0] # Extract the first (and only) embedding from the nested list
|
| 211 |
|
| 212 |
# Check if embedding is unique
|
| 213 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
|
@@ -222,8 +159,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
|
|
| 222 |
|
| 223 |
# Compute the embedding for the input question
|
| 224 |
masked_question = mask_entities(question, nlp)
|
| 225 |
-
|
| 226 |
-
query_embedding = response[0] # Extract embedding
|
| 227 |
|
| 228 |
results = collection.query(
|
| 229 |
query_embeddings=[query_embedding], # Ensure correct format
|
|
@@ -265,9 +201,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
|
|
| 265 |
|
| 266 |
# Store each unique document in the vector embedding database
|
| 267 |
for i, d in enumerate(original_documents):
|
| 268 |
-
|
| 269 |
-
embedding = response[0] # Extract the first (and only) embedding from the nested list
|
| 270 |
-
|
| 271 |
# Check if embedding is unique
|
| 272 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
| 273 |
if not is_duplicate:
|
|
@@ -281,8 +215,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
|
|
| 281 |
|
| 282 |
# Compute the embedding for the input question
|
| 283 |
|
| 284 |
-
|
| 285 |
-
query_embedding = response[0] # Extract embedding
|
| 286 |
|
| 287 |
results = collection.query(
|
| 288 |
query_embeddings=[query_embedding], # Ensure correct format
|
|
|
|
| 7 |
import spacy
|
| 8 |
import numpy as np
|
| 9 |
import os
|
| 10 |
+
from src.sparql_query_wikibase import wikibase_properties_id, classes_wikibase
|
| 11 |
#os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 12 |
|
| 13 |
import spacy
|
|
|
|
| 21 |
return spacy.load("en_core_web_sm")
|
| 22 |
nlp = get_nlp()
|
| 23 |
|
| 24 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
questions_queries_all = [{ "question": education_most_popular_question, "query": education_most_popular_query},
|
| 27 |
{ "question": how_many_designers_per_fashion_house_question, "query": how_many_designers_per_fashion_house_query},
|
|
|
|
| 36 |
{"question": collections_jeans_question, "query": collections_jeans_query},
|
| 37 |
{"question": creative_directors_school_question, "query": creative_directors_school_query},
|
| 38 |
{"question": fashion_houses_thematic_collection_question, "query": fashion_houses_thematic_collection_query},
|
| 39 |
+
{"question": fashion_house_directors_question, "query": fashion_house_directors_query},
|
| 40 |
+
{ "question": designer_fashion_house_directors_question, "query": designer_fashion_house_directors_query},
|
| 41 |
+
{ "question": country_designer_question, "query": country_designer_query},
|
| 42 |
+
{ "question": designer_order_fashion_collection_question, "query": designer_order_fashion_collection_query},
|
| 43 |
+
{ "question": designer_fashion_director_question2, "query": designer_fashion_director_query2},
|
| 44 |
+
{ "question": year_designers_birth_question, "query": year_designers_birth_query}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
]
|
| 46 |
|
| 47 |
if os.path.exists("web_app/query_log.json"):
|
|
|
|
| 144 |
|
| 145 |
# Store each unique document in the vector embedding database
|
| 146 |
for i, d in enumerate(masked_documents):
|
| 147 |
+
embedding = get_embeddings(d)[0]
|
|
|
|
| 148 |
|
| 149 |
# Check if embedding is unique
|
| 150 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
|
|
|
| 159 |
|
| 160 |
# Compute the embedding for the input question
|
| 161 |
masked_question = mask_entities(question, nlp)
|
| 162 |
+
# Embed the masked input question. `d` is only the loop variable left over
# from iterating the stored documents, so embedding it would search with the
# last document's text instead of the user's question.
query_embedding = get_embeddings(masked_question)[0]
|
|
|
|
| 163 |
|
| 164 |
results = collection.query(
|
| 165 |
query_embeddings=[query_embedding], # Ensure correct format
|
|
|
|
| 201 |
|
| 202 |
# Store each unique document in the vector embedding database
|
| 203 |
for i, d in enumerate(original_documents):
|
| 204 |
+
embedding = get_embeddings(d)[0]
|
|
|
|
|
|
|
| 205 |
# Check if embedding is unique
|
| 206 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
| 207 |
if not is_duplicate:
|
|
|
|
| 215 |
|
| 216 |
# Compute the embedding for the input question
|
| 217 |
|
| 218 |
+
query_embedding = get_embeddings(question)[0]
|
|
|
|
| 219 |
|
| 220 |
results = collection.query(
|
| 221 |
query_embeddings=[query_embedding], # Ensure correct format
|
src/questions_queries.py
CHANGED
|
@@ -5,8 +5,8 @@ from src.sparql_query_wikibase import *
|
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
-
fashion_collection_images_question =
|
| 9 |
-
fashion_collection_images_query =
|
| 10 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 11 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 12 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
@@ -17,9 +17,9 @@ PREFIX prov: <http://www.w3.org/ns/prov#>
|
|
| 17 |
|
| 18 |
SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
| 19 |
?fashion_seasonLabel (YEAR(?fashion_year) AS ?year) ?categoryLabel WHERE {
|
| 20 |
-
{ ?fashion_house rdfs:label
|
| 21 |
UNION
|
| 22 |
-
{ ?fashion_house skos:altLabel
|
| 23 |
|
| 24 |
|
| 25 |
?fashion_house p:P31 ?statement.
|
|
@@ -39,10 +39,10 @@ SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
|
| 39 |
}
|
| 40 |
|
| 41 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } }
|
| 42 |
-
"""
|
| 43 |
|
| 44 |
-
year_designers_birth_question =
|
| 45 |
-
year_designers_birth_query =
|
| 46 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 47 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 48 |
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
|
@@ -53,13 +53,13 @@ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
|
| 53 |
|
| 54 |
SELECT ?designerLabel ?birthdate WHERE {
|
| 55 |
?designer wbt:P3 ?birthdate .
|
| 56 |
-
FILTER (YEAR(?birthdate) =
|
| 57 |
|
| 58 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 59 |
-
} """
|
| 60 |
|
| 61 |
-
fashion_house_directors_question =
|
| 62 |
-
fashion_house_directors_query =
|
| 63 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 64 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 65 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
@@ -74,9 +74,9 @@ SELECT
|
|
| 74 |
WHERE {
|
| 75 |
# Fashion house identified by brand name
|
| 76 |
?fashion_house wbt:P2 wb:Q1.
|
| 77 |
-
{ ?fashion_house rdfs:label
|
| 78 |
UNION
|
| 79 |
-
{ ?fashion_house skos:altLabel
|
| 80 |
|
| 81 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P10
|
| 82 |
{
|
|
@@ -139,12 +139,12 @@ WHERE {
|
|
| 139 |
FILTER(?reference_URL_source != "")
|
| 140 |
# Retrieve labels
|
| 141 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 142 |
-
} ORDER BY ?start_year """
|
| 143 |
|
| 144 |
|
| 145 |
|
| 146 |
-
designer_fashion_house_directors_question =
|
| 147 |
-
designer_fashion_house_directors_query =
|
| 148 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 149 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 150 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
@@ -160,9 +160,9 @@ SELECT ?fashion_houseLabel
|
|
| 160 |
WHERE {
|
| 161 |
# Designer linked to fashion house through roles or founded_by
|
| 162 |
?fashion_designer wbt:P2 wb:Q5.
|
| 163 |
-
{ ?fashion_designer rdfs:label
|
| 164 |
UNION
|
| 165 |
-
{ ?fashion_designer skos:altLabel
|
| 166 |
|
| 167 |
# Link through employer (P10)
|
| 168 |
?fashion_designer p:P10 ?statement.
|
|
@@ -200,11 +200,11 @@ WHERE {
|
|
| 200 |
|
| 201 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 202 |
}
|
| 203 |
-
"""
|
| 204 |
|
| 205 |
|
| 206 |
-
designer_fashion_director_question2 =
|
| 207 |
-
designer_fashion_director_query2 =
|
| 208 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 209 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 210 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
@@ -217,9 +217,9 @@ SELECT
|
|
| 217 |
WHERE {
|
| 218 |
# Fashion designer identified by name
|
| 219 |
?creative_director wbt:P2 wb:Q5.
|
| 220 |
-
{ ?creative_director rdfs:label
|
| 221 |
UNION
|
| 222 |
-
{ ?creative_director skos:altLabel
|
| 223 |
|
| 224 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P30
|
| 225 |
{
|
|
@@ -256,10 +256,10 @@ WHERE {
|
|
| 256 |
|
| 257 |
# Retrieve labels
|
| 258 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 259 |
-
} ORDER BY ?start_year """
|
| 260 |
|
| 261 |
-
country_designer_question =
|
| 262 |
-
country_designer_query =
|
| 263 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 264 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 265 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
@@ -276,9 +276,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?statement) AS ?statement)
|
|
| 276 |
# Access the statement for the country relationship
|
| 277 |
?fashion_designer p:P6 ?statement.
|
| 278 |
?statement ps:P6 ?country.
|
| 279 |
-
{ ?country rdfs:label
|
| 280 |
UNION
|
| 281 |
-
{ ?country skos:altLabel
|
| 282 |
|
| 283 |
# Retrieve references from the statement
|
| 284 |
OPTIONAL {
|
|
@@ -290,9 +290,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?statement) AS ?statement)
|
|
| 290 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 291 |
}
|
| 292 |
GROUP BY ?fashion_designerLabel
|
| 293 |
-
ORDER BY ?fashion_designerLabel """
|
| 294 |
|
| 295 |
-
country_designer_query =
|
| 296 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 297 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 298 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
@@ -306,9 +306,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {
|
|
| 306 |
?fashion_designer wbt:P2 wb:Q5.
|
| 307 |
?fashion_designer wbt:P6 ?country.
|
| 308 |
?fashion_designer p:P6 ?statement.
|
| 309 |
-
{ ?country rdfs:label
|
| 310 |
UNION
|
| 311 |
-
{ ?country skos:altLabel
|
| 312 |
|
| 313 |
|
| 314 |
# Retrieve references from the statement
|
|
@@ -321,11 +321,11 @@ SELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {
|
|
| 321 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 322 |
}
|
| 323 |
GROUP BY ?fashion_designerLabel
|
| 324 |
-
ORDER BY ?fashion_designerLabel """
|
| 325 |
|
| 326 |
|
| 327 |
-
designer_order_fashion_collection_question =
|
| 328 |
-
designer_order_fashion_collection_query =
|
| 329 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 330 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 331 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
@@ -340,9 +340,9 @@ SELECT ?fashion_houseLabel
|
|
| 340 |
(YEAR(?fashion_year) as ?year ) ?fashion_seasonLabel ?categoryLabel WHERE {
|
| 341 |
# Designer linked to fashion house through roles or founded_by
|
| 342 |
?fashion_designer wbt:P2 wb:Q5.
|
| 343 |
-
{ ?fashion_designer rdfs:label
|
| 344 |
UNION
|
| 345 |
-
{ ?fashion_designer skos:altLabel
|
| 346 |
|
| 347 |
|
| 348 |
# Link through "creative director" or other roles
|
|
@@ -384,7 +384,7 @@ FILTER(
|
|
| 384 |
}
|
| 385 |
|
| 386 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 387 |
-
} ORDER BY ?year """
|
| 388 |
|
| 389 |
|
| 390 |
education_most_popular_question = "What are the most popular educational institutions for fashion designers?"
|
|
@@ -683,20 +683,13 @@ questions_queries_dict = [ { "question": education_most_popular_question, "query
|
|
| 683 |
{"question": collections_sustainability_theme_question, "query": collections_sustainability_theme_query},
|
| 684 |
]
|
| 685 |
|
| 686 |
-
# questions_queries_dict_entity = [{"question": fashion_house_directors_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_house_directors_query},
|
| 687 |
-
# { "question": designer_fashion_house_directors_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_house_directors_query},
|
| 688 |
-
# { "question": country_designer_question.substitute({ "x": f"{"Italy"}"}), "query": country_designer_query},
|
| 689 |
-
# { "question": designer_order_fashion_collection_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_order_fashion_collection_query},
|
| 690 |
-
# { "question": designer_fashion_director_question2.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_director_query2},
|
| 691 |
-
# { "question": year_designers_birth_question.substitute({ "x": f"{"1970"}"}), "query": year_designers_birth_query}]
|
| 692 |
-
|
| 693 |
|
| 694 |
-
questions_queries_dict_entity = [{"question": fashion_house_directors_question
|
| 695 |
-
{ "question": designer_fashion_house_directors_question
|
| 696 |
-
{ "question": country_designer_question
|
| 697 |
-
{ "question": designer_order_fashion_collection_question
|
| 698 |
-
{ "question": designer_fashion_director_question2
|
| 699 |
-
{ "question": year_designers_birth_question
|
| 700 |
|
| 701 |
|
| 702 |
# Designers
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
+
fashion_collection_images_question = "Images of collections produced by Chanel"
|
| 9 |
+
fashion_collection_images_query = """
|
| 10 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 11 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 12 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
|
| 17 |
|
| 18 |
SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
| 19 |
?fashion_seasonLabel (YEAR(?fashion_year) AS ?year) ?categoryLabel WHERE {
|
| 20 |
+
{ ?fashion_house rdfs:label "Chanel"@en.} # Match label
|
| 21 |
UNION
|
| 22 |
+
{ ?fashion_house skos:altLabel "Chanel"@en. } # Match alias
|
| 23 |
|
| 24 |
|
| 25 |
?fashion_house p:P31 ?statement.
|
|
|
|
| 39 |
}
|
| 40 |
|
| 41 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } }
|
| 42 |
+
"""
|
| 43 |
|
| 44 |
+
year_designers_birth_question = "Which designers were born in 1963?"
|
| 45 |
+
year_designers_birth_query = """
|
| 46 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 47 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 48 |
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
|
|
|
| 53 |
|
| 54 |
SELECT ?designerLabel ?birthdate WHERE {
|
| 55 |
?designer wbt:P3 ?birthdate .
|
| 56 |
+
FILTER (YEAR(?birthdate) = 1963)
|
| 57 |
|
| 58 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 59 |
+
} """
|
| 60 |
|
| 61 |
+
fashion_house_directors_question = "Which designers have been creative directors of Chanel ?"
|
| 62 |
+
fashion_house_directors_query = """
|
| 63 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 64 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 65 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
|
| 74 |
WHERE {
|
| 75 |
# Fashion house identified by brand name
|
| 76 |
?fashion_house wbt:P2 wb:Q1.
|
| 77 |
+
{ ?fashion_house rdfs:label "Chanel"@en.} # Match label
|
| 78 |
UNION
|
| 79 |
+
{ ?fashion_house skos:altLabel "Chanel"@en. } # Match alias
|
| 80 |
|
| 81 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P10
|
| 82 |
{
|
|
|
|
| 139 |
FILTER(?reference_URL_source != "")
|
| 140 |
# Retrieve labels
|
| 141 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 142 |
+
} ORDER BY ?start_year """
|
| 143 |
|
| 144 |
|
| 145 |
|
| 146 |
+
designer_fashion_house_directors_question = "In which fashion house did Alessandro Michele work as a creative director?"
|
| 147 |
+
designer_fashion_house_directors_query = """
|
| 148 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 149 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 150 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
|
| 160 |
WHERE {
|
| 161 |
# Designer linked to fashion house through roles or founded_by
|
| 162 |
?fashion_designer wbt:P2 wb:Q5.
|
| 163 |
+
{ ?fashion_designer rdfs:label "Alessandro Michele"@en.} # Match label
|
| 164 |
UNION
|
| 165 |
+
{ ?fashion_designer skos:altLabel "Alessandro Michele"@en. } # Match alias
|
| 166 |
|
| 167 |
# Link through employer (P10)
|
| 168 |
?fashion_designer p:P10 ?statement.
|
|
|
|
| 200 |
|
| 201 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 202 |
}
|
| 203 |
+
"""
|
| 204 |
|
| 205 |
|
| 206 |
+
designer_fashion_director_question2 = "In which fashion house did Alessandro Michele work as a creative director?"
|
| 207 |
+
designer_fashion_director_query2 = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 208 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 209 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 210 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
|
| 217 |
WHERE {
|
| 218 |
# Fashion designer identified by name
|
| 219 |
?creative_director wbt:P2 wb:Q5.
|
| 220 |
+
{ ?creative_director rdfs:label "Alessandro Michele"@en.} # Match label
|
| 221 |
UNION
|
| 222 |
+
{ ?creative_director skos:altLabel "Alessandro Michele"@en. } # Match alias
|
| 223 |
|
| 224 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P30
|
| 225 |
{
|
|
|
|
| 256 |
|
| 257 |
# Retrieve labels
|
| 258 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 259 |
+
} ORDER BY ?start_year """
|
| 260 |
|
| 261 |
+
country_designer_question = "Which designers are from Italy ?"
|
| 262 |
+
country_designer_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 263 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 264 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 265 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
|
| 276 |
# Access the statement for the country relationship
|
| 277 |
?fashion_designer p:P6 ?statement.
|
| 278 |
?statement ps:P6 ?country.
|
| 279 |
+
{ ?country rdfs:label "Italy"@en.} # Match label
|
| 280 |
UNION
|
| 281 |
+
{ ?country skos:altLabel "Italy"@en. } # Match alias
|
| 282 |
|
| 283 |
# Retrieve references from the statement
|
| 284 |
OPTIONAL {
|
|
|
|
| 290 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 291 |
}
|
| 292 |
GROUP BY ?fashion_designerLabel
|
| 293 |
+
ORDER BY ?fashion_designerLabel """
|
| 294 |
|
| 295 |
+
country_designer_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 296 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 297 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 298 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
|
| 306 |
?fashion_designer wbt:P2 wb:Q5.
|
| 307 |
?fashion_designer wbt:P6 ?country.
|
| 308 |
?fashion_designer p:P6 ?statement.
|
| 309 |
+
{ ?country rdfs:label "Italy"@en.} # Match label
|
| 310 |
UNION
|
| 311 |
+
{ ?country skos:altLabel "Italy"@en. } # Match alias
|
| 312 |
|
| 313 |
|
| 314 |
# Retrieve references from the statement
|
|
|
|
| 321 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 322 |
}
|
| 323 |
GROUP BY ?fashion_designerLabel
|
| 324 |
+
ORDER BY ?fashion_designerLabel """
|
| 325 |
|
| 326 |
|
| 327 |
+
designer_order_fashion_collection_question = "Which fashion collections did Alessandro Michele work on?"
|
| 328 |
+
designer_order_fashion_collection_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 329 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 330 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 331 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
|
| 340 |
(YEAR(?fashion_year) as ?year ) ?fashion_seasonLabel ?categoryLabel WHERE {
|
| 341 |
# Designer linked to fashion house through roles or founded_by
|
| 342 |
?fashion_designer wbt:P2 wb:Q5.
|
| 343 |
+
{ ?fashion_designer rdfs:label "Alessandro Michele"@en.} # Match label
|
| 344 |
UNION
|
| 345 |
+
{ ?fashion_designer skos:altLabel "Alessandro Michele"@en. } # Match alias
|
| 346 |
|
| 347 |
|
| 348 |
# Link through "creative director" or other roles
|
|
|
|
| 384 |
}
|
| 385 |
|
| 386 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 387 |
+
} ORDER BY ?year """
|
| 388 |
|
| 389 |
|
| 390 |
education_most_popular_question = "What are the most popular educational institutions for fashion designers?"
|
|
|
|
| 683 |
{"question": collections_sustainability_theme_question, "query": collections_sustainability_theme_query},
|
| 684 |
]
|
| 685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
|
| 687 |
+
questions_queries_dict_entity = [{"question": fashion_house_directors_question, "query": fashion_house_directors_query},
|
| 688 |
+
{ "question": designer_fashion_house_directors_question, "query": designer_fashion_house_directors_query},
|
| 689 |
+
{ "question": country_designer_question, "query": country_designer_query},
|
| 690 |
+
{ "question": designer_order_fashion_collection_question, "query": designer_order_fashion_collection_query},
|
| 691 |
+
{ "question": designer_fashion_director_question2, "query": designer_fashion_director_query2},
|
| 692 |
+
{ "question": year_designers_birth_question, "query": year_designers_birth_query}]
|
| 693 |
|
| 694 |
|
| 695 |
# Designers
|
src/sparql_query_wikibase.py
CHANGED
|
@@ -10,7 +10,70 @@ from wikibaseintegrator.wbi_helpers import get_user_agent
|
|
| 10 |
import pandas as pd
|
| 11 |
from string import Template
|
| 12 |
queries = False
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
|
|
@@ -65,111 +128,3 @@ def get_results_to_df( query):
|
|
| 65 |
df = df.map(lambda x: x['value'] if pd.notnull(x) else None)
|
| 66 |
return df
|
| 67 |
|
| 68 |
-
if queries:
|
| 69 |
-
query_fashion_designers_template = Template("""
|
| 70 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 71 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 72 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 73 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
| 74 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
| 75 |
-
|
| 76 |
-
SELECT ?fashionDesignerLabel ?fashionDesigner WHERE {
|
| 77 |
-
?fashionDesigner wbt:$instance_of wb:$fashion_designer.
|
| 78 |
-
|
| 79 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 80 |
-
} ORDER BY ?fashionDesignerLabel
|
| 81 |
-
""")
|
| 82 |
-
query_fashion_designers = query_fashion_designers_template.substitute(
|
| 83 |
-
{
|
| 84 |
-
"instance_of": wikibase_properties_id["instance of"],
|
| 85 |
-
"fashion_designer": classes_wikibase["fashion designer"],
|
| 86 |
-
|
| 87 |
-
}
|
| 88 |
-
)
|
| 89 |
-
|
| 90 |
-
query_fashion_houses_template = Template("""
|
| 91 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
| 92 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
| 93 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
| 94 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
| 95 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
| 96 |
-
|
| 97 |
-
SELECT ?fashionHouseLabel ?fashionHouse WHERE {
|
| 98 |
-
?fashionHouse wbt:$instance_of wb:$fashion_house.
|
| 99 |
-
|
| 100 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
| 101 |
-
} ORDER BY ?fashionHouseLabel
|
| 102 |
-
""")
|
| 103 |
-
#query_fashion_designers = query_fashion_designers_template.substitute(wikidata_properties_id["occupation"], fashion_designer = classes_wikidata["fashion designer"], grand_couturier = classes_wikidata["grand couturier"])
|
| 104 |
-
query_fashion_house= query_fashion_houses_template.substitute(
|
| 105 |
-
{
|
| 106 |
-
"instance_of": wikibase_properties_id["instance of"],
|
| 107 |
-
"fashion_house": classes_wikibase["fashion house"],
|
| 108 |
-
|
| 109 |
-
}
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
# SPARQL template listing the English labels of every item that is an
# instance of the "academic institution" class.
# $instance_of and $academic_institution are filled in below.
query_school_template = Template("""
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>

SELECT ?fashionSchoolLabel WHERE {
?fashionSchool wbt:$instance_of wb:$academic_institution.

SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} ORDER BY ?fashionSchoolLabel
""")

# Concrete query text with the property/class identifiers substituted in.
query_school = query_school_template.substitute(
    instance_of=wikibase_properties_id["instance of"],
    academic_institution=classes_wikibase["academic institution"],
)
|
| 133 |
-
|
| 134 |
-
# SPARQL template listing the English labels of every item that is an
# instance of the "fashion award" class.
# $instance_of and $fashion_award are filled in below.
query_award_template = Template("""
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>

SELECT ?fashionAwardLabel WHERE {
?fashionAward wbt:$instance_of wb:$fashion_award.

SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} ORDER BY ?fashionAwardLabel
""")

# Concrete query text with the property/class identifiers substituted in.
query_award = query_award_template.substitute(
    instance_of=wikibase_properties_id["instance of"],
    fashion_award=classes_wikibase["fashion award"],
)
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def get_fashion_designers_wikibase(output_file):
    """Run the fashion-designer query, save the result as CSV and return it.

    Args:
        output_file: Destination handed to ``to_csv`` (written without the
            index column).

    Returns:
        The object produced by ``get_results_to_df(query_fashion_designers)``,
        i.e. the same frame that was written to ``output_file``.
    """
    df_designers = get_results_to_df(query_fashion_designers)
    df_designers.to_csv(output_file, index=False)
    # Hand back the frame we already have instead of querying the endpoint a
    # second time: one network round-trip, and the CSV and the return value
    # are guaranteed to hold the same rows.
    return df_designers
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
def get_fashion_houses_wikibase(output_file):
    """Run the fashion-house query, save the result as CSV and return it.

    Args:
        output_file: Destination handed to ``to_csv`` (written without the
            index column).

    Returns:
        The object produced by ``get_results_to_df(query_fashion_house)``,
        i.e. the same frame that was written to ``output_file``.
    """
    df_fashion_houses = get_results_to_df(query_fashion_house)
    df_fashion_houses.to_csv(output_file, index=False)
    # Return the frame already fetched rather than executing the SPARQL query
    # a second time; this keeps the CSV and the return value in sync.
    return df_fashion_houses
|
| 166 |
-
|
| 167 |
-
def get_schools_wikibase(output_file):
    """Run the academic-institution query, save the result as CSV and return it.

    Args:
        output_file: Destination handed to ``to_csv`` (written without the
            index column).

    Returns:
        The object produced by ``get_results_to_df(query_school)``, i.e. the
        same frame that was written to ``output_file``.
    """
    df_schools = get_results_to_df(query_school)
    df_schools.to_csv(output_file, index=False)
    # Return the frame already fetched rather than executing the SPARQL query
    # a second time; this keeps the CSV and the return value in sync.
    return df_schools
|
| 171 |
-
|
| 172 |
-
def get_awards_wikibase(output_file):
    """Run the fashion-award query, save the result as CSV and return it.

    Args:
        output_file: Destination handed to ``to_csv`` (written without the
            index column).

    Returns:
        The object produced by ``get_results_to_df(query_award)``, i.e. the
        same frame that was written to ``output_file``.
    """
    df_awards = get_results_to_df(query_award)
    df_awards.to_csv(output_file, index=False)
    # Return the frame already fetched rather than executing the SPARQL query
    # a second time; this keeps the CSV and the return value in sync.
    return df_awards
|
|
|
|
| 10 |
import pandas as pd
|
| 11 |
from string import Template
|
| 12 |
# NOTE(review): module-level switch; its consumers are not visible here.
queries = False

# MediaWiki API entry point of the fashionwiki Wikibase instance.
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'

# Connection settings for the fashionwiki instance.
# NOTE(review): USER_AGENT still looks like a template placeholder - confirm.
config = dict(
    SPARQL_ENDPOINT_URL="https://fashionwiki.wikibase.cloud/query/sparql",
    USER_AGENT='YourBotName/1.0 (https://yourwebsite.org/bot-info)',
    WIKIBASE_URL=wikibase_api_url,
)
|
| 19 |
+
|
| 20 |
+
# Property label -> property ID on the fashionwiki Wikibase, hard-coded so
# that importing this module needs no API round-trips.
# NOTE(review): "country" and "country of origin" both map to P30 - confirm
# that this is intended.
wikibase_properties_id = {
    "instance of": "P2",
    "reference URL": "P24",
    "start time": "P15",
    "end time": "P16",
    "occupation title": "P25",
    "educated at": "P9",
    "employer": "P10",
    "work location": "P7",
    "award received": "P18",
    "point in time": "P28",
    "exact match": "P23",
    "date of birth": "P3",
    "place of birth": "P4",
    "date of death": "P5",
    "country of citizenship": "P6",
    "occupation": "P19",
    "sex or gender": "P8",
    "official website": "P17",
    "perfumes": "P27",
    "who wears it": "P26",
    "inception": "P11",
    "headquarters location": "P12",
    "parent organization": "P13",
    "founded by": "P14",
    "owned by": "P22",
    "industry": "P20",
    "country": "P30",
    "total revenue": "P21",
    "designer employed": "P29",
    "country of origin": "P30",
    "fashion collection": "P31",
    "fashion season": "P32",
    "fashion show location": "P33",
    "description of fashion collection": "P34",
    "image of fashion collection": "P35",
    "editor of fashion collection description": "P36",
    "date of fashion collection": "P37",
    "fashion show category": "P38",
    "fashion house X fashion collection": "P39",
}
|
| 59 |
+
|
| 60 |
+
# Class label -> item ID on the fashionwiki Wikibase, hard-coded so that
# importing this module needs no API round-trips.
classes_wikibase = {
    "fashion designer": "Q5",
    "fashion house": "Q1",
    "business": "Q9",
    "academic institution": "Q2",
    "geographic location": "Q4",
    "fashion award": "Q8",
    "gender": "Q6",
    "occupation": "Q7",
    "human": "Q36",
    "organization": "Q3",
    "brand": "Q38",
    "lifestyle brand": "Q3417",
    "privately held company": "Q1729",
    "fashion season": "Q8199",
    "fashion show category": "Q8200",
    "fashion season collection": "Q8201",
    "fashion journalist": "Q8207",
}
|
| 77 |
|
| 78 |
|
| 79 |
def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
|
|
|
|
| 128 |
df = df.map(lambda x: x['value'] if pd.notnull(x) else None)
|
| 129 |
return df
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/visual_qa.py
CHANGED
|
@@ -9,7 +9,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
| 9 |
from datasets import load_dataset
|
| 10 |
import chromadb
|
| 11 |
|
| 12 |
-
from datetime import datetime
|
| 13 |
|
| 14 |
def initialize_collection(collection_name="clip_image_embeddings"):
|
| 15 |
# Initialize ChromaDB client (PersistentClient stores embeddings between runs)
|
|
@@ -73,7 +73,7 @@ device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
|
| 73 |
model = CLIPModel.from_pretrained(model_name).to(device)
|
| 74 |
processor = CLIPProcessor.from_pretrained(model_name)
|
| 75 |
|
| 76 |
-
def main_text_retrieve_images(text, result_query=None, n_retrieved=
|
| 77 |
|
| 78 |
|
| 79 |
|
|
@@ -86,6 +86,7 @@ def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
|
|
| 86 |
|
| 87 |
# Convert to pandas DataFrame
|
| 88 |
df_emb = full_dataset.to_pandas()
|
|
|
|
| 89 |
df_emb = df_emb.drop_duplicates(subset='image_urls')
|
| 90 |
df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
|
| 91 |
df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
|
|
@@ -112,14 +113,14 @@ def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
|
|
| 112 |
query_embeddings=text_features[0],
|
| 113 |
n_results=n_retrieved
|
| 114 |
)
|
| 115 |
-
|
| 116 |
result_doc = pd.DataFrame(results['documents'][0], columns=["image_url"])
|
| 117 |
df_result = df_emb.merge(result_doc[['image_url']], on='image_url', how='inner')
|
| 118 |
# Remove columns fashion_clip_image, image_urls, and description
|
| 119 |
df_result = df_result.drop(columns=['fashion_clip_image', 'description', 'editor', 'publish_date', 'image_urls'])
|
| 120 |
return df_result.to_dict(orient='records')
|
| 121 |
|
| 122 |
-
if __name__ == "__main__":
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
| 9 |
from datasets import load_dataset
|
| 10 |
import chromadb
|
| 11 |
|
| 12 |
+
from datetime import datetime
|
| 13 |
|
| 14 |
def initialize_collection(collection_name="clip_image_embeddings"):
|
| 15 |
# Initialize ChromaDB client (PersistentClient stores embeddings between runs)
|
|
|
|
| 73 |
model = CLIPModel.from_pretrained(model_name).to(device)
|
| 74 |
processor = CLIPProcessor.from_pretrained(model_name)
|
| 75 |
|
| 76 |
+
def main_text_retrieve_images(text, result_query=None, n_retrieved=5):
|
| 77 |
|
| 78 |
|
| 79 |
|
|
|
|
| 86 |
|
| 87 |
# Convert to pandas DataFrame
|
| 88 |
df_emb = full_dataset.to_pandas()
|
| 89 |
+
#print(df_emb.head()) # Debugging statement
|
| 90 |
df_emb = df_emb.drop_duplicates(subset='image_urls')
|
| 91 |
df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
|
| 92 |
df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
|
|
|
|
| 113 |
query_embeddings=text_features[0],
|
| 114 |
n_results=n_retrieved
|
| 115 |
)
|
| 116 |
+
|
| 117 |
result_doc = pd.DataFrame(results['documents'][0], columns=["image_url"])
|
| 118 |
df_result = df_emb.merge(result_doc[['image_url']], on='image_url', how='inner')
|
| 119 |
# Remove columns fashion_clip_image, image_urls, and description
|
| 120 |
df_result = df_result.drop(columns=['fashion_clip_image', 'description', 'editor', 'publish_date', 'image_urls'])
|
| 121 |
return df_result.to_dict(orient='records')
|
| 122 |
|
| 123 |
+
# if __name__ == "__main__":
|
| 124 |
+
# text = "dress"
|
| 125 |
+
# result = main_text_retrieve_images(text)
|
| 126 |
+
# print(result)
|
src/wikibase_helpers.py
DELETED
|
@@ -1,171 +0,0 @@
|
|
| 1 |
-
import requests
|
| 2 |
-
import logging
|
| 3 |
-
from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_helpers
|
| 4 |
-
from wikibaseintegrator.wbi_config import config
|
| 5 |
-
from wikibaseintegrator.wbi_exceptions import MWApiError
|
| 6 |
-
from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, SearchError
|
| 7 |
-
from wikibaseintegrator import WikibaseIntegrator, wbi_login
|
| 8 |
-
import os

# MediaWiki API entry point of the fashionwiki Wikibase instance.
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'

# Credentials come from the environment so that no secret lives in the
# repository.  The user name keeps its previous value as a default.
# NOTE(review): the password that used to be committed on this line should be
# rotated - it remains readable in the repository history.
_wikibase_password = os.environ.get("WIKIBASE_PASSWORD")
if not _wikibase_password:
    raise RuntimeError(
        "Set the WIKIBASE_PASSWORD environment variable to log in to "
        "fashionwiki.wikibase.cloud"
    )

# Authenticated session used by get_entity_id_by_label().
login_wikibase = wbi_login.Login(
    user=os.environ.get("WIKIBASE_USER", "Traopia"),
    password=_wikibase_password,
    mediawiki_api_url=wikibase_api_url,
)

# Connection settings for the fashionwiki instance.
# NOTE(review): this rebinds the name `config` imported from
# wikibaseintegrator.wbi_config at the top of the file, so the library's own
# configuration dict is not updated by this assignment - confirm intent.
# NOTE(review): USER_AGENT still looks like a template placeholder - confirm.
config = {
    "SPARQL_ENDPOINT_URL": "https://fashionwiki.wikibase.cloud/query/sparql",
    'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
    'WIKIBASE_URL': wikibase_api_url,
}

# List of valid language codes (can be expanded)
VALID_LANGUAGE_CODES = ['en']
|
| 22 |
-
|
| 23 |
-
def get_property_id_by_label(property_label, api_url):
    """
    Resolve the property label to its corresponding property ID from Wikibase.

    Issues a ``wbsearchentities`` request (type ``property``, language ``en``)
    and returns the ID of the first hit.

    Args:
        property_label (str): The label of the property to search.
        api_url (str): The API URL of the target Wikibase or Wikidata. Either
            the site root or the full ``.../w/api.php`` endpoint is accepted.

    Returns:
        str: The property ID if found, otherwise None (no hit, or a non-200
        HTTP response).
    """
    # Callers in this module pass the full ".../w/api.php" endpoint; only
    # append the path when it is missing so it is never duplicated.
    endpoint = api_url.rstrip('/')
    if not endpoint.endswith('api.php'):
        endpoint = f'{endpoint}/w/api.php'

    # Passing the query as `params` lets requests URL-encode the label
    # (spaces, '&', non-ASCII characters) instead of splicing it in raw.
    params = {
        'action': 'wbsearchentities',
        'search': property_label,
        'language': 'en',
        'type': 'property',
        'format': 'json',
    }
    # A timeout keeps a stalled server from hanging the caller forever.
    response = requests.get(endpoint, params=params, timeout=30)

    if response.status_code != 200:
        logging.error(f"Failed to search for property by label in the target Wikibase. HTTP Status Code: {response.status_code}")
        return None

    matches = response.json().get('search')
    if not matches:
        logging.info(f"No property found for label: {property_label}")
        return None

    # Return the first matching property ID
    return matches[0]['id']
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
def get_entity_id_by_label(search_string, wiki, dict_result=False) -> list:
    """
    Performs a search for entities in the Wikibase instance using labels and aliases.
    You can have more information on the parameters in the MediaWiki API help (https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities)

    :param search_string: A string which should be searched for in the Wikibase instance (labels and aliases)
    :param wiki: The wiki to search in. Only "wikibase" is implemented; any other value raises ValueError.
    :param dict_result: If True, the result will be a list of dictionaries with the keys 'id', 'label', 'match', 'description' and 'aliases'. If False, the result will be a list of strings with the entity IDs.
    :return: A list of dictionaries or strings with the search results
    :raises ValueError: If ``wiki`` is not "wikibase".
    :raises SearchError: If the API response does not report success.
    """
    # Previously an unsupported `wiki` left the login and API URL unbound and
    # failed later with an UnboundLocalError; fail early and explicitly.
    if wiki != "wikibase":
        raise ValueError(f'Unsupported wiki {wiki!r}: only "wikibase" is implemented')

    params = {
        'action': 'wbsearchentities',
        'search': search_string,
        'language': "en",
        'type': "item",
        'limit': 50,
        'format': 'json',
    }

    cont_count = 0
    results = []

    while True:
        # 'continue' is the offset from which the API resumes the result list.
        params['continue'] = cont_count
        search_results = mediawiki_api_call_helper(
            data=params,
            login=login_wikibase,
            mediawiki_api_url=wikibase_api_url,
            user_agent=config['USER_AGENT'],
        )
        if search_results['success'] != 1:
            raise SearchError('Wikibase API wbsearchentities failed')

        for hit in search_results['search']:
            if dict_result:
                results.append({
                    'id': hit['id'],
                    'label': hit['label'],
                    'match': hit['match'],
                    # Optional fields: None when the API omits them.
                    'description': hit.get('description'),
                    'aliases': hit.get('aliases'),
                })
            else:
                results.append(hit['id'])

        if 'search-continue' not in search_results:
            break
        cont_count = search_results['search-continue']
        # Stop paging once the continuation offset reaches 50.
        if cont_count >= 50:
            break
    return results
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
# Property label -> property ID, resolved against the Wikibase API when the
# module is imported (one get_property_id_by_label() call per label, in the
# order listed).  A label that cannot be resolved maps to None.
wikibase_properties_id = {
    label: get_property_id_by_label(label, wikibase_api_url)
    for label in (
        "instance of",
        "reference URL",
        "start time",
        "end time",
        "occupation title",
        "educated at",
        "employer",
        "work location",
        "award received",
        "point in time",
        "exact match",
        "date of birth",
        "place of birth",
        "date of death",
        "country of citizenship",
        "occupation",
        "sex or gender",
        "official website",
        "perfumes",
        "who wears it",
        "inception",
        "headquarters location",
        "parent organization",
        "founded by",
        "owned by",
        "industry",
        "country",
        "total revenue",
        "designer employed",
        "country of origin",
        "fashion collection",
        "fashion season",
        "fashion show location",
        "description of fashion collection",
        "image of fashion collection",
        "editor of fashion collection description",
        "date of fashion collection",
        "fashion show category",
        "fashion house X fashion collection",
        "designer of collection",
    )
}
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
# Class label -> item ID, resolved against the Wikibase API when the module
# is imported.  The first search hit is taken for each label, so a label
# with no hit raises IndexError at import time.
# "fashion award" used to be listed twice, which cost a second, redundant API
# call for the same key; each label now appears exactly once.
classes_wikibase = {
    label: get_entity_id_by_label(label, "wikibase")[0]
    for label in (
        "fashion designer",
        "fashion house",
        "business",
        "academic institution",
        "geographic location",
        "fashion award",
        "gender",
        "occupation",
        "human",
        "organization",
        "brand",
        "lifestyle brand",
        "privately held company",
        "fashion season",
        "fashion show category",
        "fashion season collection",
        "fashion journalist",
    )
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|