Spaces:
Sleeping
Sleeping
traopia
committed on
Commit
·
dfef9ae
1
Parent(s):
06625d2
it is working now
Browse files- src/__pycache__/generate_queries_alternative.cpython-310.pyc +0 -0
- src/__pycache__/questions_queries.cpython-310.pyc +0 -0
- src/__pycache__/sparql_query_wikibase.cpython-310.pyc +0 -0
- src/__pycache__/use_llm.cpython-310.pyc +0 -0
- src/__pycache__/visual_qa.cpython-310.pyc +0 -0
- src/__pycache__/wikibase_helpers.cpython-310.pyc +0 -0
- src/generate_queries_alternative.py +12 -79
- src/questions_queries.py +44 -51
- src/sparql_query_wikibase.py +64 -109
- src/visual_qa.py +8 -7
- src/wikibase_helpers.py +0 -171
src/__pycache__/generate_queries_alternative.cpython-310.pyc
ADDED
Binary file (17 kB). View file
|
|
src/__pycache__/questions_queries.cpython-310.pyc
ADDED
Binary file (34.7 kB). View file
|
|
src/__pycache__/sparql_query_wikibase.cpython-310.pyc
ADDED
Binary file (4.42 kB). View file
|
|
src/__pycache__/use_llm.cpython-310.pyc
ADDED
Binary file (1.43 kB). View file
|
|
src/__pycache__/visual_qa.cpython-310.pyc
ADDED
Binary file (3.4 kB). View file
|
|
src/__pycache__/wikibase_helpers.cpython-310.pyc
ADDED
Binary file (5.52 kB). View file
|
|
src/generate_queries_alternative.py
CHANGED
@@ -7,6 +7,7 @@ import chromadb
|
|
7 |
import spacy
|
8 |
import numpy as np
|
9 |
import os
|
|
|
10 |
#os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
11 |
|
12 |
import spacy
|
@@ -20,63 +21,7 @@ def get_nlp():
|
|
20 |
return spacy.load("en_core_web_sm")
|
21 |
nlp = get_nlp()
|
22 |
|
23 |
-
|
24 |
-
'reference URL': 'P24',
|
25 |
-
'start time': 'P15',
|
26 |
-
'end time': 'P16',
|
27 |
-
'occupation title': 'P25',
|
28 |
-
'educated at': 'P9',
|
29 |
-
'employer': 'P10',
|
30 |
-
'work location': 'P7',
|
31 |
-
'award received': 'P18',
|
32 |
-
'point in time': 'P28',
|
33 |
-
'exact match': 'P23',
|
34 |
-
'date of birth': 'P3',
|
35 |
-
'place of birth': 'P4',
|
36 |
-
'date of death': 'P5',
|
37 |
-
'country of citizenship': 'P6',
|
38 |
-
'occupation': 'P19',
|
39 |
-
'sex or gender': 'P8',
|
40 |
-
'official website': 'P17',
|
41 |
-
'perfumes': 'P27',
|
42 |
-
'who wears it': 'P26',
|
43 |
-
'inception': 'P11',
|
44 |
-
'headquarters location': 'P12',
|
45 |
-
'parent organization': 'P13',
|
46 |
-
'founded by': 'P14',
|
47 |
-
'owned by': 'P22',
|
48 |
-
'industry': 'P20',
|
49 |
-
'country': 'P30',
|
50 |
-
'total revenue': 'P21',
|
51 |
-
'designer employed': 'P29',
|
52 |
-
'country of origin': 'P30',
|
53 |
-
'fashion collection': 'P31',
|
54 |
-
'fashion season': 'P32',
|
55 |
-
'fashion show location': 'P33',
|
56 |
-
'description of fashion collection': 'P34',
|
57 |
-
'image of fashion collection': 'P35',
|
58 |
-
'editor of fashion collection description': 'P36',
|
59 |
-
'date of fashion collection': 'P37',
|
60 |
-
'fashion show category': 'P38',
|
61 |
-
'fashion house X fashion collection': 'P39'}
|
62 |
-
|
63 |
-
classes_wikibase = {'fashion designer': 'Q5',
|
64 |
-
'fashion house': 'Q1',
|
65 |
-
'business': 'Q9',
|
66 |
-
'academic institution': 'Q2',
|
67 |
-
'geographic location': 'Q4',
|
68 |
-
'fashion award': 'Q8',
|
69 |
-
'gender': 'Q6',
|
70 |
-
'occupation': 'Q7',
|
71 |
-
'human': 'Q36',
|
72 |
-
'organization': 'Q3',
|
73 |
-
'brand': 'Q38',
|
74 |
-
'lifestyle brand': 'Q3417',
|
75 |
-
'privately held company': 'Q1729',
|
76 |
-
'fashion season': 'Q8199',
|
77 |
-
'fashion show category': 'Q8200',
|
78 |
-
'fashion season collection': 'Q8201',
|
79 |
-
'fashion journalist': 'Q8207'}
|
80 |
|
81 |
questions_queries_all = [{ "question": education_most_popular_question, "query": education_most_popular_query},
|
82 |
{ "question": how_many_designers_per_fashion_house_question, "query": how_many_designers_per_fashion_house_query},
|
@@ -91,19 +36,12 @@ questions_queries_all = [{ "question": education_most_popular_question, "query":
|
|
91 |
{"question": collections_jeans_question, "query": collections_jeans_query},
|
92 |
{"question": creative_directors_school_question, "query": creative_directors_school_query},
|
93 |
{"question": fashion_houses_thematic_collection_question, "query": fashion_houses_thematic_collection_query},
|
94 |
-
|
95 |
-
{ "question": designer_fashion_house_directors_question
|
96 |
-
{ "question": country_designer_question
|
97 |
-
{ "question": designer_order_fashion_collection_question
|
98 |
-
{ "question": designer_fashion_director_question2
|
99 |
-
{ "question": year_designers_birth_question
|
100 |
-
# {"question": fashion_house_directors_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_house_directors_query.substitute({ "x": f"'{"Chanel"}'"})},
|
101 |
-
# { "question": designer_fashion_house_directors_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_house_directors_query.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
102 |
-
# {"question": country_designer_question.substitute({ "x": f"{"Italy"}"}), "query": country_designer_query.substitute({ "x": f"'{"Italy"}'"})},
|
103 |
-
# { "question": designer_order_fashion_collection_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_order_fashion_collection_query.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
104 |
-
# {"question": designer_fashion_director_question2.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_director_query2.substitute({ "x": f"'{"Alexander McQueen"}'"})},
|
105 |
-
# { "question": year_designers_birth_question.substitute({ "x": 1970}), "query": year_designers_birth_query.substitute({ "x": 1970})},
|
106 |
-
# { "question": fashion_collection_images_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_collection_images_query.substitute({ "x": f"'{"Chanel"}'"})}
|
107 |
]
|
108 |
|
109 |
if os.path.exists("web_app/query_log.json"):
|
@@ -206,8 +144,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
|
|
206 |
|
207 |
# Store each unique document in the vector embedding database
|
208 |
for i, d in enumerate(masked_documents):
|
209 |
-
|
210 |
-
embedding = response[0] # Extract the first (and only) embedding from the nested list
|
211 |
|
212 |
# Check if embedding is unique
|
213 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
@@ -222,8 +159,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
|
|
222 |
|
223 |
# Compute the embedding for the input question
|
224 |
masked_question = mask_entities(question, nlp)
|
225 |
-
|
226 |
-
query_embedding = response[0] # Extract embedding
|
227 |
|
228 |
results = collection.query(
|
229 |
query_embeddings=[query_embedding], # Ensure correct format
|
@@ -265,9 +201,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
|
|
265 |
|
266 |
# Store each unique document in the vector embedding database
|
267 |
for i, d in enumerate(original_documents):
|
268 |
-
|
269 |
-
embedding = response[0] # Extract the first (and only) embedding from the nested list
|
270 |
-
|
271 |
# Check if embedding is unique
|
272 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
273 |
if not is_duplicate:
|
@@ -281,8 +215,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
|
|
281 |
|
282 |
# Compute the embedding for the input question
|
283 |
|
284 |
-
|
285 |
-
query_embedding = response[0] # Extract embedding
|
286 |
|
287 |
results = collection.query(
|
288 |
query_embeddings=[query_embedding], # Ensure correct format
|
|
|
7 |
import spacy
|
8 |
import numpy as np
|
9 |
import os
|
10 |
+
from src.sparql_query_wikibase import wikibase_properties_id, classes_wikibase
|
11 |
#os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
12 |
|
13 |
import spacy
|
|
|
21 |
return spacy.load("en_core_web_sm")
|
22 |
nlp = get_nlp()
|
23 |
|
24 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
questions_queries_all = [{ "question": education_most_popular_question, "query": education_most_popular_query},
|
27 |
{ "question": how_many_designers_per_fashion_house_question, "query": how_many_designers_per_fashion_house_query},
|
|
|
36 |
{"question": collections_jeans_question, "query": collections_jeans_query},
|
37 |
{"question": creative_directors_school_question, "query": creative_directors_school_query},
|
38 |
{"question": fashion_houses_thematic_collection_question, "query": fashion_houses_thematic_collection_query},
|
39 |
+
{"question": fashion_house_directors_question, "query": fashion_house_directors_query},
|
40 |
+
{ "question": designer_fashion_house_directors_question, "query": designer_fashion_house_directors_query},
|
41 |
+
{ "question": country_designer_question, "query": country_designer_query},
|
42 |
+
{ "question": designer_order_fashion_collection_question, "query": designer_order_fashion_collection_query},
|
43 |
+
{ "question": designer_fashion_director_question2, "query": designer_fashion_director_query2},
|
44 |
+
{ "question": year_designers_birth_question, "query": year_designers_birth_query}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
]
|
46 |
|
47 |
if os.path.exists("web_app/query_log.json"):
|
|
|
144 |
|
145 |
# Store each unique document in the vector embedding database
|
146 |
for i, d in enumerate(masked_documents):
|
147 |
+
embedding = get_embeddings(d)[0]
|
|
|
148 |
|
149 |
# Check if embedding is unique
|
150 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
|
|
159 |
|
160 |
# Compute the embedding for the input question
|
161 |
masked_question = mask_entities(question, nlp)
|
162 |
+
# Embed the masked input question, not `d` (the leftover loop variable holding the
# last stored document), so the query is compared against the masked documents.
query_embedding = get_embeddings(masked_question)[0]
|
|
|
163 |
|
164 |
results = collection.query(
|
165 |
query_embeddings=[query_embedding], # Ensure correct format
|
|
|
201 |
|
202 |
# Store each unique document in the vector embedding database
|
203 |
for i, d in enumerate(original_documents):
|
204 |
+
embedding = get_embeddings(d)[0]
|
|
|
|
|
205 |
# Check if embedding is unique
|
206 |
is_duplicate = any(np.allclose(embedding, np.array(e), atol=1e-6) for e in unique_embeddings.values())
|
207 |
if not is_duplicate:
|
|
|
215 |
|
216 |
# Compute the embedding for the input question
|
217 |
|
218 |
+
query_embedding = get_embeddings(question)[0]
|
|
|
219 |
|
220 |
results = collection.query(
|
221 |
query_embeddings=[query_embedding], # Ensure correct format
|
src/questions_queries.py
CHANGED
@@ -5,8 +5,8 @@ from src.sparql_query_wikibase import *
|
|
5 |
|
6 |
|
7 |
|
8 |
-
fashion_collection_images_question =
|
9 |
-
fashion_collection_images_query =
|
10 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
11 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
12 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
@@ -17,9 +17,9 @@ PREFIX prov: <http://www.w3.org/ns/prov#>
|
|
17 |
|
18 |
SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
19 |
?fashion_seasonLabel (YEAR(?fashion_year) AS ?year) ?categoryLabel WHERE {
|
20 |
-
{ ?fashion_house rdfs:label
|
21 |
UNION
|
22 |
-
{ ?fashion_house skos:altLabel
|
23 |
|
24 |
|
25 |
?fashion_house p:P31 ?statement.
|
@@ -39,10 +39,10 @@ SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
|
39 |
}
|
40 |
|
41 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } }
|
42 |
-
"""
|
43 |
|
44 |
-
year_designers_birth_question =
|
45 |
-
year_designers_birth_query =
|
46 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
47 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
48 |
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
@@ -53,13 +53,13 @@ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
|
53 |
|
54 |
SELECT ?designerLabel ?birthdate WHERE {
|
55 |
?designer wbt:P3 ?birthdate .
|
56 |
-
FILTER (YEAR(?birthdate) =
|
57 |
|
58 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
59 |
-
} """
|
60 |
|
61 |
-
fashion_house_directors_question =
|
62 |
-
fashion_house_directors_query =
|
63 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
64 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
65 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
@@ -74,9 +74,9 @@ SELECT
|
|
74 |
WHERE {
|
75 |
# Fashion house identified by brand name
|
76 |
?fashion_house wbt:P2 wb:Q1.
|
77 |
-
{ ?fashion_house rdfs:label
|
78 |
UNION
|
79 |
-
{ ?fashion_house skos:altLabel
|
80 |
|
81 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P10
|
82 |
{
|
@@ -139,12 +139,12 @@ WHERE {
|
|
139 |
FILTER(?reference_URL_source != "")
|
140 |
# Retrieve labels
|
141 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
142 |
-
} ORDER BY ?start_year """
|
143 |
|
144 |
|
145 |
|
146 |
-
designer_fashion_house_directors_question =
|
147 |
-
designer_fashion_house_directors_query =
|
148 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
149 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
150 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
@@ -160,9 +160,9 @@ SELECT ?fashion_houseLabel
|
|
160 |
WHERE {
|
161 |
# Designer linked to fashion house through roles or founded_by
|
162 |
?fashion_designer wbt:P2 wb:Q5.
|
163 |
-
{ ?fashion_designer rdfs:label
|
164 |
UNION
|
165 |
-
{ ?fashion_designer skos:altLabel
|
166 |
|
167 |
# Link through employer (P10)
|
168 |
?fashion_designer p:P10 ?statement.
|
@@ -200,11 +200,11 @@ WHERE {
|
|
200 |
|
201 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
202 |
}
|
203 |
-
"""
|
204 |
|
205 |
|
206 |
-
designer_fashion_director_question2 =
|
207 |
-
designer_fashion_director_query2 =
|
208 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
209 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
210 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
@@ -217,9 +217,9 @@ SELECT
|
|
217 |
WHERE {
|
218 |
# Fashion designer identified by name
|
219 |
?creative_director wbt:P2 wb:Q5.
|
220 |
-
{ ?creative_director rdfs:label
|
221 |
UNION
|
222 |
-
{ ?creative_director skos:altLabel
|
223 |
|
224 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P30
|
225 |
{
|
@@ -256,10 +256,10 @@ WHERE {
|
|
256 |
|
257 |
# Retrieve labels
|
258 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
259 |
-
} ORDER BY ?start_year """
|
260 |
|
261 |
-
country_designer_question =
|
262 |
-
country_designer_query =
|
263 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
264 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
265 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
@@ -276,9 +276,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?statement) AS ?statement)
|
|
276 |
# Access the statement for the country relationship
|
277 |
?fashion_designer p:P6 ?statement.
|
278 |
?statement ps:P6 ?country.
|
279 |
-
{ ?country rdfs:label
|
280 |
UNION
|
281 |
-
{ ?country skos:altLabel
|
282 |
|
283 |
# Retrieve references from the statement
|
284 |
OPTIONAL {
|
@@ -290,9 +290,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?statement) AS ?statement)
|
|
290 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
291 |
}
|
292 |
GROUP BY ?fashion_designerLabel
|
293 |
-
ORDER BY ?fashion_designerLabel """
|
294 |
|
295 |
-
country_designer_query =
|
296 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
297 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
298 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
@@ -306,9 +306,9 @@ SELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {
|
|
306 |
?fashion_designer wbt:P2 wb:Q5.
|
307 |
?fashion_designer wbt:P6 ?country.
|
308 |
?fashion_designer p:P6 ?statement.
|
309 |
-
{ ?country rdfs:label
|
310 |
UNION
|
311 |
-
{ ?country skos:altLabel
|
312 |
|
313 |
|
314 |
# Retrieve references from the statement
|
@@ -321,11 +321,11 @@ SELECT ?fashion_designerLabel (SAMPLE(?reference_URL) AS ?reference_URL) {
|
|
321 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
322 |
}
|
323 |
GROUP BY ?fashion_designerLabel
|
324 |
-
ORDER BY ?fashion_designerLabel """
|
325 |
|
326 |
|
327 |
-
designer_order_fashion_collection_question =
|
328 |
-
designer_order_fashion_collection_query =
|
329 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
330 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
331 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
@@ -340,9 +340,9 @@ SELECT ?fashion_houseLabel
|
|
340 |
(YEAR(?fashion_year) as ?year ) ?fashion_seasonLabel ?categoryLabel WHERE {
|
341 |
# Designer linked to fashion house through roles or founded_by
|
342 |
?fashion_designer wbt:P2 wb:Q5.
|
343 |
-
{ ?fashion_designer rdfs:label
|
344 |
UNION
|
345 |
-
{ ?fashion_designer skos:altLabel
|
346 |
|
347 |
|
348 |
# Link through "creative director" or other roles
|
@@ -384,7 +384,7 @@ FILTER(
|
|
384 |
}
|
385 |
|
386 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
387 |
-
} ORDER BY ?year """
|
388 |
|
389 |
|
390 |
education_most_popular_question = "What are the most popular educational institutions for fashion designers?"
|
@@ -683,20 +683,13 @@ questions_queries_dict = [ { "question": education_most_popular_question, "query
|
|
683 |
{"question": collections_sustainability_theme_question, "query": collections_sustainability_theme_query},
|
684 |
]
|
685 |
|
686 |
-
# questions_queries_dict_entity = [{"question": fashion_house_directors_question.substitute({ "x": f"{"Chanel"}"}), "query": fashion_house_directors_query},
|
687 |
-
# { "question": designer_fashion_house_directors_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_house_directors_query},
|
688 |
-
# { "question": country_designer_question.substitute({ "x": f"{"Italy"}"}), "query": country_designer_query},
|
689 |
-
# { "question": designer_order_fashion_collection_question.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_order_fashion_collection_query},
|
690 |
-
# { "question": designer_fashion_director_question2.substitute({ "x": f"{"Alexander McQueen"}"}), "query": designer_fashion_director_query2},
|
691 |
-
# { "question": year_designers_birth_question.substitute({ "x": f"{"1970"}"}), "query": year_designers_birth_query}]
|
692 |
-
|
693 |
|
694 |
-
questions_queries_dict_entity = [{"question": fashion_house_directors_question
|
695 |
-
{ "question": designer_fashion_house_directors_question
|
696 |
-
{ "question": country_designer_question
|
697 |
-
{ "question": designer_order_fashion_collection_question
|
698 |
-
{ "question": designer_fashion_director_question2
|
699 |
-
{ "question": year_designers_birth_question
|
700 |
|
701 |
|
702 |
# Designers
|
|
|
5 |
|
6 |
|
7 |
|
8 |
+
fashion_collection_images_question = "Images of collections produced by Chanel"
|
9 |
+
fashion_collection_images_query = """
|
10 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
11 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
12 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
17 |
|
18 |
SELECT ?statement ?fashion_collectionLabel ?image_url ?reference_URL
|
19 |
?fashion_seasonLabel (YEAR(?fashion_year) AS ?year) ?categoryLabel WHERE {
|
20 |
+
{ ?fashion_house rdfs:label "Chanel"@en.} # Match label
|
21 |
UNION
|
22 |
+
{ ?fashion_house skos:altLabel "Chanel"@en. } # Match alias
|
23 |
|
24 |
|
25 |
?fashion_house p:P31 ?statement.
|
|
|
39 |
}
|
40 |
|
41 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } }
|
42 |
+
"""
|
43 |
|
44 |
+
year_designers_birth_question = "Which designers were born in 1963?"
|
45 |
+
year_designers_birth_query = """
|
46 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
47 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
48 |
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
|
|
53 |
|
54 |
SELECT ?designerLabel ?birthdate WHERE {
|
55 |
?designer wbt:P3 ?birthdate .
|
56 |
+
FILTER (YEAR(?birthdate) = 1963)
|
57 |
|
58 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
59 |
+
} """
|
60 |
|
61 |
+
fashion_house_directors_question = "Which designers have been creative directors of Chanel ?"
|
62 |
+
fashion_house_directors_query = """
|
63 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
64 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
65 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
74 |
WHERE {
|
75 |
# Fashion house identified by brand name
|
76 |
?fashion_house wbt:P2 wb:Q1.
|
77 |
+
{ ?fashion_house rdfs:label "Chanel"@en.} # Match label
|
78 |
UNION
|
79 |
+
{ ?fashion_house skos:altLabel "Chanel"@en. } # Match alias
|
80 |
|
81 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P10
|
82 |
{
|
|
|
139 |
FILTER(?reference_URL_source != "")
|
140 |
# Retrieve labels
|
141 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
142 |
+
} ORDER BY ?start_year """
|
143 |
|
144 |
|
145 |
|
146 |
+
designer_fashion_house_directors_question = "In which fashion house did Alessandro Michele work as a creative director?"
|
147 |
+
designer_fashion_house_directors_query = """
|
148 |
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
149 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
150 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
|
|
160 |
WHERE {
|
161 |
# Designer linked to fashion house through roles or founded_by
|
162 |
?fashion_designer wbt:P2 wb:Q5.
|
163 |
+
{ ?fashion_designer rdfs:label "Alessandro Michele"@en.} # Match label
|
164 |
UNION
|
165 |
+
{ ?fashion_designer skos:altLabel "Alessandro Michele"@en. } # Match alias
|
166 |
|
167 |
# Link through employer (P10)
|
168 |
?fashion_designer p:P10 ?statement.
|
|
|
200 |
|
201 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
202 |
}
|
203 |
+
"""
|
204 |
|
205 |
|
206 |
+
designer_fashion_director_question2 = "In which fashion house did Alessandro Michele work as a creative director?"
|
207 |
+
designer_fashion_director_query2 = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
208 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
209 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
210 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
217 |
WHERE {
|
218 |
# Fashion designer identified by name
|
219 |
?creative_director wbt:P2 wb:Q5.
|
220 |
+
{ ?creative_director rdfs:label "Alessandro Michele"@en.} # Match label
|
221 |
UNION
|
222 |
+
{ ?creative_director skos:altLabel "Alessandro Michele"@en. } # Match alias
|
223 |
|
224 |
# Link through designer employed (P29) property for the fashion house - the property for designer employment is P30
|
225 |
{
|
|
|
256 |
|
257 |
# Retrieve labels
|
258 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
259 |
+
} ORDER BY ?start_year """
|
260 |
|
261 |
+
country_designer_question = "Which designers are from Italy ?"
|
262 |
+
country_designer_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
263 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
264 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
265 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
276 |
# Access the statement for the country relationship
|
277 |
?fashion_designer p:P6 ?statement.
|
278 |
?statement ps:P6 ?country.
|
279 |
+
{ ?country rdfs:label "Italy"@en.} # Match label
|
280 |
UNION
|
281 |
+
{ ?country skos:altLabel "Italy"@en. } # Match alias
|
282 |
|
283 |
# Retrieve references from the statement
|
284 |
OPTIONAL {
|
|
|
290 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
291 |
}
|
292 |
GROUP BY ?fashion_designerLabel
|
293 |
+
ORDER BY ?fashion_designerLabel """
|
294 |
|
295 |
+
country_designer_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
296 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
297 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
298 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
306 |
?fashion_designer wbt:P2 wb:Q5.
|
307 |
?fashion_designer wbt:P6 ?country.
|
308 |
?fashion_designer p:P6 ?statement.
|
309 |
+
{ ?country rdfs:label "Italy"@en.} # Match label
|
310 |
UNION
|
311 |
+
{ ?country skos:altLabel "Italy"@en. } # Match alias
|
312 |
|
313 |
|
314 |
# Retrieve references from the statement
|
|
|
321 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
322 |
}
|
323 |
GROUP BY ?fashion_designerLabel
|
324 |
+
ORDER BY ?fashion_designerLabel """
|
325 |
|
326 |
|
327 |
+
designer_order_fashion_collection_question = "Which fashion collections did Alessandro Michele work on?"
|
328 |
+
designer_order_fashion_collection_query = """PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
329 |
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
330 |
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
331 |
PREFIX pr: <https://fashionwiki.wikibase.cloud/prop/reference/>
|
|
|
340 |
(YEAR(?fashion_year) as ?year ) ?fashion_seasonLabel ?categoryLabel WHERE {
|
341 |
# Designer linked to fashion house through roles or founded_by
|
342 |
?fashion_designer wbt:P2 wb:Q5.
|
343 |
+
{ ?fashion_designer rdfs:label "Alessandro Michele"@en.} # Match label
|
344 |
UNION
|
345 |
+
{ ?fashion_designer skos:altLabel "Alessandro Michele"@en. } # Match alias
|
346 |
|
347 |
|
348 |
# Link through "creative director" or other roles
|
|
|
384 |
}
|
385 |
|
386 |
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
387 |
+
} ORDER BY ?year """
|
388 |
|
389 |
|
390 |
education_most_popular_question = "What are the most popular educational institutions for fashion designers?"
|
|
|
683 |
{"question": collections_sustainability_theme_question, "query": collections_sustainability_theme_query},
|
684 |
]
|
685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
686 |
|
687 |
+
questions_queries_dict_entity = [{"question": fashion_house_directors_question, "query": fashion_house_directors_query},
|
688 |
+
{ "question": designer_fashion_house_directors_question, "query": designer_fashion_house_directors_query},
|
689 |
+
{ "question": country_designer_question, "query": country_designer_query},
|
690 |
+
{ "question": designer_order_fashion_collection_question, "query": designer_order_fashion_collection_query},
|
691 |
+
{ "question": designer_fashion_director_question2, "query": designer_fashion_director_query2},
|
692 |
+
{ "question": year_designers_birth_question, "query": year_designers_birth_query}]
|
693 |
|
694 |
|
695 |
# Designers
|
src/sparql_query_wikibase.py
CHANGED
@@ -10,7 +10,70 @@ from wikibaseintegrator.wbi_helpers import get_user_agent
|
|
10 |
import pandas as pd
|
11 |
from string import Template
|
12 |
queries = False
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
|
@@ -65,111 +128,3 @@ def get_results_to_df( query):
|
|
65 |
df = df.map(lambda x: x['value'] if pd.notnull(x) else None)
|
66 |
return df
|
67 |
|
68 |
-
if queries:
|
69 |
-
query_fashion_designers_template = Template("""
|
70 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
71 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
72 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
73 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
74 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
75 |
-
|
76 |
-
SELECT ?fashionDesignerLabel ?fashionDesigner WHERE {
|
77 |
-
?fashionDesigner wbt:$instance_of wb:$fashion_designer.
|
78 |
-
|
79 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
80 |
-
} ORDER BY ?fashionDesignerLabel
|
81 |
-
""")
|
82 |
-
query_fashion_designers = query_fashion_designers_template.substitute(
|
83 |
-
{
|
84 |
-
"instance_of": wikibase_properties_id["instance of"],
|
85 |
-
"fashion_designer": classes_wikibase["fashion designer"],
|
86 |
-
|
87 |
-
}
|
88 |
-
)
|
89 |
-
|
90 |
-
query_fashion_houses_template = Template("""
|
91 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
92 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
93 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
94 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
95 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
96 |
-
|
97 |
-
SELECT ?fashionHouseLabel ?fashionHouse WHERE {
|
98 |
-
?fashionHouse wbt:$instance_of wb:$fashion_house.
|
99 |
-
|
100 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
101 |
-
} ORDER BY ?fashionHouseLabel
|
102 |
-
""")
|
103 |
-
#query_fashion_designers = query_fashion_designers_template.substitute(wikidata_properties_id["occupation"], fashion_designer = classes_wikidata["fashion designer"], grand_couturier = classes_wikidata["grand couturier"])
|
104 |
-
query_fashion_house= query_fashion_houses_template.substitute(
|
105 |
-
{
|
106 |
-
"instance_of": wikibase_properties_id["instance of"],
|
107 |
-
"fashion_house": classes_wikibase["fashion house"],
|
108 |
-
|
109 |
-
}
|
110 |
-
)
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
query_school_template = Template("""
|
115 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
116 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
117 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
118 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
119 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
120 |
-
|
121 |
-
SELECT ?fashionSchoolLabel WHERE {
|
122 |
-
?fashionSchool wbt:$instance_of wb:$academic_institution.
|
123 |
-
|
124 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
125 |
-
} ORDER BY ?fashionSchoolLabel
|
126 |
-
""")
|
127 |
-
#query_fashion_designers = query_fashion_designers_template.substitute(wikidata_properties_id["occupation"], fashion_designer = classes_wikidata["fashion designer"], grand_couturier = classes_wikidata["grand couturier"])
|
128 |
-
query_school = query_school_template.substitute(
|
129 |
-
{
|
130 |
-
"instance_of": wikibase_properties_id["instance of"],
|
131 |
-
"academic_institution": classes_wikibase["academic institution"],
|
132 |
-
})
|
133 |
-
|
134 |
-
query_award_template = Template("""
|
135 |
-
PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
|
136 |
-
PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
|
137 |
-
PREFIX pq: <https://fashionwiki.wikibase.cloud/prop/qualifier/>
|
138 |
-
PREFIX ps: <https://fashionwiki.wikibase.cloud/prop/statement/>
|
139 |
-
PREFIX p: <https://fashionwiki.wikibase.cloud/prop/>
|
140 |
-
|
141 |
-
SELECT ?fashionAwardLabel WHERE {
|
142 |
-
?fashionAward wbt:$instance_of wb:$fashion_award.
|
143 |
-
|
144 |
-
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
145 |
-
} ORDER BY ?fashionAwardLabel
|
146 |
-
""")
|
147 |
-
#query_fashion_designers = query_fashion_designers_template.substitute(wikidata_properties_id["occupation"], fashion_designer = classes_wikidata["fashion designer"], grand_couturier = classes_wikidata["grand couturier"])
|
148 |
-
query_award = query_award_template.substitute(
|
149 |
-
{
|
150 |
-
"instance_of": wikibase_properties_id["instance of"],
|
151 |
-
"fashion_award": classes_wikibase["fashion award"],
|
152 |
-
})
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
def get_fashion_designers_wikibase(output_file):
|
157 |
-
df_designers = get_results_to_df(query_fashion_designers)
|
158 |
-
df_designers.to_csv(output_file, index=False)
|
159 |
-
return get_results_to_df(query_fashion_designers)
|
160 |
-
|
161 |
-
|
162 |
-
def get_fashion_houses_wikibase(output_file):
|
163 |
-
df_fashion_houses = get_results_to_df(query_fashion_house)
|
164 |
-
df_fashion_houses.to_csv(output_file, index=False)
|
165 |
-
return get_results_to_df(query_fashion_house)
|
166 |
-
|
167 |
-
def get_schools_wikibase(output_file):
|
168 |
-
df_schools = get_results_to_df(query_school)
|
169 |
-
df_schools.to_csv(output_file, index=False)
|
170 |
-
return get_results_to_df(query_school)
|
171 |
-
|
172 |
-
def get_awards_wikibase(output_file):
|
173 |
-
df_awards = get_results_to_df(query_award)
|
174 |
-
df_awards.to_csv(output_file, index=False)
|
175 |
-
return get_results_to_df(query_award)
|
|
|
10 |
import pandas as pd
|
11 |
from string import Template
|
12 |
queries = False
|
13 |
+
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'
|
14 |
+
config = {
|
15 |
+
"SPARQL_ENDPOINT_URL": "https://fashionwiki.wikibase.cloud/query/sparql",
|
16 |
+
'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
|
17 |
+
'WIKIBASE_URL': wikibase_api_url,
|
18 |
+
}
|
19 |
+
|
20 |
+
wikibase_properties_id = {'instance of': 'P2',
|
21 |
+
'reference URL': 'P24',
|
22 |
+
'start time': 'P15',
|
23 |
+
'end time': 'P16',
|
24 |
+
'occupation title': 'P25',
|
25 |
+
'educated at': 'P9',
|
26 |
+
'employer': 'P10',
|
27 |
+
'work location': 'P7',
|
28 |
+
'award received': 'P18',
|
29 |
+
'point in time': 'P28',
|
30 |
+
'exact match': 'P23',
|
31 |
+
'date of birth': 'P3',
|
32 |
+
'place of birth': 'P4',
|
33 |
+
'date of death': 'P5',
|
34 |
+
'country of citizenship': 'P6',
|
35 |
+
'occupation': 'P19',
|
36 |
+
'sex or gender': 'P8',
|
37 |
+
'official website': 'P17',
|
38 |
+
'perfumes': 'P27',
|
39 |
+
'who wears it': 'P26',
|
40 |
+
'inception': 'P11',
|
41 |
+
'headquarters location': 'P12',
|
42 |
+
'parent organization': 'P13',
|
43 |
+
'founded by': 'P14',
|
44 |
+
'owned by': 'P22',
|
45 |
+
'industry': 'P20',
|
46 |
+
'country': 'P30',
|
47 |
+
'total revenue': 'P21',
|
48 |
+
'designer employed': 'P29',
|
49 |
+
'country of origin': 'P30',
|
50 |
+
'fashion collection': 'P31',
|
51 |
+
'fashion season': 'P32',
|
52 |
+
'fashion show location': 'P33',
|
53 |
+
'description of fashion collection': 'P34',
|
54 |
+
'image of fashion collection': 'P35',
|
55 |
+
'editor of fashion collection description': 'P36',
|
56 |
+
'date of fashion collection': 'P37',
|
57 |
+
'fashion show category': 'P38',
|
58 |
+
'fashion house X fashion collection': 'P39'}
|
59 |
+
|
60 |
+
classes_wikibase = {'fashion designer': 'Q5',
|
61 |
+
'fashion house': 'Q1',
|
62 |
+
'business': 'Q9',
|
63 |
+
'academic institution': 'Q2',
|
64 |
+
'geographic location': 'Q4',
|
65 |
+
'fashion award': 'Q8',
|
66 |
+
'gender': 'Q6',
|
67 |
+
'occupation': 'Q7',
|
68 |
+
'human': 'Q36',
|
69 |
+
'organization': 'Q3',
|
70 |
+
'brand': 'Q38',
|
71 |
+
'lifestyle brand': 'Q3417',
|
72 |
+
'privately held company': 'Q1729',
|
73 |
+
'fashion season': 'Q8199',
|
74 |
+
'fashion show category': 'Q8200',
|
75 |
+
'fashion season collection': 'Q8201',
|
76 |
+
'fashion journalist': 'Q8207'}
|
77 |
|
78 |
|
79 |
def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
|
|
|
128 |
df = df.map(lambda x: x['value'] if pd.notnull(x) else None)
|
129 |
return df
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/visual_qa.py
CHANGED
@@ -9,7 +9,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
9 |
from datasets import load_dataset
|
10 |
import chromadb
|
11 |
|
12 |
-
from datetime import datetime
|
13 |
|
14 |
def initialize_collection(collection_name="clip_image_embeddings"):
|
15 |
# Initialize ChromaDB client (PersistentClient stores embeddings between runs)
|
@@ -73,7 +73,7 @@ device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
|
73 |
model = CLIPModel.from_pretrained(model_name).to(device)
|
74 |
processor = CLIPProcessor.from_pretrained(model_name)
|
75 |
|
76 |
-
def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
|
77 |
|
78 |
|
79 |
|
@@ -86,6 +86,7 @@ def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
|
|
86 |
|
87 |
# Convert to pandas DataFrame
|
88 |
df_emb = full_dataset.to_pandas()
|
|
|
89 |
df_emb = df_emb.drop_duplicates(subset='image_urls')
|
90 |
df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
|
91 |
df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
|
@@ -112,14 +113,14 @@ def main_text_retrieve_images(text, result_query=None, n_retrieved=3):
|
|
112 |
query_embeddings=text_features[0],
|
113 |
n_results=n_retrieved
|
114 |
)
|
115 |
-
|
116 |
result_doc = pd.DataFrame(results['documents'][0], columns=["image_url"])
|
117 |
df_result = df_emb.merge(result_doc[['image_url']], on='image_url', how='inner')
|
118 |
# Remove columns fashion_clip_image, image_urls, and description
|
119 |
df_result = df_result.drop(columns=['fashion_clip_image', 'description', 'editor', 'publish_date', 'image_urls'])
|
120 |
return df_result.to_dict(orient='records')
|
121 |
|
122 |
-
if __name__ == "__main__":
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
9 |
from datasets import load_dataset
|
10 |
import chromadb
|
11 |
|
12 |
+
from datetime import datetime
|
13 |
|
14 |
def initialize_collection(collection_name="clip_image_embeddings"):
|
15 |
# Initialize ChromaDB client (PersistentClient stores embeddings between runs)
|
|
|
73 |
model = CLIPModel.from_pretrained(model_name).to(device)
|
74 |
processor = CLIPProcessor.from_pretrained(model_name)
|
75 |
|
76 |
+
def main_text_retrieve_images(text, result_query=None, n_retrieved=5):
|
77 |
|
78 |
|
79 |
|
|
|
86 |
|
87 |
# Convert to pandas DataFrame
|
88 |
df_emb = full_dataset.to_pandas()
|
89 |
+
#print(df_emb.head()) # Debugging statement
|
90 |
df_emb = df_emb.drop_duplicates(subset='image_urls')
|
91 |
df_emb['fashion_clip_image'] = df_emb['fashion_clip_image'].apply(lambda x: x[0] if type(x) == list else None)
|
92 |
df_emb['image_url'] = df_emb['image_urls'].apply(lambda x: x[0] if x else None)
|
|
|
113 |
query_embeddings=text_features[0],
|
114 |
n_results=n_retrieved
|
115 |
)
|
116 |
+
|
117 |
result_doc = pd.DataFrame(results['documents'][0], columns=["image_url"])
|
118 |
df_result = df_emb.merge(result_doc[['image_url']], on='image_url', how='inner')
|
119 |
# Remove columns fashion_clip_image, image_urls, and description
|
120 |
df_result = df_result.drop(columns=['fashion_clip_image', 'description', 'editor', 'publish_date', 'image_urls'])
|
121 |
return df_result.to_dict(orient='records')
|
122 |
|
123 |
+
# if __name__ == "__main__":
|
124 |
+
# text = "dress"
|
125 |
+
# result = main_text_retrieve_images(text)
|
126 |
+
# print(result)
|
src/wikibase_helpers.py
DELETED
@@ -1,171 +0,0 @@
|
|
1 |
-
import requests
|
2 |
-
import logging
|
3 |
-
from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_helpers
|
4 |
-
from wikibaseintegrator.wbi_config import config
|
5 |
-
from wikibaseintegrator.wbi_exceptions import MWApiError
|
6 |
-
from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, SearchError
|
7 |
-
from wikibaseintegrator import WikibaseIntegrator, wbi_login
|
8 |
-
login_wikibase = wbi_login.Login(user="Traopia", password="diqfiz-wodnI4-jafwax", mediawiki_api_url="https://fashionwiki.wikibase.cloud/w/api.php")
|
9 |
-
|
10 |
-
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'
|
11 |
-
config = {
|
12 |
-
"SPARQL_ENDPOINT_URL": "https://fashionwiki.wikibase.cloud/query/sparql",
|
13 |
-
'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
|
14 |
-
'WIKIBASE_URL': wikibase_api_url,
|
15 |
-
}
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
# List of valid language codes (can be expanded)
|
21 |
-
VALID_LANGUAGE_CODES = ['en']
|
22 |
-
|
23 |
-
def get_property_id_by_label(property_label, api_url):
|
24 |
-
"""
|
25 |
-
Resolve the property label to its corresponding property ID from Wikibase.
|
26 |
-
|
27 |
-
Args:
|
28 |
-
property_label (str): The label of the property to search.
|
29 |
-
api_url (str): The API URL of the target Wikibase or Wikidata.
|
30 |
-
|
31 |
-
Returns:
|
32 |
-
str: The property ID if found, otherwise None.
|
33 |
-
"""
|
34 |
-
url = f'{api_url}/w/api.php?action=wbsearchentities&search={property_label}&language=en&type=property&format=json'
|
35 |
-
response = requests.get(url)
|
36 |
-
|
37 |
-
if response.status_code == 200:
|
38 |
-
search_results = response.json()
|
39 |
-
if 'search' in search_results and search_results['search']:
|
40 |
-
# Return the first matching property ID
|
41 |
-
return search_results['search'][0]['id']
|
42 |
-
else:
|
43 |
-
logging.info(f"No property found for label: {property_label}")
|
44 |
-
return None
|
45 |
-
else:
|
46 |
-
logging.error(f"Failed to search for property by label in the target Wikibase. HTTP Status Code: {response.status_code}")
|
47 |
-
return None
|
48 |
-
|
49 |
-
|
50 |
-
def get_entity_id_by_label(search_string,wiki, dict_result=False) -> list:
|
51 |
-
"""
|
52 |
-
Performs a search for entities in the Wikibase instance using labels and aliases.
|
53 |
-
You can have more information on the parameters in the MediaWiki API help (https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities)
|
54 |
-
|
55 |
-
:param search_string: A string which should be searched for in the Wikibase instance (labels and aliases)
|
56 |
-
:param wiki: The wiki to search in. It can be "wikidata" or "wikibase"
|
57 |
-
:param dict_result: If True, the result will be a list of dictionaries with the keys 'id', 'label', 'match', 'description' and 'aliases'. If False, the result will be a list of strings with the entity IDs.
|
58 |
-
:return: A list of dictionaries or strings with the search results
|
59 |
-
"""
|
60 |
-
|
61 |
-
if wiki == "wikibase":
|
62 |
-
login = login_wikibase
|
63 |
-
mediawiki_api_url = wikibase_api_url
|
64 |
-
|
65 |
-
language = "en"
|
66 |
-
strict_language = False
|
67 |
-
|
68 |
-
params = {
|
69 |
-
'action': 'wbsearchentities',
|
70 |
-
'search': search_string,
|
71 |
-
'language': language,
|
72 |
-
'type': "item",
|
73 |
-
'limit': 50,
|
74 |
-
'format': 'json',
|
75 |
-
}
|
76 |
-
|
77 |
-
if strict_language:
|
78 |
-
params.update({'strict_language': ''})
|
79 |
-
|
80 |
-
cont_count = 0
|
81 |
-
results = []
|
82 |
-
|
83 |
-
while True:
|
84 |
-
params.update({'continue': cont_count})
|
85 |
-
search_results = mediawiki_api_call_helper(data=params, login = login, mediawiki_api_url=mediawiki_api_url, user_agent = config['USER_AGENT'])
|
86 |
-
if search_results['success'] != 1:
|
87 |
-
raise SearchError('Wikibase API wbsearchentities failed')
|
88 |
-
|
89 |
-
for i in search_results['search']:
|
90 |
-
if dict_result:
|
91 |
-
description = i['description'] if 'description' in i else None
|
92 |
-
aliases = i['aliases'] if 'aliases' in i else None
|
93 |
-
results.append({
|
94 |
-
'id': i['id'],
|
95 |
-
'label': i['label'],
|
96 |
-
'match': i['match'],
|
97 |
-
'description': description,
|
98 |
-
'aliases': aliases
|
99 |
-
})
|
100 |
-
else:
|
101 |
-
results.append(i['id'])
|
102 |
-
|
103 |
-
if 'search-continue' not in search_results:
|
104 |
-
break
|
105 |
-
cont_count = search_results['search-continue']
|
106 |
-
if cont_count >= 50:
|
107 |
-
break
|
108 |
-
return results
|
109 |
-
|
110 |
-
|
111 |
-
wikibase_properties_id = {"instance of": get_property_id_by_label("instance of", wikibase_api_url),
|
112 |
-
"reference URL": get_property_id_by_label("reference URL", wikibase_api_url),
|
113 |
-
"start time": get_property_id_by_label("start time", wikibase_api_url),
|
114 |
-
"end time": get_property_id_by_label("end time", wikibase_api_url),
|
115 |
-
"occupation title": get_property_id_by_label("occupation title", wikibase_api_url),
|
116 |
-
"educated at": get_property_id_by_label("educated at", wikibase_api_url),
|
117 |
-
"employer": get_property_id_by_label("employer", wikibase_api_url),
|
118 |
-
"work location": get_property_id_by_label("work location", wikibase_api_url),
|
119 |
-
"award received": get_property_id_by_label("award received", wikibase_api_url),
|
120 |
-
"point in time": get_property_id_by_label("point in time", wikibase_api_url),
|
121 |
-
"exact match": get_property_id_by_label("exact match", wikibase_api_url),
|
122 |
-
"date of birth": get_property_id_by_label("date of birth", wikibase_api_url),
|
123 |
-
"place of birth": get_property_id_by_label("place of birth", wikibase_api_url),
|
124 |
-
"date of death": get_property_id_by_label("date of death", wikibase_api_url),
|
125 |
-
"country of citizenship": get_property_id_by_label("country of citizenship", wikibase_api_url),
|
126 |
-
"occupation": get_property_id_by_label("occupation", wikibase_api_url),
|
127 |
-
"sex or gender": get_property_id_by_label("sex or gender", wikibase_api_url),
|
128 |
-
"official website": get_property_id_by_label("official website", wikibase_api_url),
|
129 |
-
"perfumes": get_property_id_by_label("perfumes", wikibase_api_url),
|
130 |
-
"who wears it": get_property_id_by_label("who wears it", wikibase_api_url),
|
131 |
-
"inception": get_property_id_by_label("inception", wikibase_api_url),
|
132 |
-
"headquarters location": get_property_id_by_label("headquarters location", wikibase_api_url),
|
133 |
-
"parent organization": get_property_id_by_label("parent organization", wikibase_api_url),
|
134 |
-
"founded by": get_property_id_by_label("founded by", wikibase_api_url),
|
135 |
-
"owned by": get_property_id_by_label("owned by", wikibase_api_url),
|
136 |
-
"industry": get_property_id_by_label("industry", wikibase_api_url),
|
137 |
-
"country": get_property_id_by_label("country", wikibase_api_url),
|
138 |
-
"total revenue": get_property_id_by_label("total revenue", wikibase_api_url),
|
139 |
-
"designer employed": get_property_id_by_label("designer employed", wikibase_api_url),
|
140 |
-
"country of origin": get_property_id_by_label("country of origin", wikibase_api_url),
|
141 |
-
"fashion collection": get_property_id_by_label("fashion collection", wikibase_api_url),
|
142 |
-
"fashion season": get_property_id_by_label("fashion season", wikibase_api_url),
|
143 |
-
"fashion show location": get_property_id_by_label("fashion show location", wikibase_api_url),
|
144 |
-
"description of fashion collection": get_property_id_by_label("description of fashion collection", wikibase_api_url),
|
145 |
-
"image of fashion collection": get_property_id_by_label("image of fashion collection", wikibase_api_url),
|
146 |
-
"editor of fashion collection description": get_property_id_by_label("editor of fashion collection description", wikibase_api_url),
|
147 |
-
"date of fashion collection": get_property_id_by_label("date of fashion collection", wikibase_api_url),
|
148 |
-
"fashion show category": get_property_id_by_label("fashion show category", wikibase_api_url),
|
149 |
-
"fashion house X fashion collection": get_property_id_by_label("fashion house X fashion collection", wikibase_api_url),
|
150 |
-
"designer of collection": get_property_id_by_label("designer of collection", wikibase_api_url)}
|
151 |
-
|
152 |
-
|
153 |
-
classes_wikibase = {"fashion designer": get_entity_id_by_label("fashion designer", "wikibase")[0],
|
154 |
-
"fashion house": get_entity_id_by_label("fashion house", "wikibase")[0],
|
155 |
-
"business": get_entity_id_by_label("business", "wikibase")[0],
|
156 |
-
"academic institution": get_entity_id_by_label("academic institution", "wikibase")[0],
|
157 |
-
"geographic location": get_entity_id_by_label("geographic location", "wikibase")[0],
|
158 |
-
"fashion award": get_entity_id_by_label("fashion award","wikibase")[0],
|
159 |
-
"gender":get_entity_id_by_label("gender","wikibase")[0] ,
|
160 |
-
"occupation": get_entity_id_by_label("occupation","wikibase")[0],
|
161 |
-
"human": get_entity_id_by_label("human","wikibase")[0],
|
162 |
-
"organization": get_entity_id_by_label("organization","wikibase")[0],
|
163 |
-
"brand": get_entity_id_by_label("brand","wikibase")[0],
|
164 |
-
"lifestyle brand": get_entity_id_by_label("lifestyle brand","wikibase")[0],
|
165 |
-
"privately held company": get_entity_id_by_label("privately held company","wikibase")[0],
|
166 |
-
"fashion award": get_entity_id_by_label("fashion award", "wikibase")[0],
|
167 |
-
"fashion season": get_entity_id_by_label("fashion season", "wikibase")[0],
|
168 |
-
"fashion show category": get_entity_id_by_label("fashion show category", "wikibase")[0],
|
169 |
-
"fashion season collection": get_entity_id_by_label("fashion season collection", "wikibase")[0],
|
170 |
-
"fashion journalist": get_entity_id_by_label("fashion journalist", "wikibase")[0],
|
171 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|