traopia commited on
Commit
00c29b3
·
1 Parent(s): 767fe1d
src/__pycache__/generate_queries_alternative.cpython-312.pyc DELETED
Binary file (28.2 kB)
 
src/__pycache__/questions_queries.cpython-312.pyc DELETED
Binary file (36.3 kB)
 
src/__pycache__/sparql_query_wikibase.cpython-312.pyc DELETED
Binary file (8.35 kB)
 
src/__pycache__/use_llm.cpython-312.pyc DELETED
Binary file (1.5 kB)
 
src/__pycache__/visual_qa.cpython-312.pyc DELETED
Binary file (5.23 kB)
 
src/generate_queries_alternative.py CHANGED
@@ -1,11 +1,9 @@
1
- from src.use_llm import main_generate
2
 
3
  from src.questions_queries import *
4
  import time
5
- import ollama
6
  import uuid
7
  import chromadb
8
- import openai
9
  import spacy
10
  import numpy as np
11
  nlp = spacy.load("en_core_web_sm")
@@ -174,52 +172,6 @@ def capitalize_sentences(sentences):
174
 
175
  return capitalized_sentences
176
 
177
- def similarity_question(question, questions_queries_dictionary, collection, n_results=5, threshold=0.15):
178
- nlp = spacy.load("en_core_web_sm") # Load spaCy model for entity recognition
179
-
180
- original_documents = [questions_queries_dictionary[i]["question"] for i in range(len(questions_queries_dictionary))]
181
- #original_documents = capitalize_sentences(original_documents)
182
- masked_documents = [mask_entities(q, nlp) for q in original_documents]
183
- #masked_documents = list(set(masked_documents))
184
- # Store each document in the vector embedding database
185
- for i, d in enumerate(masked_documents):
186
- response = ollama.embed(model="mxbai-embed-large", input=d)
187
- embeddings = response["embeddings"]
188
- collection.add(
189
- ids=[str(i)],
190
- embeddings=embeddings,
191
- documents=[d]
192
- )
193
-
194
-
195
-
196
- # Compute the embedding for the input question
197
- masked_question = mask_entities(question, nlp)
198
- response = ollama.embed(model="mxbai-embed-large", input=masked_question)
199
- results = collection.query(
200
- query_embeddings=[response["embeddings"][0]],
201
- n_results=n_results
202
- )
203
- triples = []
204
- for i in range(len(results['documents'][0])):
205
- masked_similar_question = results['documents'][0][i]
206
- distance = results['distances'][0][i]
207
- print(distance)
208
- paraphrase = distance < threshold
209
-
210
- # Find the corresponding original question
211
- index_similar_query = masked_documents.index(masked_similar_question)
212
- original_similar_question = original_documents[index_similar_query]
213
- similar_query = questions_queries_dictionary[index_similar_query]["query"]
214
-
215
- if paraphrase and "[ENTITY]" in masked_similar_question and "[ENTITY]" in masked_question:
216
- to_do_query = replace_entity(original_similar_question, question, similar_query)
217
- else:
218
- to_do_query = None
219
-
220
- triples.append((original_similar_question, similar_query, to_do_query))
221
-
222
- return triples
223
 
224
 
225
 
@@ -238,7 +190,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
238
 
239
  # Store each unique document in the vector embedding database
240
  for i, d in enumerate(masked_documents):
241
- response = ollama.embed(model="mxbai-embed-large", input=d)
242
  embedding = response["embeddings"][0] # Extract the first (and only) embedding from the nested list
243
 
244
  # Check if embedding is unique
@@ -254,7 +206,7 @@ def similarity_question(question, questions_queries_dictionary, collection, n_re
254
 
255
  # Compute the embedding for the input question
256
  masked_question = mask_entities(question, nlp)
257
- response = ollama.embed(model="mxbai-embed-large", input=masked_question)
258
  query_embedding = response["embeddings"][0] # Extract embedding
259
 
260
  results = collection.query(
@@ -297,7 +249,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
297
 
298
  # Store each unique document in the vector embedding database
299
  for i, d in enumerate(original_documents):
300
- response = ollama.embed(model="mxbai-embed-large", input=d)
301
  embedding = response["embeddings"][0] # Extract the first (and only) embedding from the nested list
302
 
303
  # Check if embedding is unique
@@ -313,7 +265,7 @@ def similarity_question_no_masking(question, questions_queries_dictionary, colle
313
 
314
  # Compute the embedding for the input question
315
 
316
- response = ollama.embed(model="mxbai-embed-large", input=question)
317
  query_embedding = response["embeddings"][0] # Extract embedding
318
 
319
  results = collection.query(
 
1
+ from src.use_llm import main_generate, get_embeddings
2
 
3
  from src.questions_queries import *
4
  import time
 
5
  import uuid
6
  import chromadb
 
7
  import spacy
8
  import numpy as np
9
  nlp = spacy.load("en_core_web_sm")
 
172
 
173
  return capitalized_sentences
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
 
177
 
 
190
 
191
  # Store each unique document in the vector embedding database
192
  for i, d in enumerate(masked_documents):
193
+ response = get_embeddings(d)
194
  embedding = response["embeddings"][0] # Extract the first (and only) embedding from the nested list
195
 
196
  # Check if embedding is unique
 
206
 
207
  # Compute the embedding for the input question
208
  masked_question = mask_entities(question, nlp)
209
+ response = get_embeddings(masked_question)
210
  query_embedding = response["embeddings"][0] # Extract embedding
211
 
212
  results = collection.query(
 
249
 
250
  # Store each unique document in the vector embedding database
251
  for i, d in enumerate(original_documents):
252
+ response = get_embeddings(d)
253
  embedding = response["embeddings"][0] # Extract the first (and only) embedding from the nested list
254
 
255
  # Check if embedding is unique
 
265
 
266
  # Compute the embedding for the input question
267
 
268
+ response = get_embeddings(question)
269
  query_embedding = response["embeddings"][0] # Extract embedding
270
 
271
  results = collection.query(
src/sparql_query_wikibase.py CHANGED
@@ -1,9 +1,3 @@
1
- wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'
2
- config = {
3
- "SPARQL_ENDPOINT_URL": "https://fashionwiki.wikibase.cloud/query/sparql",
4
- 'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
5
- 'WIKIBASE_URL': wikibase_api_url,
6
- }
7
 
8
 
9
  from urllib.parse import urlparse
@@ -16,6 +10,7 @@ from wikibaseintegrator.wbi_helpers import get_user_agent
16
  import pandas as pd
17
  from string import Template
18
  queries = False
 
19
 
20
 
21
  def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
@@ -71,7 +66,6 @@ def get_results_to_df( query):
71
  return df
72
 
73
  if queries:
74
- from src.new_fct_add_entities import wikibase_properties_id, classes_wikibase
75
  query_fashion_designers_template = Template("""
76
  PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
77
  PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
 
 
 
 
 
 
 
1
 
2
 
3
  from urllib.parse import urlparse
 
10
  import pandas as pd
11
  from string import Template
12
  queries = False
13
+ from src.wikibase_helpers import wikibase_properties_id, classes_wikibase, config, wikibase_api_url
14
 
15
 
16
  def execute_sparql_query(query: str, prefix: str | None = None, endpoint: str | None = None, user_agent: str | None = None, max_retries: int = 1000, retry_after: int = 60) -> dict:
 
66
  return df
67
 
68
  if queries:
 
69
  query_fashion_designers_template = Template("""
70
  PREFIX wbt: <https://fashionwiki.wikibase.cloud/prop/direct/>
71
  PREFIX wb: <https://fashionwiki.wikibase.cloud/entity/>
src/use_llm.py CHANGED
@@ -27,4 +27,18 @@ def send_chat_prompt(prompt: str, model: str, system_prompt: str) -> str:
27
  def main_generate(prompt, model=DEFAULT_MODEL, system_prompt="You are a helpful assistant that generates SPARQL queries."):
28
  response = send_chat_prompt(prompt, model, system_prompt)
29
  response = response.replace('```', '').replace('json', '').strip()
30
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def main_generate(prompt, model=DEFAULT_MODEL, system_prompt="You are a helpful assistant that generates SPARQL queries."):
28
  response = send_chat_prompt(prompt, model, system_prompt)
29
  response = response.replace('```', '').replace('json', '').strip()
30
+ return response
31
+
32
+
33
+
34
# Use your own token securely via Space secrets or local env
HF_TOKEN = os.getenv("HF_TOKEN")  # define this in Hugging Face Space Secrets
MODEL_ID = "thenlper/gte-large"  # or another embedding model like BAAI/bge-base-en

client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)


def get_embeddings(texts):
    """Embed one or more texts with the Hugging Face Inference API.

    Args:
        texts (str | list[str]): A single text or a list of texts.

    Returns:
        list: One embedding vector per input text.

    NOTE(review): callers in generate_queries_alternative.py index the
    result as ``response["embeddings"][0]`` (dict-style), but this function
    returns a plain list — reconcile one side or the other.
    """
    if isinstance(texts, str):
        texts = [texts]
    # BUG FIX: InferenceClient exposes no `embed` method; the embedding
    # endpoint is `feature_extraction` — confirm against the installed
    # huggingface_hub version.
    embeddings = [client.feature_extraction(text) for text in texts]
    return embeddings
src/wikibase_helpers.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import logging
from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_helpers
# BUG FIX: the original `from wikibaseintegrator.wbi_config import config`
# was silently shadowed by the module-level `config` dict below; alias it
# so both names remain usable without the shadow.
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator.wbi_exceptions import MWApiError

# Full MediaWiki API endpoint of the target Wikibase instance.
wikibase_api_url = 'https://fashionwiki.wikibase.cloud/w/api.php'
# Project-level connection settings (exported; imported by sparql_query_wikibase.py).
config = {
    "SPARQL_ENDPOINT_URL": "https://fashionwiki.wikibase.cloud/query/sparql",
    'USER_AGENT': 'YourBotName/1.0 (https://yourwebsite.org/bot-info)',
    'WIKIBASE_URL': wikibase_api_url,
}


# List of valid language codes (can be expanded)
VALID_LANGUAGE_CODES = ['en']
19
+
20
def get_property_id_by_label(property_label, api_url):
    """
    Resolve the property label to its corresponding property ID from Wikibase.

    Args:
        property_label (str): The label of the property to search.
        api_url (str): The full MediaWiki API endpoint of the target
            Wikibase (e.g. 'https://example.wikibase.cloud/w/api.php').

    Returns:
        str: The property ID if found, otherwise None.
    """
    # BUG FIX: every caller passes the full '.../w/api.php' endpoint, so the
    # old f'{api_url}/w/api.php?...' built a doubled path
    # ('.../w/api.php/w/api.php?...').  Query the endpoint directly and let
    # `params=` URL-encode the label (labels contain spaces).
    params = {
        'action': 'wbsearchentities',
        'search': property_label,
        'language': 'en',
        'type': 'property',
        'format': 'json',
    }
    response = requests.get(api_url, params=params, timeout=30)

    if response.status_code == 200:
        search_results = response.json()
        if 'search' in search_results and search_results['search']:
            # Return the first matching property ID
            return search_results['search'][0]['id']
        else:
            logging.info(f"No property found for label: {property_label}")
            return None
    else:
        logging.error(f"Failed to search for property by label in the target Wikibase. HTTP Status Code: {response.status_code}")
        return None
45
+
46
+
47
+ wikibase_properties_id = {"instance of": get_property_id_by_label("instance of", wikibase_api_url),
48
+ "reference URL": get_property_id_by_label("reference URL", wikibase_api_url),
49
+ "start time": get_property_id_by_label("start time", wikibase_api_url),
50
+ "end time": get_property_id_by_label("end time", wikibase_api_url),
51
+ "occupation title": get_property_id_by_label("occupation title", wikibase_api_url),
52
+ "educated at": get_property_id_by_label("educated at", wikibase_api_url),
53
+ "employer": get_property_id_by_label("employer", wikibase_api_url),
54
+ "work location": get_property_id_by_label("work location", wikibase_api_url),
55
+ "award received": get_property_id_by_label("award received", wikibase_api_url),
56
+ "point in time": get_property_id_by_label("point in time", wikibase_api_url),
57
+ "exact match": get_property_id_by_label("exact match", wikibase_api_url),
58
+ "date of birth": get_property_id_by_label("date of birth", wikibase_api_url),
59
+ "place of birth": get_property_id_by_label("place of birth", wikibase_api_url),
60
+ "date of death": get_property_id_by_label("date of death", wikibase_api_url),
61
+ "country of citizenship": get_property_id_by_label("country of citizenship", wikibase_api_url),
62
+ "occupation": get_property_id_by_label("occupation", wikibase_api_url),
63
+ "sex or gender": get_property_id_by_label("sex or gender", wikibase_api_url),
64
+ "official website": get_property_id_by_label("official website", wikibase_api_url),
65
+ "perfumes": get_property_id_by_label("perfumes", wikibase_api_url),
66
+ "who wears it": get_property_id_by_label("who wears it", wikibase_api_url),
67
+ "inception": get_property_id_by_label("inception", wikibase_api_url),
68
+ "headquarters location": get_property_id_by_label("headquarters location", wikibase_api_url),
69
+ "parent organization": get_property_id_by_label("parent organization", wikibase_api_url),
70
+ "founded by": get_property_id_by_label("founded by", wikibase_api_url),
71
+ "owned by": get_property_id_by_label("owned by", wikibase_api_url),
72
+ "industry": get_property_id_by_label("industry", wikibase_api_url),
73
+ "country": get_property_id_by_label("country", wikibase_api_url),
74
+ "total revenue": get_property_id_by_label("total revenue", wikibase_api_url),
75
+ "designer employed": get_property_id_by_label("designer employed", wikibase_api_url),
76
+ "country of origin": get_property_id_by_label("country of origin", wikibase_api_url),
77
+ "fashion collection": get_property_id_by_label("fashion collection", wikibase_api_url),
78
+ "fashion season": get_property_id_by_label("fashion season", wikibase_api_url),
79
+ "fashion show location": get_property_id_by_label("fashion show location", wikibase_api_url),
80
+ "description of fashion collection": get_property_id_by_label("description of fashion collection", wikibase_api_url),
81
+ "image of fashion collection": get_property_id_by_label("image of fashion collection", wikibase_api_url),
82
+ "editor of fashion collection description": get_property_id_by_label("editor of fashion collection description", wikibase_api_url),
83
+ "date of fashion collection": get_property_id_by_label("date of fashion collection", wikibase_api_url),
84
+ "fashion show category": get_property_id_by_label("fashion show category", wikibase_api_url),
85
+ "fashion house X fashion collection": get_property_id_by_label("fashion house X fashion collection", wikibase_api_url),
86
+ "designer of collection": get_property_id_by_label("designer of collection", wikibase_api_url)}