Bohui Zhang
committed on
Commit
·
1f0f8d6
1
Parent(s):
b740ca9
Update the second version
Browse files- README.md +9 -9
- app.py +110 -59
- ontochat/chatbot.py +1 -14
- ontochat/functions.py +79 -50
- ontochat/queries.py +17 -0
- ontochat/verbaliser.py +199 -0
- requirements.txt +2 -1
README.md
CHANGED
@@ -17,16 +17,16 @@ analysis, and testing. By interacting with a conversational agent, users can ste
|
|
17 |
extraction of competency questions, while receiving computational support to analyse the overall requirements and test
|
18 |
early versions of the resulting ontologies.
|
19 |
|
20 |
-
##
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
24 |
|
25 |
## TODO
|
26 |
-
-
|
27 |
-
-
|
28 |
-
- Add
|
29 |
-
|
30 |
-
- Adjust flagging
|
31 |
|
32 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
17 |
extraction of competency questions, while receiving computational support to analyse the overall requirements and test
|
18 |
early versions of the resulting ontologies.
|
19 |
|
20 |
+
## GitHub Copy
|
21 |
+
- The source code will also be available soon on GitHub: [King-s-Knowledge-Graph-Lab/OntoChat](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat).
|
22 |
+
|
23 |
+
## Note
|
24 |
+
- The ontology testing part has been tested with the [Music Meta Ontology](https://github.com/polifonia-project/music-meta-ontology) and works well.
|
25 |
|
26 |
## TODO
|
27 |
+
- Improve the verbaliser (classes, named entities, and relations might be messy in some cases)
|
28 |
+
- Optimize clustering visualization (maybe only keep LLM clustering)
|
29 |
+
- Add [flagging](https://www.gradio.app/docs/flagging), e.g., [`HuggingFaceDatasetSaver`](https://www.gradio.app/docs/flagging#hugging-face-dataset-saver-header)
|
30 |
+
|
|
|
31 |
|
32 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -28,82 +28,99 @@ with gr.Blocks() as user_story_interface:
|
|
28 |
|
29 |
with gr.Row():
|
30 |
with gr.Column():
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
placeholder="Please input a sample of data.",
|
46 |
-
info="Example: An example of data would be: - 'Let it be' by 'The Beatles' has a sequence of chords "
|
47 |
-
"composed by 'F, Amin, F' that is recurring every time the lyrics say 'Let it be'; - The lyrics "
|
48 |
-
"of 'Running with the Devil' by 'Van Halen' have a recurring chord sequence for the chorus and a "
|
49 |
-
"recurring chord sequence for the bridge."
|
50 |
-
)
|
51 |
-
generate_btn = gr.Button(value="Generate")
|
52 |
-
user_story_chatbot = gr.Chatbot(
|
53 |
-
|
54 |
-
)
|
55 |
-
chatbot_input = gr.Textbox(
|
56 |
-
placeholder="Please tell me what improvements I should make to the user story :)"
|
57 |
)
|
58 |
user_story = gr.TextArea(
|
59 |
label="User story",
|
60 |
interactive=True
|
61 |
)
|
62 |
-
|
63 |
-
fn=
|
64 |
inputs=[
|
65 |
-
|
66 |
],
|
67 |
outputs=[
|
68 |
-
user_story, user_story_chatbot
|
69 |
]
|
70 |
)
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
inputs=[
|
74 |
-
|
75 |
],
|
76 |
outputs=[
|
77 |
-
|
78 |
]
|
79 |
)
|
80 |
|
81 |
-
cq_interface = gr.Interface(
|
82 |
-
fn=cq_generator,
|
83 |
-
inputs=[
|
84 |
-
gr.Textbox(
|
85 |
-
label="User story",
|
86 |
-
info="Please copy the previously generated user story and paste it here. You can also modify the user "
|
87 |
-
"story before submitting it."
|
88 |
-
),
|
89 |
-
gr.Slider(
|
90 |
-
minimum=5,
|
91 |
-
maximum=50,
|
92 |
-
step=1,
|
93 |
-
label="Number of competency questions",
|
94 |
-
info="Please select the number of competency questions you want to generate."
|
95 |
-
)
|
96 |
-
],
|
97 |
-
outputs=[
|
98 |
-
gr.Textbox(label="Competency questions")
|
99 |
-
],
|
100 |
-
title="OntoChat",
|
101 |
-
)
|
102 |
|
103 |
clustering_interface = gr.Interface(
|
104 |
fn=clustering_generator,
|
105 |
inputs=[
|
106 |
-
gr.
|
107 |
label="Competency questions",
|
108 |
info="Please copy the previously generated competency questions and paste it here. You can also modify "
|
109 |
"the questions before submitting them."
|
@@ -131,14 +148,48 @@ clustering_interface = gr.Interface(
|
|
131 |
)
|
132 |
],
|
133 |
title="OntoChat",
|
|
|
|
|
|
|
|
|
134 |
)
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
demo = gr.TabbedInterface(
|
137 |
-
[user_story_interface, cq_interface, clustering_interface],
|
138 |
-
["User Story Generation", "Competency Question Extraction", "Competency Question Analysis"]
|
139 |
)
|
140 |
|
141 |
|
142 |
if __name__ == "__main__":
|
143 |
-
# demo.launch(share=True)
|
144 |
demo.launch()
|
|
|
28 |
|
29 |
with gr.Row():
|
30 |
with gr.Column():
|
31 |
+
user_story_chatbot = gr.Chatbot([
|
32 |
+
[None, "Hello! I am OntoChat, your conversational ontology engineering assistant."],
|
33 |
+
["I am a domain expert trying to create a user story to be used by ontology engineers. You are the "
|
34 |
+
"ontology expert. Only ask the following question once I have responded. Ask for the specifications "
|
35 |
+
"to generate a user story as a user of the system, which should include: 1. The Persona: What are "
|
36 |
+
"the name, occupation, skills and interests of the user? 2. The Goal: What is the goal of the user? "
|
37 |
+
"Are they facing specific issues? 3. Example Data: Do you have examples of the specific data "
|
38 |
+
"available? Make sure you have answers to all three questions before providing a user story. Only "
|
39 |
+
"ask the next question once I have responded.", "Sure. Let's start with the persona. What are the "
|
40 |
+
"name, occupations, skills, interests of the user?"]
|
41 |
+
])
|
42 |
+
user_story_input = gr.Textbox(
|
43 |
+
label="Chatbot input",
|
44 |
+
placeholder="Please type your message here and press Enter to interact with the chatbot :)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
)
|
46 |
user_story = gr.TextArea(
|
47 |
label="User story",
|
48 |
interactive=True
|
49 |
)
|
50 |
+
user_story_input.submit(
|
51 |
+
fn=user_story_generator,
|
52 |
inputs=[
|
53 |
+
user_story_input, user_story_chatbot
|
54 |
],
|
55 |
outputs=[
|
56 |
+
user_story, user_story_chatbot, user_story_input
|
57 |
]
|
58 |
)
|
59 |
+
|
60 |
+
|
61 |
+
with gr.Blocks() as cq_interface:
|
62 |
+
gr.Markdown(
|
63 |
+
"""
|
64 |
+
# OntoChat
|
65 |
+
This is the second step of OntoChat. Please copy the generated user story from the previous
|
66 |
+
step and use it here. You can also modify the user story before using it for generating competency questions.
|
67 |
+
**Recommended prompt workflow:**
|
68 |
+
1. Obtain competency questions from the user story.
|
69 |
+
- Zero-shot learning:
|
70 |
+
- Prompt template: Given the user story: {user story}, generate {number} competency questions base on it.
|
71 |
+
- Few-shot learning (i.e., provide examples to give more instructions on how to generate competency questions):
|
72 |
+
- Prompt template: Here are some good examples of competency questions generated from example data.
|
73 |
+
Formatted in {"Example data": "Competency questions"}.
|
74 |
+
{"Yesterday was performed by Armando Rocca.": "Who performs the song?"},
|
75 |
+
{"The Church was built in 1619.": "When (what year) was the building built?"},
|
76 |
+
{"The Church is located in a periurban context.": "In which context is the building located?"},
|
77 |
+
{"The mounting system of the bells is the falling clapper.": "Which is the mounting system of the bell?"}
|
78 |
+
2. Clean and refine competency questions.
|
79 |
+
- Obtain multiple competency questions.
|
80 |
+
- Prompt template: Take the generated competency questions and check if any of them can be divided into
|
81 |
+
multiple questions. If they do, split the competency question into multiple competency questions. If it
|
82 |
+
does not, leave the competency question as it is. For example, the competency question "Who wrote The
|
83 |
+
Hobbit and in what year was the book written?" must be split into two competency questions: "Who wrote
|
84 |
+
the book?" and "In what year was the book written?". Another example is the competency question, "When
|
85 |
+
was the person born?". This competency question cannot be divided into multiple questions.
|
86 |
+
- Remove specific named entities.
|
87 |
+
- Prompt template: Take the competency questions and check if they contain real-world entities, like
|
88 |
+
"Freddy Mercury" or "1837". If they do, change those real-world entities from these competency questions
|
89 |
+
to more general concepts. For example, the competency question "Which is the author of Harry Potter?"
|
90 |
+
should be changed to "Which is the author of the book?". Similarly, the competency question "Who wrote
|
91 |
+
the book in 2018?" should be changed to "Who wrote the book, and in what year was the book written?"
|
92 |
+
"""
|
93 |
+
)
|
94 |
+
|
95 |
+
with gr.Row():
|
96 |
+
with gr.Column():
|
97 |
+
cq_chatbot = gr.Chatbot([
|
98 |
+
[None, "I am OntoChat, your conversational ontology engineering assistant. Here is the second step of "
|
99 |
+
"the system. Please give me your user story and tell me how many competency questions you want."]
|
100 |
+
])
|
101 |
+
cq_input = gr.Textbox(
|
102 |
+
label="Chatbot input",
|
103 |
+
placeholder="Please type your message here and press Enter to interact with the chatbot :)"
|
104 |
+
)
|
105 |
+
cq_output = gr.TextArea(
|
106 |
+
label="Competency questions",
|
107 |
+
interactive=True
|
108 |
+
)
|
109 |
+
cq_input.submit(
|
110 |
+
fn=cq_generator,
|
111 |
inputs=[
|
112 |
+
cq_input, cq_chatbot
|
113 |
],
|
114 |
outputs=[
|
115 |
+
cq_output, cq_chatbot, cq_input
|
116 |
]
|
117 |
)
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
clustering_interface = gr.Interface(
|
121 |
fn=clustering_generator,
|
122 |
inputs=[
|
123 |
+
gr.TextArea(
|
124 |
label="Competency questions",
|
125 |
info="Please copy the previously generated competency questions and paste it here. You can also modify "
|
126 |
"the questions before submitting them."
|
|
|
148 |
)
|
149 |
],
|
150 |
title="OntoChat",
|
151 |
+
description="This is the third step of OntoChat. Please copy the generated competency questions from the previous "
|
152 |
+
"step and run the clustering algorithm to group the competency questions based on their topics. From "
|
153 |
+
"our experience, LLM clustering has the best performance.",
|
154 |
+
allow_flagging="never"
|
155 |
)
|
156 |
|
157 |
+
|
158 |
+
# Tab 4: ontology testing — verbalise an uploaded ontology and ask the LLM
# whether each competency question is addressed by it.
with gr.Blocks() as testing_interface:
    gr.Markdown(
        """
        # OntoChat
        This is the final part of OntoChat which performs ontology testing based on the input ontology file and CQs.
        """
    )
    # Inputs: the ontology itself, a short free-text description, and the CQs to test
    ontology_file = gr.File(label="Ontology file")
    ontology_desc = gr.Textbox(
        label="Ontology description",
        placeholder="Please provide a description of the ontology uploaded to provide basic information and "
                    "additional context."
    )
    cq_testing_input = gr.Textbox(
        label="Competency questions",
        placeholder="Please provide the competency questions that you want to test with."
    )
    testing_btn = gr.Button(value="Test")
    testing_output = gr.TextArea(label="Ontology testing output")
    # Wire the button to the backend ontology_testing function (ontochat/functions.py)
    testing_btn.click(
        fn=ontology_testing,
        inputs=[
            ontology_file, ontology_desc, cq_testing_input
        ],
        outputs=[
            testing_output
        ]
    )
|
186 |
+
|
187 |
+
|
188 |
# Assemble the four workflow steps into a single tabbed UI, in pipeline order.
demo = gr.TabbedInterface(
    [user_story_interface, cq_interface, clustering_interface, testing_interface],
    ["User Story Generation", "Competency Question Extraction", "Competency Question Analysis", "Ontology Testing"]
)
|
192 |
|
193 |
|
194 |
# Script entry point: start the Gradio server (local only; no share link).
if __name__ == "__main__":
    demo.launch()
|
ontochat/chatbot.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from openai import OpenAI
|
2 |
|
3 |
-
|
4 |
MODEL_NAME = "gpt-3.5-turbo"
|
5 |
TEMPERATURE = 0
|
6 |
SEED = 1234
|
@@ -17,19 +17,6 @@ def chat_completion(api_key, messages):
|
|
17 |
return completion.choices[0].message.content
|
18 |
|
19 |
|
20 |
-
def build_history(messages):
|
21 |
-
"""
|
22 |
-
convert OpenAI client messages to gradio.Chatbot history
|
23 |
-
:param messages:
|
24 |
-
:return:
|
25 |
-
"""
|
26 |
-
message_list = [None, ]
|
27 |
-
for item in messages:
|
28 |
-
message_list.append(item["content"])
|
29 |
-
history = [[message_list[i], message_list[i + 1]] for i in range(0, len(message_list), 2)]
|
30 |
-
return history
|
31 |
-
|
32 |
-
|
33 |
def build_messages(history):
|
34 |
"""
|
35 |
convert gardio.Chatbot history to OpenAI client messages
|
|
|
1 |
from openai import OpenAI
|
2 |
|
3 |
+
|
4 |
MODEL_NAME = "gpt-3.5-turbo"
|
5 |
TEMPERATURE = 0
|
6 |
SEED = 1234
|
|
|
17 |
return completion.choices[0].message.content
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def build_messages(history):
|
21 |
"""
|
22 |
convert gardio.Chatbot history to OpenAI client messages
|
ontochat/functions.py
CHANGED
@@ -4,73 +4,79 @@ Interface functions
|
|
4 |
|
5 |
import json
|
6 |
|
7 |
-
from ontochat.chatbot import chat_completion,
|
8 |
from ontochat.analysis import compute_embeddings, agglomerative_clustering, hdbscan_clustering, llm_cq_clustering
|
|
|
9 |
|
10 |
|
11 |
def set_openai_api_key(api_key: str):
|
12 |
global openai_api_key
|
13 |
openai_api_key = api_key
|
14 |
-
return "API key has been set!"
|
15 |
|
16 |
|
17 |
-
def
|
18 |
-
|
19 |
-
#
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
bot_message = chat_completion(openai_api_key, messages)
|
35 |
-
|
36 |
-
|
37 |
-
"content": bot_message
|
38 |
-
})
|
39 |
-
history = build_history(messages)
|
40 |
-
return bot_message, history
|
41 |
|
42 |
|
43 |
-
def
|
44 |
"""
|
45 |
-
|
|
|
46 |
:param message:
|
47 |
:param history:
|
48 |
:return:
|
49 |
"""
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
{
|
65 |
-
"role": "system",
|
66 |
-
"content": "You are an ontology engineer."
|
67 |
-
}, {
|
68 |
"role": "user",
|
69 |
-
"content":
|
70 |
-
}
|
71 |
-
|
72 |
-
|
73 |
-
return
|
74 |
|
75 |
|
76 |
def clustering_generator(cqs, cluster_method, n_clusters):
|
@@ -91,3 +97,26 @@ def clustering_generator(cqs, cluster_method, n_clusters):
|
|
91 |
cq_clusters, cluster_image = llm_cq_clustering(cqs, n_clusters, openai_api_key)
|
92 |
|
93 |
return cluster_image, json.dumps(cq_clusters, indent=4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
import json
|
6 |
|
7 |
+
from ontochat.chatbot import chat_completion, build_messages
|
8 |
from ontochat.analysis import compute_embeddings, agglomerative_clustering, hdbscan_clustering, llm_cq_clustering
|
9 |
+
from ontochat.verbaliser import verbalise_ontology
|
10 |
|
11 |
|
12 |
def set_openai_api_key(api_key: str):
    """Store the OpenAI API key in a module-level global and return a confirmation message.

    :param api_key: the user's OpenAI API key, taken verbatim from the UI
    :return: confirmation string shown to the user
    """
    # The global is read by the other interface functions in this module
    # (user_story_generator, cq_generator, ontology_testing, ...).
    global openai_api_key
    openai_api_key = api_key
    return "API key has been set! Now you can chat with the chatbot. Enjoy :)"
|
16 |
|
17 |
|
18 |
+
def user_story_generator(message, history):
    """
    Drive one turn of the user-story elicitation chat.

    :param message: the user's latest chatbot input
    :param history: gradio Chatbot history, a list of [user, bot] message pairs
    :return: (bot reply, updated history, "" to clear the input textbox)
    """
    # FIX: removed leftover debug `print(history)` from the original.
    if len(history) == 1:  # initial round: seed the conversation with the priming prompt
        # NOTE(review): the scripted bot turns are sent with role "system";
        # "assistant" is the conventional role for prior bot replies — confirm intent.
        messages = [{
            "role": "system",
            "content": "Hello! I am OntoChat, your conversational ontology engineering assistant."
        }, {
            "role": "user",
            # FIX: the original concatenation read "...Ask for the" + "specifications...",
            # producing "thespecifications" in the prompt; a space has been restored.
            "content": "I am a domain expert trying to create a user story to be used by ontology engineers. You are "
                       "the ontology expert. Only ask the following question once I have responded. Ask for the "
                       "specifications to generate a user story as a user of the system, which should include: 1. The "
                       "Persona: What are the name, occupation, skills and interests of the user? 2. The Goal: What is "
                       "the goal of the user? Are they facing specific issues? 3. Example Data: Do you have examples "
                       "of the specific data available? Make sure you have answers to all three questions before "
                       "providing a user story. Only ask the next question once I have responded."
        }, {
            "role": "system",
            "content": "Sure. Let's start with the persona. What are the name, occupations, skills, interests of the user?"
        }, {
            "role": "user",
            "content": message
        }]
    else:  # later rounds: rebuild the OpenAI message list from the chat history
        messages = build_messages(history)
        messages.append({
            "role": "user",
            "content": message
        })
    bot_message = chat_completion(openai_api_key, messages)
    history.append([message, bot_message])
    # The trailing "" clears the input textbox in the UI.
    return bot_message, history, ""
|
|
|
|
|
|
|
|
|
49 |
|
50 |
|
51 |
+
def cq_generator(message, history):
    """
    Generate competency questions based on the user story.

    The format constraint may not be necessary if we only use LLMs for clustering.

    :param message: latest user input from the chatbot textbox
    :param history: gradio Chatbot history ([user, bot] pairs)
    :return: (bot reply, updated history, "" to clear the input box)
    """
    if len(history) == 1:  # first turn: prime the model with the step-2 greeting
        messages = [{
            "role": "system",
            "content": "I am OntoChat, your conversational ontology engineering assistant. Here is the second step "
                       "of the system. Please give me your user story and tell me how many competency questions "
                       "you want."
        }]
    else:  # subsequent turns: replay the conversation so far
        messages = build_messages(history)
    # Either way, the current user message is the final entry sent to the model.
    messages.append({
        "role": "user",
        "content": message
    })
    bot_message = chat_completion(openai_api_key, messages)
    history.append([message, bot_message])
    return bot_message, history, ""
|
80 |
|
81 |
|
82 |
def clustering_generator(cqs, cluster_method, n_clusters):
|
|
|
97 |
cq_clusters, cluster_image = llm_cq_clustering(cqs, n_clusters, openai_api_key)
|
98 |
|
99 |
return cluster_image, json.dumps(cq_clusters, indent=4)
|
100 |
+
|
101 |
+
|
102 |
+
def ontology_testing(ontology_file, ontology_desc, cqs):
    """
    Ask the LLM whether an ontology addresses a set of competency questions.

    :param ontology_file: the uploaded ontology (gradio File value; assumed to be a
        path-like value that rdflib can parse — TODO confirm with the gradio version in use)
    :param ontology_desc: free-text description of the uploaded ontology
    :param cqs: the competency questions to test against the ontology
    :return: the model's labelling of each CQ as addressed ("yes") or not ("no")
    """
    # Verbalise the ontology into plain text so it can be embedded in the prompt.
    # The third argument (extended description) is intentionally left empty here.
    verbalisation = verbalise_ontology(ontology_file, ontology_desc, "")
    messages = [{
        "role": "system",
        "content": "Please (1) provide a description of the ontology uploaded to provide basic information and "
                   "additional context, (2) give the competency questions (CQs) that you want to test with."
    }, {
        "role": "user",
        "content": verbalisation + "\n" + f"Given the above ontology, please label each competency question: {cqs} to "
                                          f"determine whether it is addressed properly or not. Format your response in"
                                          f" ['yes': 'CQ1', 'no': 'CQ2', ...]."
    }]
    bot_message = chat_completion(openai_api_key, messages)
    return bot_message
|
ontochat/queries.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
General-purpose SPARQL queries
|
3 |
+
|
4 |
+
"""
|
5 |
+
|
6 |
+
NE_QUERY = """
|
7 |
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
8 |
+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
9 |
+
PREFIX owl: <http://www.w3.org/2002/07/owl#>
|
10 |
+
|
11 |
+
SELECT DISTINCT ?individual ?other
|
12 |
+
WHERE {
|
13 |
+
?individual rdf:type owl:NamedIndividual ;
|
14 |
+
rdf:type ?other .
|
15 |
+
FILTER ( ?other not in ( owl:NamedIndividual ) )
|
16 |
+
}
|
17 |
+
"""
|
ontochat/verbaliser.py
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Utilities for the verbalisation of an ontology.
|
3 |
+
|
4 |
+
Examples of possible uses cases for ontology verbalisation:
|
5 |
+
- Summarising the features provided by the ontology (doc)
|
6 |
+
- Using a LM to extract competency questions from the ontology.
|
7 |
+
- Asking a LM if the ontology can be used for certain requirements.
|
8 |
+
|
9 |
+
"""
|
10 |
+
import logging
|
11 |
+
from typing import List
|
12 |
+
|
13 |
+
import rdflib
|
14 |
+
from rdflib import Graph
|
15 |
+
from rdflib.namespace import RDF, RDFS, OWL
|
16 |
+
|
17 |
+
from ontochat.queries import NE_QUERY
|
18 |
+
|
19 |
+
logger = logging.getLogger("ontochat.verbaliser")
|
20 |
+
|
21 |
+
|
22 |
+
def verbalise_ontology(ontology_path: str, onto_about: str, onto_desc: str) -> str:
    """
    A simple method to verbalise ontologies and extract requirements. This is
    currently designed to produce a plain verbalisation.

    Parameters
    ----------
    ontology_path : str
        Path to the ontology encoded in a format that is readable by `rdflib`.
    onto_about : str
        A short description of the ontology, if documentation is missing.
    onto_desc : str
        An extended description of the ontology to provide more context.

    Returns
    -------
    verbalisation : str
        A plain-text verbalisation of the ontology's classes, named entities,
        and relations, prefixed with the given description.

    """
    g = Graph()
    g.parse(ontology_path)

    # Everything that has a label is mapped here, otherwise we get a URI label
    label_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.label, None))}
    # just get the last part of the URI otherwise
    label_fn = lambda x: label_dict[x] if x in label_dict else str(x).split("/")[-1]
    comment_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.comment, None))}

    logger.info("Class verbalisation: start")
    class_vrbs = verbalise_classes(g, label_fn, comment_dict)
    logger.info(f"Class verbalisation: found {len(class_vrbs)} classes")

    logger.info("Named entity verbalisation: start")
    nament_vrbs = verbalise_named_entities(g, label_fn, comment_dict)
    # FIX: the original logged len(class_vrbs) here, misreporting the entity count.
    logger.info(f"Named entity verbalisation: found {len(nament_vrbs)} entities")

    logger.info("Relation verbalisation: start")
    relat_vrbs = verbalise_relations(g, label_fn, comment_dict)
    # FIX: the original logged len(class_vrbs) here, misreporting the relation count.
    logger.info(f"Relation verbalisation: found {len(relat_vrbs)} relations")

    # FIX: the original passed (class, named-entity, relation) positionally into a
    # signature ordered (class, relation, named-entity), which swapped the
    # "named entities" and "relations" sections in the output text. Keyword
    # arguments make the mapping explicit and correct.
    return collate_verbalisations(
        class_verbalisations=class_vrbs,
        relation_verbalisations=relat_vrbs,
        nentities_verbalisations=nament_vrbs,
        onto_about=onto_about,
        onto_desc=onto_desc,
    )
|
64 |
+
|
65 |
+
|
66 |
+
def create_relation_dict(graph, relation):
    """
    Collect, for every subject in the graph, all objects reachable via `relation`.

    Blank-node objects are skipped because they carry no usable label.

    :param graph: an rdflib Graph to scan
    :param relation: the predicate whose triples are collected
    :return: dict mapping each subject to the list of its `relation` objects
    """
    tails_by_subject = {}
    for subj, _, obj in graph.triples((None, relation, None)):
        if isinstance(obj, rdflib.term.BNode):
            continue  # skip blank node
        tails_by_subject.setdefault(subj, []).append(obj)
    return tails_by_subject
|
78 |
+
|
79 |
+
|
80 |
+
def verbalise_classes(graph: rdflib.Graph, label_fn, comment_dict: dict):
    """
    Produce one textual description per owl:Class in the graph.

    Each description is the class label, optionally followed by its parent
    classes ("subconcept of ...") and its rdfs:comment.

    :param graph: the ontology graph
    :param label_fn: callable mapping a URI to a human-readable label
    :param comment_dict: mapping from subject URI to its rdfs:comment text
    :return: list of class descriptions, one per declared owl:Class
    """
    # Declared classes come first; the subclass map may mention more subjects.
    declared = [subj for subj, _, _ in graph.triples((None, RDF.type, OWL.Class))]
    parents = create_relation_dict(graph, relation=RDFS.subClassOf)
    logger.info(f"Found: {len(declared)} classes, {len(parents)} subclasses")

    # Step 1: verbalise each declared class in turn.
    seen = []
    descriptions = []
    for cls in declared:
        text = f"{label_fn(cls)}"

        if cls in parents:  # list all parent classes in parentheses
            joined = ", ".join(label_fn(parent) for parent in parents[cls])
            text += f" (subconcept of {joined})"

        if cls in comment_dict:  # append the rdfs:comment, if any
            text += f": {comment_dict[cls]}"

        seen.append(cls)
        descriptions.append(text)

    # Step 2: any subclass subject that was never declared as owl:Class is
    # currently unsupported, matching the original behaviour.
    for sub in parents:
        if sub not in seen:
            raise NotImplementedError(sub)

    return descriptions
|
109 |
+
|
110 |
+
|
111 |
+
def verbalise_named_entities(graph: rdflib.Graph, label, comment_dict: dict):
    """
    Verbalise owl:NamedIndividual instances as "<entity> is an instance of class <type>."

    Note: TODO append NE comment (if available) to each named entity.
    Note: FIXME still, a named entity can have more than 1 parent class
    (the dict below keeps only the last type returned per entity).
    """
    results = graph.query(NE_QUERY)
    entity_types = {entity: entity_type for entity, entity_type in results}

    return [
        f"{label(entity)} is an instance of class {label(entity_type)}."
        for entity, entity_type in entity_types.items()
    ]
|
125 |
+
|
126 |
+
|
127 |
+
def verbalise_relations(graph: rdflib.Graph, label, comment_dict: dict):
    """
    Produce one textual description per owl:ObjectProperty in the graph.

    Each description is the property label, optionally followed by its parent
    properties, its rdfs:comment, and the possible domain and range classes.

    :param graph: the ontology graph
    :param label: callable mapping a URI to a human-readable label
    :param comment_dict: mapping from subject URI to its rdfs:comment text
    :return: list of relation descriptions, one per declared object property
    """
    props = [subj for subj, _, _ in graph.triples(
        (None, RDF.type, OWL.ObjectProperty))]
    parent_props = create_relation_dict(graph, relation=RDFS.subPropertyOf)
    domain_map = create_relation_dict(graph, relation=RDFS.domain)
    range_map = create_relation_dict(graph, relation=RDFS.range)

    described = []
    descriptions = []

    for prop in props:
        # The base verbalisation is the property label, if available.
        text = f"{label(prop)}"

        if prop in parent_props:  # parent properties in parentheses
            joined = ", and".join(label(parent) for parent in parent_props[prop])
            text += f" (subproperty of {joined})"

        if prop in comment_dict:  # append the rdfs:comment, if any
            text += f": {comment_dict[prop]}"

        if prop in domain_map:
            options = ", or ".join(label(cls) for cls in domain_map[prop])
            text += f" The domain of this relation can be: {options}."

        if prop in range_map:
            options = ", or ".join(label(cls) for cls in range_map[prop])
            text += f" The range of this relation can be: {options}."

        described.append(prop)
        descriptions.append(text)

    # Any subproperty subject never declared as an object property is
    # currently unsupported, matching the original behaviour.
    for sub in parent_props:
        if sub not in described:
            raise NotImplementedError(sub)

    return descriptions
|
169 |
+
|
170 |
+
|
171 |
+
def collate_verbalisations(class_verbalisations: List[str],
                           relation_verbalisations: List[str],
                           nentities_verbalisations: List[str],
                           onto_about: str, onto_desc: str,
                           ) -> str:
    """
    Assemble the final verbalisation text: the ontology description followed by
    bulleted sections for classes, named entities, and relations (in that order).

    :param class_verbalisations: one line per class
    :param relation_verbalisations: one line per relation
    :param nentities_verbalisations: one line per named entity
    :param onto_about: short description of the ontology
    :param onto_desc: extended description providing extra context
    :return: the collated plain-text verbalisation
    """
    parts = [f"Ontology description: {onto_about}. {onto_desc}", "\n"]

    parts.append("The main classes of the ontology are listed below:\n")
    parts.extend(f"- {item}\n" for item in class_verbalisations)
    parts.append("\n")

    parts.append("The main named entities (individuals) are listed below:\n")
    parts.extend(f"- {item}\n" for item in nentities_verbalisations)
    parts.append("\n")

    parts.append("The main relations of the ontology are listed below:\n")
    parts.extend(f"- {item}\n" for item in relation_verbalisations)

    return "".join(parts)
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
openai
|
2 |
gradio
|
3 |
scikit-learn
|
4 |
-
sentence-transformers
|
|
|
|
1 |
openai
|
2 |
gradio
|
3 |
scikit-learn
|
4 |
+
sentence-transformers
|
5 |
+
rdflib
|