Bohui Zhang committed on
Commit 1f0f8d6 · 1 Parent(s): b740ca9

Update the second version
README.md CHANGED
@@ -17,16 +17,16 @@ analysis, and testing. By interacting with a conversational agent, users can ste
 extraction of competency questions, while receiving computational support to analyse the overall requirements and test
 early versions of the resulting ontologies.
 
-## Deploy
-If you would like to deploy this demo locally,
-1. Create a python environment and install the requirements using `pip install -r requirements.txt`.
-2. Run `app.py`.
+## GitHub Repository
+- The source code will also be available soon on GitHub: [King-s-Knowledge-Graph-Lab/OntoChat](https://github.com/King-s-Knowledge-Graph-Lab/OntoChat).
+
+## Note
+- The ontology testing part has been tested with the [Music Meta Ontology](https://github.com/polifonia-project/music-meta-ontology) and works well.
 
 ## TODO
-- Hosting in Hugging Face Space
-- Add ontology testing
-- Add the evaluation panel (?)
-- Optimize clustering visualization
-- Adjust flagging
+- Improve the verbaliser (classes, named entities, and relations might be messy in some cases)
+- Optimize clustering visualization (maybe only keep LLM clustering)
+- Add [flagging](https://www.gradio.app/docs/flagging), e.g., [`HuggingFaceDatasetSaver`](https://www.gradio.app/docs/flagging#hugging-face-dataset-saver-header)
+
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -28,82 +28,99 @@ with gr.Blocks() as user_story_interface:
 
     with gr.Row():
         with gr.Column():
-            persona = gr.Textbox(
-                label="Persona",
-                placeholder="Please input the persona of the user, including the name, occupations, skills, interests.",
-                info="Example: The user, Mark, is an experienced musicologist. He's an expert in western music, "
-                     "and plays piano and guitar."
-            )
-            goal = gr.Textbox(
-                label="Goal",
-                placeholder="Please input the goal of the user and any specific issues faced.",
-                info="Example: The goal of the user is to analyse analogies and simmetries between music scores, "
-                     "with a particular focus on harmony and the lyrics of the music piece."
-            )
-            sample_data = gr.Textbox(
-                label="Sample of Data",
-                placeholder="Please input a sample of data.",
-                info="Example: An example of data would be: - 'Let it be' by 'The Beatles' has a sequence of chords "
-                     "composed by 'F, Amin, F' that is recurring every time the lyrics say 'Let it be'; - The lyrics "
-                     "of 'Running with the Devil' by 'Van Halen' have a recurring chord sequence for the chorus and a "
-                     "recurring chord sequence for the bridge."
-            )
-            generate_btn = gr.Button(value="Generate")
-            user_story_chatbot = gr.Chatbot(
-
-            )
-            chatbot_input = gr.Textbox(
-                placeholder="Please tell me what improvements I should make to the user story :)"
+            user_story_chatbot = gr.Chatbot([
+                [None, "Hello! I am OntoChat, your conversational ontology engineering assistant."],
+                ["I am a domain expert trying to create a user story to be used by ontology engineers. You are the "
+                 "ontology expert. Only ask the following question once I have responded. Ask for the specifications "
+                 "to generate a user story as a user of the system, which should include: 1. The Persona: What are "
+                 "the name, occupation, skills and interests of the user? 2. The Goal: What is the goal of the user? "
+                 "Are they facing specific issues? 3. Example Data: Do you have examples of the specific data "
+                 "available? Make sure you have answers to all three questions before providing a user story. Only "
+                 "ask the next question once I have responded.", "Sure. Let's start with the persona. What are the "
+                 "name, occupations, skills, interests of the user?"]
+            ])
+            user_story_input = gr.Textbox(
+                label="Chatbot input",
+                placeholder="Please type your message here and press Enter to interact with the chatbot :)"
             )
         user_story = gr.TextArea(
             label="User story",
             interactive=True
         )
-    generate_btn.click(
-        fn=user_story_init_generator,
+    user_story_input.submit(
+        fn=user_story_generator,
         inputs=[
-            persona, goal, sample_data
+            user_story_input, user_story_chatbot
        ],
         outputs=[
-            user_story, user_story_chatbot
+            user_story, user_story_chatbot, user_story_input
         ]
     )
-    chatbot_input.submit(
-        fn=user_story_generator,
-        inputs=[
-            chatbot_input, user_story_chatbot
-        ],
-        outputs=[
-            user_story, user_story_chatbot
-        ]
-    )
-
-cq_interface = gr.Interface(
-    fn=cq_generator,
-    inputs=[
-        gr.Textbox(
-            label="User story",
-            info="Please copy the previously generated user story and paste it here. You can also modify the user "
-                 "story before submitting it."
-        ),
-        gr.Slider(
-            minimum=5,
-            maximum=50,
-            step=1,
-            label="Number of competency questions",
-            info="Please select the number of competency questions you want to generate."
-        )
-    ],
-    outputs=[
-        gr.Textbox(label="Competency questions")
-    ],
-    title="OntoChat",
-)
+
+
+with gr.Blocks() as cq_interface:
+    gr.Markdown(
+        """
+        # OntoChat
+        This is the second step of OntoChat. Please copy the generated user story from the previous
+        step and use it here. You can also modify the user story before using it for generating competency questions.
+        **Recommended prompt workflow:**
+        1. Obtain competency questions from the user story.
+            - Zero-shot learning:
+                - Prompt template: Given the user story: {user story}, generate {number} competency questions based on it.
+            - Few-shot learning (i.e., provide examples to give more instructions on how to generate competency questions):
+                - Prompt template: Here are some good examples of competency questions generated from example data.
+                  Formatted in {"Example data": "Competency questions"}.
+                  {"Yesterday was performed by Armando Rocca.": "Who performs the song?"},
+                  {"The Church was built in 1619.": "When (what year) was the building built?"},
+                  {"The Church is located in a periurban context.": "In which context is the building located?"},
+                  {"The mounting system of the bells is the falling clapper.": "Which is the mounting system of the bell?"}
+        2. Clean and refine competency questions.
+            - Obtain multiple competency questions.
+                - Prompt template: Take the generated competency questions and check if any of them can be divided into
+                  multiple questions. If they do, split the competency question into multiple competency questions. If it
+                  does not, leave the competency question as it is. For example, the competency question "Who wrote The
+                  Hobbit and in what year was the book written?" must be split into two competency questions: "Who wrote
+                  the book?" and "In what year was the book written?". Another example is the competency question, "When
+                  was the person born?". This competency question cannot be divided into multiple questions.
+            - Remove specific named entities.
+                - Prompt template: Take the competency questions and check if they contain real-world entities, like
+                  "Freddy Mercury" or "1837". If they do, change those real-world entities from these competency questions
+                  to more general concepts. For example, the competency question "Which is the author of Harry Potter?"
+                  should be changed to "Which is the author of the book?". Similarly, the competency question "Who wrote
+                  the book in 2018?" should be changed to "Who wrote the book, and in what year was the book written?"
+        """
+    )
+
+    with gr.Row():
+        with gr.Column():
+            cq_chatbot = gr.Chatbot([
+                [None, "I am OntoChat, your conversational ontology engineering assistant. Here is the second step of "
+                       "the system. Please give me your user story and tell me how many competency questions you want."]
+            ])
+            cq_input = gr.Textbox(
+                label="Chatbot input",
+                placeholder="Please type your message here and press Enter to interact with the chatbot :)"
+            )
+            cq_output = gr.TextArea(
+                label="Competency questions",
+                interactive=True
+            )
+    cq_input.submit(
+        fn=cq_generator,
+        inputs=[
+            cq_input, cq_chatbot
+        ],
+        outputs=[
+            cq_output, cq_chatbot, cq_input
+        ]
+    )
 
 clustering_interface = gr.Interface(
     fn=clustering_generator,
     inputs=[
-        gr.Textbox(
+        gr.TextArea(
            label="Competency questions",
            info="Please copy the previously generated competency questions and paste it here. You can also modify "
                 "the questions before submitting them."
@@ -131,14 +148,48 @@ clustering_interface = gr.Interface(
         )
     ],
     title="OntoChat",
+    description="This is the third step of OntoChat. Please copy the generated competency questions from the previous "
+                "step and run the clustering algorithm to group the competency questions based on their topics. From "
+                "our experience, LLM clustering has the best performance.",
+    allow_flagging="never"
 )
 
+
+with gr.Blocks() as testing_interface:
+    gr.Markdown(
+        """
+        # OntoChat
+        This is the final part of OntoChat, which performs ontology testing based on the input ontology file and CQs.
+        """
+    )
+    ontology_file = gr.File(label="Ontology file")
+    ontology_desc = gr.Textbox(
+        label="Ontology description",
+        placeholder="Please provide a description of the uploaded ontology to provide basic information and "
+                    "additional context."
+    )
+    cq_testing_input = gr.Textbox(
+        label="Competency questions",
+        placeholder="Please provide the competency questions that you want to test with."
+    )
+    testing_btn = gr.Button(value="Test")
+    testing_output = gr.TextArea(label="Ontology testing output")
+    testing_btn.click(
+        fn=ontology_testing,
+        inputs=[
+            ontology_file, ontology_desc, cq_testing_input
+        ],
+        outputs=[
+            testing_output
+        ]
+    )
+
+
 demo = gr.TabbedInterface(
-    [user_story_interface, cq_interface, clustering_interface],
-    ["User Story Generation", "Competency Question Extraction", "Competency Question Analysis"]
+    [user_story_interface, cq_interface, clustering_interface, testing_interface],
+    ["User Story Generation", "Competency Question Extraction", "Competency Question Analysis", "Ontology Testing"]
 )
 
 
 if __name__ == "__main__":
-    # demo.launch(share=True)
     demo.launch()
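
All of the chat-driven tabs now wire Gradio the same way: a `gr.Chatbot` seeded with an opening exchange, a `gr.Textbox` whose `submit` event calls a handler, and a handler that returns the bot message, the updated transcript, and an empty string so the textbox is cleared. A minimal self-contained sketch of that pattern, with a hypothetical `echo` handler standing in for the OpenAI-backed generators:

```python
import gradio as gr

def echo(message, history):
    """Stand-in for user_story_generator: append the new turn, clear the input."""
    bot_message = f"You said: {message}"  # a real handler would call the LLM here
    history.append([message, bot_message])
    return bot_message, history, ""       # "" resets the textbox, as in app.py

with gr.Blocks() as demo:
    chatbot = gr.Chatbot([[None, "Hello! How can I help?"]])
    user_input = gr.Textbox(placeholder="Type a message and press Enter")
    latest = gr.TextArea(label="Latest reply", interactive=True)
    user_input.submit(fn=echo, inputs=[user_input, chatbot],
                      outputs=[latest, chatbot, user_input])

if __name__ == "__main__":
    demo.launch()
```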
ontochat/chatbot.py CHANGED
@@ -1,6 +1,6 @@
 from openai import OpenAI
 
-# client = OpenAI()
+
 MODEL_NAME = "gpt-3.5-turbo"
 TEMPERATURE = 0
 SEED = 1234
@@ -17,19 +17,6 @@ def chat_completion(api_key, messages):
     return completion.choices[0].message.content
 
 
-def build_history(messages):
-    """
-    convert OpenAI client messages to gradio.Chatbot history
-    :param messages:
-    :return:
-    """
-    message_list = [None, ]
-    for item in messages:
-        message_list.append(item["content"])
-    history = [[message_list[i], message_list[i + 1]] for i in range(0, len(message_list), 2)]
-    return history
-
-
 def build_messages(history):
     """
     convert gradio.Chatbot history to OpenAI client messages
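
The removed `build_history` is the inverse of `build_messages`, whose body falls outside this hunk. Based on the `[user_message, bot_message]` pair format used throughout (with `None` marking the bot-only greeting) and the "system" role that `functions.py` assigns to bot turns, a plausible sketch of the conversion it performs; the actual implementation may differ:

```python
def build_messages(history):
    """
    Sketch: convert gradio.Chatbot history to OpenAI client messages.
    Each history item is a [user_message, bot_message] pair; a None entry
    (e.g. the bot-only greeting) contributes no message.
    """
    messages = []
    for user_message, bot_message in history:
        if user_message is not None:
            messages.append({"role": "user", "content": user_message})
        if bot_message is not None:
            # functions.py uses "system" for bot turns; "assistant" is the
            # more common role with the OpenAI chat API.
            messages.append({"role": "system", "content": bot_message})
    return messages
```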
ontochat/functions.py CHANGED
@@ -4,73 +4,79 @@ Interface functions
 
 import json
 
-from ontochat.chatbot import chat_completion, build_history, build_messages
+from ontochat.chatbot import chat_completion, build_messages
 from ontochat.analysis import compute_embeddings, agglomerative_clustering, hdbscan_clustering, llm_cq_clustering
+from ontochat.verbaliser import verbalise_ontology
 
 
 def set_openai_api_key(api_key: str):
     global openai_api_key
     openai_api_key = api_key
-    return "API key has been set!"
+    return "API key has been set! Now you can chat with the chatbot. Enjoy :)"
 
 
-def user_story_init_generator(persona, goal, sample_data):
-    # if os.environ.get("OPENAI_API_KEY") is None:
-    #     # openai.api_key = api_key
-    #     os.environ["OPENAI_API_KEY"] = api_key
-    messages = [{
-        "role": "system",
-        "content": "I am a conversational ontology engineering assistant, to help the user generate user stories, "
-                   "elicit requirements, and extract and analyze competency questions. In ontology engineering, "
-                   "a user story contains all the requirements from the perspective of an end user of the ontology. "
-                   "It is a way of capturing what a user needs to achieve with the ontology while also providing "
-                   "context and value. I will guide the user step-by-step to create a user story and generate "
-                   "competency questions from it."
-    }, {
-        "role": "user",
-        "content": f"The persona of the user is {persona}. The goal of the user is {goal}. A sampple of data is "
-                   f"{sample_data}. Write a user story for the ontology that fit into the information provided."
-    }]
+def user_story_generator(message, history):
+    print(history)
+    if len(history) == 1:  # initial round
+        messages = [{
+            "role": "system",
+            "content": "Hello! I am OntoChat, your conversational ontology engineering assistant."
+        }, {
+            "role": "user",
+            "content": "I am a domain expert trying to create a user story to be used by ontology engineers. You are "
+                       "the ontology expert. Only ask the following question once I have responded. Ask for the "
+                       "specifications to generate a user story as a user of the system, which should include: 1. The "
+                       "Persona: What are the name, occupation, skills and interests of the user? 2. The Goal: What is "
+                       "the goal of the user? Are they facing specific issues? 3. Example Data: Do you have examples "
+                       "of the specific data available? Make sure you have answers to all three questions before "
+                       "providing a user story. Only ask the next question once I have responded."
+        }, {
+            "role": "system",
+            "content": "Sure. Let's start with the persona. What are the name, occupations, skills, interests of the user?"
+        }, {
+            "role": "user",
+            "content": message
+        }]
+    else:
+        messages = build_messages(history)
+        messages.append({
+            "role": "user",
+            "content": message
+        })
     bot_message = chat_completion(openai_api_key, messages)
-    messages.append({
-        "role": "system",
-        "content": bot_message
-    })
-    history = build_history(messages)
-    return bot_message, history
+    history.append([message, bot_message])
+    return bot_message, history, ""
 
 
-def user_story_generator(message, history):
+def cq_generator(message, history):
     """
-
+    generate competency questions based on the user story
+    format constraint may not be necessary if we only use LLMs for clustering
     :param message:
     :param history:
     :return:
     """
-    messages = build_messages(history)
-    bot_message = chat_completion(openai_api_key, messages)
-    history.append((message, bot_message))
-    return bot_message, history
-
-
-def cq_generator(messages, numbers):
-    """
-
-    :param messages:
-    :param numbers:
-    :return:
-    """
-    messages = [
-        {
-            "role": "system",
-            "content": "You are an ontology engineer."
-        }, {
+    if len(history) == 1:  # initial round
+        messages = [
+            {
+                "role": "system",
+                "content": "I am OntoChat, your conversational ontology engineering assistant. Here is the second step "
+                           "of the system. Please give me your user story and tell me how many competency questions "
+                           "you want."
+            }, {
+                "role": "user",
+                "content": message
+            }
+        ]
+    else:
+        messages = build_messages(history)
+        messages.append({
             "role": "user",
-            "content": f"Please generate {numbers} competency questions based on the user story: {messages}"
-        }  # TODO: format constraint
-    ]
-    response = chat_completion(openai_api_key, messages)
-    return response
+            "content": message
+        })
+    bot_message = chat_completion(openai_api_key, messages)
+    history.append([message, bot_message])
+    return bot_message, history, ""
 
 
 def clustering_generator(cqs, cluster_method, n_clusters):
@@ -91,3 +97,26 @@ def clustering_generator(cqs, cluster_method, n_clusters):
         cq_clusters, cluster_image = llm_cq_clustering(cqs, n_clusters, openai_api_key)
 
     return cluster_image, json.dumps(cq_clusters, indent=4)
+
+
+def ontology_testing(ontology_file, ontology_desc, cqs):
+    """
+    test whether the given competency questions are addressed by the uploaded ontology
+    :param ontology_file:
+    :param ontology_desc:
+    :param cqs:
+    :return:
+    """
+    verbalisation = verbalise_ontology(ontology_file, ontology_desc, "")
+    messages = [{
+        "role": "system",
+        "content": "Please (1) provide a description of the uploaded ontology to provide basic information and "
+                   "additional context, (2) give the competency questions (CQs) that you want to test with."
+    }, {
+        "role": "user",
+        "content": verbalisation + "\n" + f"Given the above ontology, please label each competency question: {cqs} to "
+                                          f"determine whether it is addressed properly or not. Format your response in "
+                                          f"['yes': 'CQ1', 'no': 'CQ2', ...]."
+    }]
+    bot_message = chat_completion(openai_api_key, messages)
+    return bot_message
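
Outside the UI, the new `ontology_testing` entry point can be smoke-tested directly. A hypothetical example, assuming a valid OpenAI API key and a local `music-meta.ttl` file (both placeholders):

```python
from ontochat.functions import set_openai_api_key, ontology_testing

set_openai_api_key("sk-...")  # placeholder: substitute a real OpenAI API key

# Both the ontology file and the CQs below are illustrative placeholders.
report = ontology_testing(
    ontology_file="music-meta.ttl",
    ontology_desc="An ontology for music metadata (artists, works, performances).",
    cqs="Who performs the song? In what year was the piece composed?",
)
print(report)
```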
ontochat/queries.py ADDED
@@ -0,0 +1,17 @@
+"""
+General-purpose SPARQL queries
+
+"""
+
+NE_QUERY = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+
+SELECT DISTINCT ?individual ?other
+WHERE {
+    ?individual rdf:type owl:NamedIndividual ;
+        rdf:type ?other .
+    FILTER ( ?other not in ( owl:NamedIndividual ) )
+}
+"""
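
`NE_QUERY` pairs every `owl:NamedIndividual` with each of its other `rdf:type`s. A small self-contained check of the query with `rdflib` (the example graph is made up):

```python
from rdflib import Graph
from ontochat.queries import NE_QUERY

# A tiny illustrative graph: one individual with one non-trivial type.
ttl = """
@prefix ex: <http://example.org/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

ex:Beatles a owl:NamedIndividual, ex:MusicEnsemble .
"""

g = Graph()
g.parse(data=ttl, format="turtle")
for individual, other in g.query(NE_QUERY):
    print(individual, other)  # -> http://example.org/Beatles http://example.org/MusicEnsemble
```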
ontochat/verbaliser.py ADDED
@@ -0,0 +1,199 @@
+"""
+Utilities for the verbalisation of an ontology.
+
+Examples of possible use cases for ontology verbalisation:
+    - Summarising the features provided by the ontology (doc)
+    - Using an LM to extract competency questions from the ontology.
+    - Asking an LM if the ontology can be used for certain requirements.
+
+"""
+import logging
+from typing import List
+
+import rdflib
+from rdflib import Graph
+from rdflib.namespace import RDF, RDFS, OWL
+
+from ontochat.queries import NE_QUERY
+
+logger = logging.getLogger("ontochat.verbaliser")
+
+
+def verbalise_ontology(ontology_path: str, onto_about: str, onto_desc: str):
+    """
+    A simple method to verbalise ontologies and extract requirements. This is
+    currently designed to produce a plain verbalisation.
+
+    Parameters
+    ----------
+    ontology_path : str
+        Path to the ontology encoded in a format that is readable by `rdflib`.
+    onto_about : str
+        A short description of the ontology, if documentation is missing.
+    onto_desc : str
+        An extended description of the ontology to provide more context.
+
+    Returns
+    -------
+    verbalisation : str
+        A plain-text verbalisation of the ontology.
+
+    """
+    g = Graph()
+    g.parse(ontology_path)
+
+    # Everything that has a label is mapped here, otherwise we get a URI label
+    label_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.label, None))}
+    # just get the last part of the URI otherwise
+    label_fn = lambda x: label_dict[x] if x in label_dict else str(x).split("/")[-1]
+    comment_dict = {s: str(o) for s, _, o in g.triples((None, RDFS.comment, None))}
+
+    logger.info("Class verbalisation: start")
+    class_vrbs = verbalise_classes(g, label_fn, comment_dict)
+    logger.info(f"Class verbalisation: found {len(class_vrbs)} classes")
+
+    logger.info("Named entity verbalisation: start")
+    nament_vrbs = verbalise_named_entities(g, label_fn, comment_dict)
+    logger.info(f"Named entity verbalisation: found {len(nament_vrbs)} entities")
+
+    logger.info("Relation verbalisation: start")
+    relat_vrbs = verbalise_relations(g, label_fn, comment_dict)
+    logger.info(f"Relation verbalisation: found {len(relat_vrbs)} relations")
+
+    return collate_verbalisations(class_vrbs, relat_vrbs, nament_vrbs, onto_about, onto_desc)
+
+
+def create_relation_dict(graph, relation):
+    """
+    Returns all the objects appearing as tails of the given relation.
+    """
+    relation_dict = {}  # subject to all possible objects via relation
+    for s, p, o in graph.triples((None, relation, None)):
+        if isinstance(o, rdflib.term.BNode):
+            continue  # skip blank nodes
+        if s not in relation_dict:
+            relation_dict[s] = []
+        relation_dict[s].append(o)
+    return relation_dict
+
+
+def verbalise_classes(graph: rdflib.Graph, label_fn, comment_dict: dict):
+    # Classes are first to be extracted, subclasses follow
+    classes = [s for s, _, _ in graph.triples((None, RDF.type, OWL.Class))]
+    subclasses = create_relation_dict(graph, relation=RDFS.subClassOf)
+    logger.info(f"Found: {len(classes)} classes, {len(subclasses)} subclasses")
+    # Step 1: Verbalisation of classes, one by one
+    verbalisation_hist = []
+    class_verbalisations = []
+    for base_class in classes:
+        # The base verbalisation is the class label, if available
+        vrbn = f"{label_fn(base_class)}"
+
+        if base_class in subclasses:  # list all parent classes
+            vrbn += " (subconcept of "  # opening parenthesis
+            vrbn += ", ".join([label_fn(u) for u in subclasses[base_class]])
+            vrbn += ")"  # closing parenthesis
+
+        if base_class in comment_dict:  # include comment
+            vrbn += f": {comment_dict[base_class]}"
+
+        verbalisation_hist.append(base_class)
+        class_verbalisations.append(vrbn)
+
+    # Step 2: verbalisation of remaining subclasses
+    for subclass in subclasses:  # check remaining subclasses
+        if subclass not in verbalisation_hist:
+            raise NotImplementedError(subclass)
+
+    return class_verbalisations
+
+
+def verbalise_named_entities(graph: rdflib.Graph, label, comment_dict: dict):
+    """
+    Note: TODO append NE comment (if available) to each named entity.
+    Note: FIXME still, a named entity can have more than 1 parent class.
+    """
+    qres = graph.query(NE_QUERY)
+    named_entities = {n: c for n, c in list(qres)}
+
+    nentities_verbalisations = []
+    for named_entity, named_type in named_entities.items():
+        verbalisation = f"{label(named_entity)} is an instance of class {label(named_type)}."
+        nentities_verbalisations.append(verbalisation)
+
+    return nentities_verbalisations
+
+
+def verbalise_relations(graph: rdflib.Graph, label, comment_dict: dict):
+    properties = [s for s, _, _ in graph.triples(
+        (None, RDF.type, OWL.ObjectProperty))]
+    subprops = create_relation_dict(graph, relation=RDFS.subPropertyOf)
+    domains = create_relation_dict(graph, relation=RDFS.domain)
+    ranges = create_relation_dict(graph, relation=RDFS.range)
+
+    # Step 1: Verbalisation of relations, one by one
+    verbalisation_hist = []
+    relation_verbalisations = []
+
+    for base_prop in properties:
+
+        # The base verbalisation is the property label, if available
+        verbalisation = f"{label(base_prop)}"
+
+        if base_prop in subprops:
+            verbalisation += " (subproperty of "  # opening parenthesis
+            verbalisation += ", and ".join([label(u) for u in subprops[base_prop]])
+            verbalisation += ")"  # closing parenthesis
+
+        if base_prop in comment_dict:  # include comment
+            verbalisation += f": {comment_dict[base_prop]}"
+
+        if base_prop in domains:
+            verbalisation += " The domain of this relation can be: "
+            verbalisation += ", or ".join([label(u) for u in domains[base_prop]])
+            verbalisation += "."
+
+        if base_prop in ranges:
+            verbalisation += " The range of this relation can be: "
+            verbalisation += ", or ".join([label(u) for u in ranges[base_prop]])
+            verbalisation += "."
+
+        verbalisation_hist.append(base_prop)
+        relation_verbalisations.append(verbalisation)
+
+    for subprop in subprops:  # check remaining subproperties
+        if subprop not in verbalisation_hist:
+            raise NotImplementedError(subprop)
+
+    return relation_verbalisations
+
+
+def collate_verbalisations(class_verbalisations: List[str],
+                           relation_verbalisations: List[str],
+                           nentities_verbalisations: List[str],
+                           onto_about: str, onto_desc: str,
+                           ):
+    ontoverb = ""  # This is the basic prompt with the ontology description
+    # ontoverb += f"You are given an ontology about {onto_about}. {onto_desc}\n"
+    ontoverb += f"Ontology description: {onto_about}. {onto_desc}"
+
+    ontoverb += "\n"
+
+    ontoverb += "The main classes of the ontology are listed below:\n"
+    for class_verb in class_verbalisations:
+        ontoverb += f"- {class_verb}\n"
+
+    ontoverb += "\n"
+
+    ontoverb += "The main named entities (individuals) are listed below:\n"
+    for ne_verb in nentities_verbalisations:
+        ontoverb += f"- {ne_verb}\n"
+
+    ontoverb += "\n"
+
+    ontoverb += "The main relations of the ontology are listed below:\n"
+    for rel_verb in relation_verbalisations:
+        ontoverb += f"- {rel_verb}\n"
+
+    return ontoverb
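
Taken together, `verbalise_ontology` is what `ontology_testing` calls to turn the uploaded file into prompt context. A minimal usage sketch (the file name is a placeholder):

```python
from ontochat.verbaliser import verbalise_ontology

# "music-meta.ttl" stands in for any rdflib-readable ontology file.
text = verbalise_ontology(
    "music-meta.ttl",
    onto_about="music metadata",
    onto_desc="Covers artists, works, performances, and recording media.",
)
print(text)  # classes, named entities, and relations as bulleted prose
```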
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 openai
 gradio
 scikit-learn
-sentence-transformers
+sentence-transformers
+rdflib