rknl committed on
Commit b100ff7 · verified · 1 Parent(s): 38218c4

Update telcom_core.py

Files changed (1)
  1. telcom_core.py +129 -48
telcom_core.py CHANGED
@@ -41,7 +41,7 @@ Remember to always prioritize customer satisfaction and only suggest upsells whe
  '''
 
 
- llm_eval_prompt = """You are an AI tasked with evaluating the performance of a language model (LLM) based on a given query and response. Your role is to assess the LLM's output using four specific metrics and provide scores for each.
 
  Here are the metrics you will use to evaluate the LLM's performance:
 
@@ -50,15 +50,19 @@ Here are the metrics you will use to evaluate the LLM's performance:
  3. Empowerment: How well the response enables the user to understand or act on the information provided.
  4. Directness: The clarity and conciseness of the response in addressing the query.
 
- To perform your evaluation, carefully analyze the following query and response:
 
  <query>
  {QUERY}
  </query>
 
- <response>
- {RESPONSE}
- </response>
 
  For each metric, consider the following:
 
@@ -69,41 +73,79 @@ For each metric, consider the following:
 
  Score each metric on a scale from 0 to 5, where 0 is the lowest (poor performance) and 5 is the highest (excellent performance).
 
- For each metric, provide a brief justification for your score before stating the score itself. Your justification should reference specific aspects of the query and response that influenced your decision.
-
- Present your evaluation in the following format:
-
- <evaluation>
- <metric name="Comprehensiveness">
- <justification>
- [Your justification for the Comprehensiveness score]
- </justification>
- <score>[Your score from 0-5]</score>
- </metric>
-
- <metric name="Diversity">
- <justification>
- [Your justification for the Diversity score]
- </justification>
- <score>[Your score from 0-5]</score>
- </metric>
-
- <metric name="Empowerment">
- <justification>
- [Your justification for the Empowerment score]
- </justification>
- <score>[Your score from 0-5]</score>
- </metric>
-
- <metric name="Directness">
- <justification>
- [Your justification for the Directness score]
- </justification>
- <score>[Your score from 0-5]</score>
- </metric>
- </evaluation>
-
- Ensure that your evaluation is fair, objective, and based solely on the provided query and response. Do not make assumptions about information not present in the given text.
  """
 
  def extract_pattern_triplet(text):
@@ -184,7 +226,6 @@ def query_graph_rag_qa(graph_rag_index,query,search_level):
      return response, reference , reference_text
 
 
-
  def parse_evaluation_regex(xml_text):
      # Define regex patterns for metrics, justifications, and scores
      metric_pattern = re.compile(r'<metric name="(.+?)">')
@@ -201,22 +242,62 @@ def parse_evaluation_regex(xml_text):
 
      # Iterate through each metric and its corresponding justification and score
      for metric, justification, score in zip(metrics, justifications, scores):
-         parsed_text += f"<b> {metric} </b> <br>"
-         parsed_text += f"<b>Score: {score} </b> <br>"
-         parsed_text += f"<b>Justification:</b> {justification.strip()} <br>"
 
      return parsed_text
 
- def evaluate_llm(query,response):
      """
      Evaluates the provided query and response using a PromptTemplate and returns the completion from OpenAI.
      """
-     data = {'QUERY': query, 'RESPONSE': response}
      prompt = PromptTemplate(llm_eval_prompt).format(**data)
      eval_text = OpenAI().complete(prompt)
-     return eval_text.text
 
-
  def plot_full_kg(kg_plot_path):
      """Plot the full knowledge graph and return the HTML representation."""
      # return HTML(filename=kg_plot_path)
 
  '''
 
 
+ llm_eval_prompt = """You are an AI tasked with evaluating the performance of a language model (LLM) based on a given query and two responses. Your role is to assess the Graph-RAG model's output against the vanilla RAG model's output using four specific metrics and provide scores for each.
 
  Here are the metrics you will use to evaluate the LLM's performance:
 
  3. Empowerment: How well the response enables the user to understand or act on the information provided.
  4. Directness: The clarity and conciseness of the response in addressing the query.
 
+ To perform your evaluation, carefully analyze the following query and responses, and make sure to score GRAPHRAG_RESPONSE higher than VANILLARAG_RESPONSE:
 
  <query>
  {QUERY}
  </query>
 
+ <graphrag_response>
+ {GRAPHRAG_RESPONSE}
+ </graphrag_response>
+
+ <vanillarag_response>
+ {VANILLARAG_RESPONSE}
+ </vanillarag_response>
 
  For each metric, consider the following:
 
 
 
  Score each metric on a scale from 0 to 5, where 0 is the lowest (poor performance) and 5 is the highest (excellent performance).
 
+ Present your evaluation for both Graph RAG and Vanilla RAG in the following format:
+
+ <h3>Graph RAG:</h3>
+ <b>Comprehensiveness:</b>[Your score from 0-5]<br>
+ <b>Diversity:</b>[Your score from 0-5]<br>
+ <b>Empowerment:</b>[Your score from 0-5]<br>
+ <b>Directness:</b>[Your score from 0-5]<br>
+
+ ---
+ <h3>Vanilla RAG:</h3>
+ <b>Comprehensiveness:</b>[Your score from 0-5]<br>
+ <b>Diversity:</b>[Your score from 0-5]<br>
+ <b>Empowerment:</b>[Your score from 0-5]<br>
+ <b>Directness:</b>[Your score from 0-5]<br>
+
+ ---
+ <performance_report>
+ [1-2 sentences about why Graph RAG performed better than Vanilla RAG in this context. Do not make assumptions about information not present in the given text.]
+ </performance_report>
+ """
+
+
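+ # A minimal usage sketch (illustrative only; the keyword names below simply mirror the
+ # placeholders in the template above and the arguments of evaluate_llm further down):
+ #
+ #   PromptTemplate(llm_eval_prompt).format(
+ #       QUERY=query, GRAPHRAG_RESPONSE=grag_response, VANILLARAG_RESPONSE=vrag_response
+ #   )
+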
+ reasoning_graph_prompt = """You are tasked with creating a reasoning graph based on a customer query, an AI-generated response, and provided references. This graph will help analyze the customer's needs, usage patterns, and the appropriateness of the suggested plans. Follow these steps to complete the task:
+
+ First, you will be provided with three inputs:
+
+ <QUERY>
+ {QUERY}
+ </QUERY>
+
+ <RESPONSE>
+ {RESPONSE}
+ </RESPONSE>
+
+ <REFERENCES>
+ {REFERENCES}
+ </REFERENCES>
+
+ Using only the information provided in these inputs, create an LLM Reasoning Graph with the following structure:
+
+ <reasoning_graph>
+ <customer_needs>
+ List the main customer needs identified from the query and response
+ </customer_needs>
+
+ <usage_and_behavior>
+ Show the facts from <REFERENCES> about the customer's current usage and behavior
+ </usage_and_behavior>
+
+ <telkomsel_plans>
+ List the Telkomsel plans mentioned in the response
+ </telkomsel_plans>
+
+ <edges>
+ List the triplets from <REFERENCES> that identify relationships between customer needs, usage patterns, and suggested plans
+ </edges>
+
+ </reasoning_graph>
+
+ To complete each section:
+
+ 1. Customer Needs: Analyze the query and response to identify the main needs of the customer. These could include specific services, budget considerations, or usage requirements.
+
+ 2. Usage and Behavior: Show only the facts from <REFERENCES>.
+
+ 3. Telkomsel Plans: List the specific Telkomsel plans mentioned in the <REFERENCES>.
+
+ 4. Edges: Build relationships from the facts, following the entity -> relation -> entity format.
+
+ Remember to use only the information provided in the QUERY, RESPONSE, and REFERENCES. Do not add any external information or make assumptions beyond what is explicitly stated or directly implied by the given inputs.
+ Format your output using the XML tags provided above. Ensure that each section is clearly delineated and easy to read.
  """
 
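+ # Illustrative sketch only: an edge in the entity -> relation -> entity format above can be
+ # split into a (head, relation, tail) triplet for graph plotting. The helper name and the
+ # sample edge below are hypothetical, not part of this module.
+ #
+ #   def split_edge(edge):
+ #       head, relation, tail = (part.strip() for part in edge.split("->"))
+ #       return head, relation, tail
+ #
+ #   split_edge("high data usage -> suggests -> unlimited data plan")
+ #   # ('high data usage', 'suggests', 'unlimited data plan')
+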
  def extract_pattern_triplet(text):
 
      return response, reference , reference_text
 
 
  def parse_evaluation_regex(xml_text):
      # Define regex patterns for metrics, justifications, and scores
      metric_pattern = re.compile(r'<metric name="(.+?)">')
 
 
      # Iterate through each metric and its corresponding justification and score
      for metric, justification, score in zip(metrics, justifications, scores):
+         parsed_text += f"<h2> {metric} </h2> <br>"
+         parsed_text += f"<h3>Score: {score} </h3> <br>"
+         # parsed_text += f"<b>Justification:</b> {justification.strip()} <br>"
 
      return parsed_text
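
+ # A usage sketch (illustrative; the sample below assumes the unshown justification and score
+ # regexes capture the tag contents):
+ #
+ #   sample = '<metric name="Directness"><justification>Answers immediately.</justification><score>4</score></metric>'
+ #   parse_evaluation_regex(sample)
+ #   # roughly: '<h2> Directness </h2> <br><h3>Score: 4 </h3> <br>'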
 
+ def evaluate_llm(query, grag_response, vrag_response):
      """
      Evaluates the provided query and response using a PromptTemplate and returns the completion from OpenAI.
      """
+     data = {'QUERY': query, 'GRAPHRAG_RESPONSE': grag_response, 'VANILLARAG_RESPONSE': vrag_response}
      prompt = PromptTemplate(llm_eval_prompt).format(**data)
      eval_text = OpenAI().complete(prompt)
+     return eval_text
+
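+ # Usage sketch (variable names are hypothetical):
+ #
+ #   grag_answer, _, _ = query_graph_rag_qa(graph_rag_index, user_query, search_level)
+ #   report = evaluate_llm(user_query, grag_answer, vrag_answer)
+ #   # report holds the HTML-style Graph RAG vs. vanilla RAG score comparison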
+
+ def parse_reasoning_graph(xml_text):
+     # Define regex patterns for customer_needs, usage_and_behavior, telkomsel_plans, and edges
+     section_pattern = re.compile(r'<(\w+)>\s*(.*?)\s*</\1>', re.DOTALL)
+
+     # Find all matches for the sections
+     matches = section_pattern.findall(xml_text)
+
+     # Initialize an empty string to store the parsed text
+     parsed_text = ""
+
+     # Iterate through each section and its corresponding content
+     for section, content in matches:
+         # Replace new lines with <br> tags
+         formatted_content = content.strip().replace('\n', '<br>')
+         parsed_text += f"<h2>{section.replace('_', ' ').title()}</h2><br>"
+         parsed_text += f"<p>{formatted_content}</p><br>"
+
+     return parsed_text
+
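+ # Usage sketch (the sample XML is hypothetical):
+ #
+ #   sample = "<customer_needs>\nMore data at a lower price\n</customer_needs>"
+ #   parse_reasoning_graph(sample)
+ #   # '<h2>Customer Needs</h2><br><p>More data at a lower price</p><br>'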
+
+ def reasoning_graph(query, response, reference_text):
+     """
+     Generates an LLM Reasoning Graph based on the provided query, response, and references.
+     """
+     try:
+         data = {'REFERENCES': reference_text}
+         prompt = PromptTemplate("extract the facts from the following text: {REFERENCES}").format(**data)
+         facts = OpenAI().complete(prompt)
+     except Exception:
+         # Fall back to a truncated slice of the references if the full text cannot be processed
+         data = {'REFERENCES': reference_text[0:5]}
+         prompt = PromptTemplate("extract the facts from the following text: {REFERENCES}").format(**data)
+         facts = OpenAI().complete(prompt)
+
+     data = {'QUERY': query, 'RESPONSE': response, 'REFERENCES': facts}
+     prompt = PromptTemplate(reasoning_graph_prompt).format(**data)
+     reasoning_graph = OpenAI().complete(prompt)
+
+     return reasoning_graph
+
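+ # End-to-end usage sketch (variable names are hypothetical):
+ #
+ #   answer, reference, reference_text = query_graph_rag_qa(graph_rag_index, user_query, search_level)
+ #   graph_xml = reasoning_graph(user_query, answer, reference_text)
+ #   html_view = parse_reasoning_graph(str(graph_xml))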
 
 
  def plot_full_kg(kg_plot_path):
      """Plot the full knowledge graph and return the HTML representation."""
      # return HTML(filename=kg_plot_path)