Commit
·
23fee25
1
Parent(s):
0a08480
refactoring: create separate file for the prompt template
Browse files
- app.py +5 -39
- prompt_template.py +36 -0
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from pandas import DataFrame as PandasDataFrame
|
|
| 9 |
from llm import MessageChatCompletion
|
| 10 |
from customization import css, js
|
| 11 |
from examples import example_1, example_2, example_3, example_4
|
|
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
|
@@ -33,46 +34,10 @@ def build_context(row):
|
|
| 33 |
|
| 34 |
def click_button(model, api_key, abstract):
|
| 35 |
labels = df['Subsector'].tolist()
|
| 36 |
-
|
| 37 |
language_model = MessageChatCompletion(model=model, api_key=api_key)
|
| 38 |
-
system_message = (
|
| 39 |
-
|
| 40 |
-
Each subsector is defined by a unique set of characteristics:
|
| 41 |
-
Name: The name of the subsector.
|
| 42 |
-
Definition: A brief description of the subsector.
|
| 43 |
-
Keywords: Important words associated with the subsector.
|
| 44 |
-
Does include: Elements typically found within the subsector.
|
| 45 |
-
Does not include: Elements typically not found within the subsector.
|
| 46 |
-
Consider 'nan' values as 'not available' or 'not applicable'.
|
| 47 |
-
When classifying an abstract, provide the following:
|
| 48 |
-
## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
|
| 49 |
-
## 2. Reasoning:
|
| 50 |
-
### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
|
| 51 |
-
### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
|
| 52 |
-
### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
|
| 53 |
-
### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
|
| 54 |
-
## 3. Non-selected Subsectors:
|
| 55 |
-
- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
|
| 56 |
-
## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
|
| 57 |
-
## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
|
| 58 |
-
<context>
|
| 59 |
-
{contexts}
|
| 60 |
-
</context>
|
| 61 |
-
""")
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
user_message = f"""
|
| 65 |
-
Classify this patent abstract into one or more labels, then format your response as markdown:
|
| 66 |
-
|
| 67 |
-
<labels>
|
| 68 |
-
{labels}
|
| 69 |
-
</labels>
|
| 70 |
-
|
| 71 |
-
<abstract>
|
| 72 |
-
{abstract}
|
| 73 |
-
</abstract>
|
| 74 |
-
"""
|
| 75 |
-
|
| 76 |
language_model.new_system_message(content=system_message)
|
| 77 |
language_model.new_user_message(content=user_message)
|
| 78 |
language_model.send_message()
|
|
@@ -94,6 +59,7 @@ def click_button(model, api_key, abstract):
|
|
| 94 |
|
| 95 |
return match_score_dict, response_reasoning, logs_df
|
| 96 |
|
|
|
|
| 97 |
def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
|
| 98 |
selected = df.iloc[[evt.index[0]]].iloc[0]
|
| 99 |
name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
|
|
|
|
| 9 |
from llm import MessageChatCompletion
|
| 10 |
from customization import css, js
|
| 11 |
from examples import example_1, example_2, example_3, example_4
|
| 12 |
+
from prompt_template import system_message_template, user_message_template
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
|
|
|
| 34 |
|
| 35 |
def click_button(model, api_key, abstract):
|
| 36 |
labels = df['Subsector'].tolist()
|
| 37 |
+
prompt_context = [build_context(row) for _, row in df.iterrows()]
|
| 38 |
language_model = MessageChatCompletion(model=model, api_key=api_key)
|
| 39 |
+
system_message = system_message_template.format(prompt_context=prompt_context)
|
| 40 |
+
user_message = user_message_template.format(labels=labels, abstract=abstract)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
language_model.new_system_message(content=system_message)
|
| 42 |
language_model.new_user_message(content=user_message)
|
| 43 |
language_model.send_message()
|
|
|
|
| 59 |
|
| 60 |
return match_score_dict, response_reasoning, logs_df
|
| 61 |
|
| 62 |
+
|
| 63 |
def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
|
| 64 |
selected = df.iloc[[evt.index[0]]].iloc[0]
|
| 65 |
name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
|
prompt_template.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
system_message_template = """
|
| 2 |
+
You are a system designed to classify patent abstracts into one or more subsectors based on their content.
|
| 3 |
+
Each subsector is defined by a unique set of characteristics:
|
| 4 |
+
Name: The name of the subsector.
|
| 5 |
+
Definition: A brief description of the subsector.
|
| 6 |
+
Keywords: Important words associated with the subsector.
|
| 7 |
+
Does include: Elements typically found within the subsector.
|
| 8 |
+
Does not include: Elements typically not found within the subsector.
|
| 9 |
+
Consider 'nan' values as 'not available' or 'not applicable'.
|
| 10 |
+
When classifying an abstract, provide the following:
|
| 11 |
+
## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
|
| 12 |
+
## 2. Reasoning:
|
| 13 |
+
### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
|
| 14 |
+
### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
|
| 15 |
+
### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
|
| 16 |
+
### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
|
| 17 |
+
## 3. Non-selected Subsectors:
|
| 18 |
+
- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
|
| 19 |
+
## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
|
| 20 |
+
## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
|
| 21 |
+
<context>
|
| 22 |
+
{prompt_context}
|
| 23 |
+
</context>
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
user_message_template = """
|
| 27 |
+
Classify this patent abstract into one or more labels, then format your response as markdown:
|
| 28 |
+
|
| 29 |
+
<labels>
|
| 30 |
+
{labels}
|
| 31 |
+
</labels>
|
| 32 |
+
|
| 33 |
+
<abstract>
|
| 34 |
+
{abstract}
|
| 35 |
+
</abstract>
|
| 36 |
+
"""
|