Update pages/type_text.py
Browse files- pages/type_text.py +7 -35
pages/type_text.py
CHANGED
@@ -74,14 +74,11 @@ HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
|
74 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
75 |
HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
#col1.subheader("Score")
|
83 |
-
#col2.subheader("SBS code")
|
84 |
-
#col3.subheader("SBS description V2.0")
|
85 |
|
86 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
87 |
dfALL = pd.DataFrame.from_dict(dictA)
|
@@ -89,45 +86,20 @@ dfALL = pd.DataFrame.from_dict(dictA)
|
|
89 |
if INTdesc_input is not None and createSBScodes_clicked == True:
|
90 |
#for i, result in enumerate(HF_model_results_displayed):
|
91 |
for result in HF_model_results_displayed:
|
92 |
-
#col1.write("%.4f" % result[0]["score"])
|
93 |
-
#col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
|
94 |
-
#col3.write(SBScorpus[result[0]["corpus_id"]])
|
95 |
-
#dictA["Score"].append("%.4f" % result[0]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[0]["corpus_id"]])
|
96 |
dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
|
97 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
98 |
-
|
99 |
-
#col1.write("%.4f" % result[1]["score"])
|
100 |
-
#col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
|
101 |
-
#col3.write(SBScorpus[result[1]["corpus_id"]])
|
102 |
-
#dictA["Score"].append("%.4f" % result[1]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[1]["corpus_id"]])
|
103 |
dictA.update({"Score": "%.4f" % result[1]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[1]["corpus_id"]]})
|
104 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
105 |
-
|
106 |
-
#col1.write("%.4f" % result[2]["score"])
|
107 |
-
#col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
|
108 |
-
#col3.write(SBScorpus[result[2]["corpus_id"]])
|
109 |
-
#dictA["Score"].append("%.4f" % result[2]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[2]["corpus_id"]])
|
110 |
dictA.update({"Score": "%.4f" % result[2]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[2]["corpus_id"]]})
|
111 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
112 |
-
|
113 |
-
#col1.write("%.4f" % result[3]["score"])
|
114 |
-
#col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
|
115 |
-
#col3.write(SBScorpus[result[3]["corpus_id"]])
|
116 |
-
#dictA["Score"].append("%.4f" % result[3]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[3]["corpus_id"]])
|
117 |
dictA.update({"Score": "%.4f" % result[3]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[3]["corpus_id"]]})
|
118 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
119 |
-
|
120 |
-
#col1.write("%.4f" % result[4]["score"])
|
121 |
-
#col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
|
122 |
-
#col3.write(SBScorpus[result[4]["corpus_id"]])
|
123 |
-
#dictA["Score"].append("%.4f" % result[4]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[4]["corpus_id"]])
|
124 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
125 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
126 |
-
|
127 |
st.dataframe(data=dfALL, hide_index=True)
|
128 |
-
#st.write(dfALL)
|
129 |
|
130 |
-
display_format = "ask REASONING MODEL: Which, if any, of the above
|
131 |
st.write(display_format)
|
132 |
question = "Which, if any, of the below Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
|
133 |
shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]
|
|
|
74 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
75 |
HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
|
76 |
|
77 |
+
@st.cache_resource
|
78 |
+
def load_model_pipe():
|
79 |
+
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct", device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
|
80 |
+
return pipe
|
81 |
+
load_model_pipe()
|
|
|
|
|
|
|
82 |
|
83 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
84 |
dfALL = pd.DataFrame.from_dict(dictA)
|
|
|
86 |
if INTdesc_input is not None and createSBScodes_clicked == True:
|
87 |
#for i, result in enumerate(HF_model_results_displayed):
|
88 |
for result in HF_model_results_displayed:
|
|
|
|
|
|
|
|
|
89 |
dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
|
90 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
91 |
dictA.update({"Score": "%.4f" % result[1]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[1]["corpus_id"]]})
|
92 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
93 |
dictA.update({"Score": "%.4f" % result[2]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[2]["corpus_id"]]})
|
94 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
95 |
dictA.update({"Score": "%.4f" % result[3]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[3]["corpus_id"]]})
|
96 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
97 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
98 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
99 |
+
|
100 |
st.dataframe(data=dfALL, hide_index=True)
|
|
|
101 |
|
102 |
+
display_format = "ask REASONING MODEL: Which, if any, of the above SBS descriptions corresponds best to " + INTdesc_input +"? "
|
103 |
st.write(display_format)
|
104 |
question = "Which, if any, of the below Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
|
105 |
shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]
|