georad committed on
Commit
a153606
·
verified ·
1 Parent(s): 6dfd1dc

Update pages/type_text.py

Browse files
Files changed (1) hide show
  1. pages/type_text.py +7 -35
pages/type_text.py CHANGED
@@ -74,14 +74,11 @@ HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
74
  HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
75
  HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
76
 
77
- model_id = "meta-llama/Llama-3.2-1B-Instruct"
78
- pipe = pipeline("text-generation", model=model_id, device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
79
-
80
-
81
- #col1, col2, col3 = st.columns([1,1,2.5])
82
- #col1.subheader("Score")
83
- #col2.subheader("SBS code")
84
- #col3.subheader("SBS description V2.0")
85
 
86
  dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
87
  dfALL = pd.DataFrame.from_dict(dictA)
@@ -89,45 +86,20 @@ dfALL = pd.DataFrame.from_dict(dictA)
89
  if INTdesc_input is not None and createSBScodes_clicked == True:
90
  #for i, result in enumerate(HF_model_results_displayed):
91
  for result in HF_model_results_displayed:
92
- #col1.write("%.4f" % result[0]["score"])
93
- #col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
94
- #col3.write(SBScorpus[result[0]["corpus_id"]])
95
- #dictA["Score"].append("%.4f" % result[0]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[0]["corpus_id"]])
96
  dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
97
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
98
-
99
- #col1.write("%.4f" % result[1]["score"])
100
- #col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
101
- #col3.write(SBScorpus[result[1]["corpus_id"]])
102
- #dictA["Score"].append("%.4f" % result[1]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[1]["corpus_id"]])
103
  dictA.update({"Score": "%.4f" % result[1]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[1]["corpus_id"]]})
104
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
105
-
106
- #col1.write("%.4f" % result[2]["score"])
107
- #col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
108
- #col3.write(SBScorpus[result[2]["corpus_id"]])
109
- #dictA["Score"].append("%.4f" % result[2]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[2]["corpus_id"]])
110
  dictA.update({"Score": "%.4f" % result[2]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[2]["corpus_id"]]})
111
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
112
-
113
- #col1.write("%.4f" % result[3]["score"])
114
- #col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
115
- #col3.write(SBScorpus[result[3]["corpus_id"]])
116
- #dictA["Score"].append("%.4f" % result[3]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[3]["corpus_id"]])
117
  dictA.update({"Score": "%.4f" % result[3]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[3]["corpus_id"]]})
118
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
119
-
120
- #col1.write("%.4f" % result[4]["score"])
121
- #col2.write(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0])
122
- #col3.write(SBScorpus[result[4]["corpus_id"]])
123
- #dictA["Score"].append("%.4f" % result[4]["score"]), dictA["SBS Code"].append(df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0]), dictA["SBS Description V2.0"].append(SBScorpus[result[4]["corpus_id"]])
124
  dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
125
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
126
-
127
  st.dataframe(data=dfALL, hide_index=True)
128
- #st.write(dfALL)
129
 
130
- display_format = "ask REASONING MODEL: Which, if any, of the above Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
131
  st.write(display_format)
132
  question = "Which, if any, of the below Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
133
  shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]
 
74
  HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
75
  HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
76
 
77
+ @st.cache_resource
78
+ def load_model_pipe():
79
+ pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct", device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
80
+ return pipe
81
+ load_model_pipe()
 
 
 
82
 
83
  dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
84
  dfALL = pd.DataFrame.from_dict(dictA)
 
86
  if INTdesc_input is not None and createSBScodes_clicked == True:
87
  #for i, result in enumerate(HF_model_results_displayed):
88
  for result in HF_model_results_displayed:
 
 
 
 
89
  dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
90
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
 
 
 
 
 
91
  dictA.update({"Score": "%.4f" % result[1]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[1]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[1]["corpus_id"]]})
92
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
 
 
 
 
 
93
  dictA.update({"Score": "%.4f" % result[2]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[2]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[2]["corpus_id"]]})
94
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
 
 
 
 
 
95
  dictA.update({"Score": "%.4f" % result[3]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[3]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[3]["corpus_id"]]})
96
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
 
 
 
 
 
97
  dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
98
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
99
+
100
  st.dataframe(data=dfALL, hide_index=True)
 
101
 
102
+ display_format = "ask REASONING MODEL: Which, if any, of the above SBS descriptions corresponds best to " + INTdesc_input +"? "
103
  st.write(display_format)
104
  question = "Which, if any, of the below Saudi Billing System descriptions corresponds best to " + INTdesc_input +"? "
105
  shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]