Update pages/type_text.py
Browse files
- pages/type_text.py +4 -23
pages/type_text.py
CHANGED
@@ -61,18 +61,11 @@ if len(combined_chapters_rows_indexes_list) == 1:
|
|
61 |
st.warning("Please select at least one chapter")
|
62 |
#st.write("COMBINED CHAPTERS ROWS INDEXES LIST: ", combined_chapters_rows_indexes_list)
|
63 |
df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows = lambda x: x not in combined_chapters_rows_indexes_list)
|
64 |
-
|
65 |
-
#df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", index_col="SBS_Code", usecols=["Long_Description"]) # na_values=['NA']
|
66 |
-
#df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", usecols=["SBS_Code_Hyphenated","Long_Description"])
|
67 |
-
#from_line = 850 # Imaging services chapter start, adjust as needed
|
68 |
-
#to_line = 1250 # Imaging services chapter end, adjust as needed
|
69 |
-
#nrows = to_line - from_line + 1
|
70 |
-
#skiprows = list(range(1,from_line - 1))
|
71 |
-
#df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows=skiprows, nrows=nrows)
|
72 |
-
|
73 |
#st.write(df_SBS.head(5))
|
74 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
75 |
|
|
|
|
|
76 |
|
77 |
|
78 |
def get_device_map() -> str:
|
@@ -101,8 +94,6 @@ placeholder, right_column = st.columns(2)
|
|
101 |
right_column.button("Reset description", on_click=on_click)
|
102 |
|
103 |
numMAPPINGS_input = 5
|
104 |
-
#numMAPPINGS_input = st.text_input("Type number of mappings and hit Enter", key="user_input_numMAPPINGS")
|
105 |
-
#st.button("Clear text", on_click=on_click)
|
106 |
|
107 |
|
108 |
## Define the Sentence Transformer models
|
@@ -158,25 +149,15 @@ def load_pipe():
|
|
158 |
pipe = load_pipe()
|
159 |
|
160 |
|
161 |
-
#mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
|
162 |
-
|
163 |
-
INTdesc_embedding = model.encode(INTdesc_input)
|
164 |
-
|
165 |
# Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
|
166 |
-
|
167 |
SBScorpus_embeddings = model.encode(SBScorpus)
|
168 |
|
169 |
-
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
170 |
-
dfALL = pd.DataFrame.from_dict(dictA)
|
171 |
-
|
172 |
-
|
173 |
if INTdesc_input is not None and st.button("Map to SBS codes"):
|
174 |
-
#my_model_results = pipeline("ner", model= "checkpoint-92")
|
175 |
HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
176 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
177 |
HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
|
178 |
-
|
179 |
-
#if INTdesc_input is not None and mapSBS_button == True: # earlier button version
|
180 |
for i, result in enumerate(HF_model_results_displayed):
|
181 |
dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
|
182 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
|
|
61 |
st.warning("Please select at least one chapter")
|
62 |
#st.write("COMBINED CHAPTERS ROWS INDEXES LIST: ", combined_chapters_rows_indexes_list)
|
63 |
df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows = lambda x: x not in combined_chapters_rows_indexes_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
#st.write(df_SBS.head(5))
|
65 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
66 |
|
67 |
+
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
68 |
+
dfALL = pd.DataFrame.from_dict(dictA)
|
69 |
|
70 |
|
71 |
def get_device_map() -> str:
|
|
|
94 |
right_column.button("Reset description", on_click=on_click)
|
95 |
|
96 |
numMAPPINGS_input = 5
|
|
|
|
|
97 |
|
98 |
|
99 |
## Define the Sentence Transformer models
|
|
|
149 |
pipe = load_pipe()
|
150 |
|
151 |
|
|
|
|
|
|
|
|
|
152 |
# Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
|
153 |
+
INTdesc_embedding = model.encode(INTdesc_input)
|
154 |
SBScorpus_embeddings = model.encode(SBScorpus)
|
155 |
|
|
|
|
|
|
|
|
|
156 |
if INTdesc_input is not None and st.button("Map to SBS codes"):
|
|
|
157 |
HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
158 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
159 |
HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
|
160 |
+
|
|
|
161 |
for i, result in enumerate(HF_model_results_displayed):
|
162 |
dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
|
163 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|