georad committed on
Commit
efe2677
·
verified ·
1 Parent(s): 68eb640

Update pages/type_text.py

Browse files
Files changed (1) hide show
  1. pages/type_text.py +4 -23
pages/type_text.py CHANGED
@@ -61,18 +61,11 @@ if len(combined_chapters_rows_indexes_list) == 1:
61
  st.warning("Please select at least one chapter")
62
  #st.write("COMBINED CHAPTERS ROWS INDEXES LIST: ", combined_chapters_rows_indexes_list)
63
  df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows = lambda x: x not in combined_chapters_rows_indexes_list)
64
-
65
- #df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", index_col="SBS_Code", usecols=["Long_Description"]) # na_values=['NA']
66
- #df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", usecols=["SBS_Code_Hyphenated","Long_Description"])
67
- #from_line = 850 # Imaging services chapter start, adjust as needed
68
- #to_line = 1250 # Imaging services chapter end, adjust as needed
69
- #nrows = to_line - from_line + 1
70
- #skiprows = list(range(1,from_line - 1))
71
- #df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows=skiprows, nrows=nrows)
72
-
73
  #st.write(df_SBS.head(5))
74
  SBScorpus = df_SBS['Long_Description'].values.tolist()
75
 
 
 
76
 
77
 
78
  def get_device_map() -> str:
@@ -101,8 +94,6 @@ placeholder, right_column = st.columns(2)
101
  right_column.button("Reset description", on_click=on_click)
102
 
103
  numMAPPINGS_input = 5
104
- #numMAPPINGS_input = st.text_input("Type number of mappings and hit Enter", key="user_input_numMAPPINGS")
105
- #st.button("Clear text", on_click=on_click)
106
 
107
 
108
  ## Define the Sentence Transformer models
@@ -158,25 +149,15 @@ def load_pipe():
158
  pipe = load_pipe()
159
 
160
 
161
- #mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
162
-
163
- INTdesc_embedding = model.encode(INTdesc_input)
164
-
165
  # Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
166
-
167
  SBScorpus_embeddings = model.encode(SBScorpus)
168
 
169
- dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
170
- dfALL = pd.DataFrame.from_dict(dictA)
171
-
172
-
173
  if INTdesc_input is not None and st.button("Map to SBS codes"):
174
- #my_model_results = pipeline("ner", model= "checkpoint-92")
175
  HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
176
  HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
177
  HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
178
-
179
- #if INTdesc_input is not None and mapSBS_button == True: # earlier button version
180
  for i, result in enumerate(HF_model_results_displayed):
181
  dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
182
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
 
61
  st.warning("Please select at least one chapter")
62
  #st.write("COMBINED CHAPTERS ROWS INDEXES LIST: ", combined_chapters_rows_indexes_list)
63
  df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False, skiprows = lambda x: x not in combined_chapters_rows_indexes_list)
 
 
 
 
 
 
 
 
 
64
  #st.write(df_SBS.head(5))
65
  SBScorpus = df_SBS['Long_Description'].values.tolist()
66
 
67
+ dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
68
+ dfALL = pd.DataFrame.from_dict(dictA)
69
 
70
 
71
  def get_device_map() -> str:
 
94
  right_column.button("Reset description", on_click=on_click)
95
 
96
  numMAPPINGS_input = 5
 
 
97
 
98
 
99
  ## Define the Sentence Transformer models
 
149
  pipe = load_pipe()
150
 
151
 
 
 
 
 
152
  # Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
153
+ INTdesc_embedding = model.encode(INTdesc_input)
154
  SBScorpus_embeddings = model.encode(SBScorpus)
155
 
 
 
 
 
156
  if INTdesc_input is not None and st.button("Map to SBS codes"):
 
157
  HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
158
  HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
159
  HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
160
+
 
161
  for i, result in enumerate(HF_model_results_displayed):
162
  dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
163
  dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)