georad committed (verified)
Commit 08afd6a · 1 parent: a8dcdde

Update pages/type_text.py

Files changed (1): pages/type_text.py (+33 -30)
pages/type_text.py CHANGED

@@ -9,7 +9,7 @@ from sentence_transformers import SentenceTransformer, util
 import os
 os.getenv("HF_TOKEN")

- st.header("map internal descriptions to SBS codes using Sentence Transformer + Reasoning Models")
+ st.header("Map internal descriptions to SBS codes using Sentence Transformer + Reasoning Models")
 st.subheader("Select specific Chapter for quicker results")
 #df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
 df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows_with_total.csv")
@@ -105,7 +105,7 @@ numMAPPINGS_input = 5
 #st.button("Clear text", on_click=on_click)


- ## Define the SentTrans models
+ ## Define the Sentence Transformer models
 st_models = {
     'original model for general domain, fastest: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
     'fine-tuned model for medical domain: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
@@ -119,34 +119,13 @@ st_models = {
 #model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
 #model = SentenceTransformer('clips/mfaq')

- ## Create the select box
+ ## Create the select Sentence Transformer box
 selected_st_model = st.selectbox('Choose a Sentence Transformer model:', list(st_models.keys()))
 st.write("Current selection:", selected_st_model)

- ## Get the selected model
+ ## Get the selected SentTrans model
 SentTrans_model = st_models[selected_st_model]
-
- ## Use the model...
- @st.cache_resource
- def load_model():
-     model = SentenceTransformer(SentTrans_model)
-     return model
- model = load_model()

- #mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
-
- INTdesc_embedding = model.encode(INTdesc_input)
-
- # Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
-
- SBScorpus_embeddings = model.encode(SBScorpus)
-
- #if len(chapter_rows_indexes_list) >1:
- if INTdesc_input is not None:
-     #my_model_results = pipeline("ner", model= "checkpoint-92")
-     HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
-     HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
-     HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]

 ## Define the Reasoning models
 rs_models = {
@@ -156,25 +135,49 @@ rs_models = {
     'fine-tuned model for medical domain: Qwen/Qwen2-1.5B-Instruct': 'Qwen/Qwen2-1.5B-Instruct',
 }

- ## Create the select box
+ ## Create the select Reasoning box
 selected_rs_model = st.selectbox('Choose a Reasoning model:', list(rs_models.keys()))
 st.write("Current selection:", selected_rs_model)

- ## Get the selected model
+ ## Get the selected Reasoning model
 Reasoning_model = rs_models[selected_rs_model]
-
- ## Use the model as pipeline ...
+
+
+ ## Load the Sentence Transformer model ...
+ @st.cache_resource
+ def load_model():
+     model = SentenceTransformer(SentTrans_model)
+     return model
+ model = load_model()
+
+ ## Load the Reasoning model as pipeline ...
 @st.cache_resource
 def load_pipe():
     pipe = pipeline("text-generation", model=Reasoning_model, device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
     return pipe
 pipe = load_pipe()

- mapSBS_button = st.button("Map to SBS codes", key="user_clickedSBS")
+
+ #mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
+ mapSBS_button = st.button("Map to SBS codes") #, key="user_clickedSBS")
+
+ INTdesc_embedding = model.encode(INTdesc_input)
+
+ # Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
+
+ SBScorpus_embeddings = model.encode(SBScorpus)

 dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
 dfALL = pd.DataFrame.from_dict(dictA)

+
+ if INTdesc_input is not None and if st.button(...):
+     #my_model_results = pipeline("ner", model= "checkpoint-92")
+     HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
+     HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
+     HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
+
+
 if INTdesc_input is not None and mapSBS_button == True:
     for i, result in enumerate(HF_model_results_displayed):
         dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})
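
A note on the cached loaders introduced above: st.cache_resource keys its cache on the decorated function's code and arguments, so zero-argument loaders that read the module-level SentTrans_model and Reasoning_model can keep returning whichever model was loaded first even after a different entry is picked in the selectbox. A minimal sketch that avoids this by passing the selected name as a parameter (the names load_st_model and load_reasoning_pipe, the placeholder assignments, and device_map="auto" are illustrative assumptions, not part of the commit):

import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# In the app these values come from the two selectboxes above.
SentTrans_model = "all-MiniLM-L6-v2"
Reasoning_model = "Qwen/Qwen2-1.5B-Instruct"

@st.cache_resource
def load_st_model(model_name: str):
    # Cached once per distinct model_name and reused across Streamlit reruns.
    return SentenceTransformer(model_name)

@st.cache_resource
def load_reasoning_pipe(model_name: str):
    # device_map="auto" is an assumption; the app passes its own `device`.
    return pipeline("text-generation", model=model_name, device_map="auto")

model = load_st_model(SentTrans_model)
pipe = load_reasoning_pipe(Reasoning_model)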
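
The new guard `if INTdesc_input is not None and if st.button(...):` is not valid Python (the second `if` is a syntax error), and util.semantic_search already returns, for each query embedding, a list of {"corpus_id", "score"} hits sorted by descending score, so sorting and slicing the outer per-query list appears to leave only the single top hit for display. A minimal sketch of the presumable intent, reusing the existing mapSBS_button and slicing the hit list itself (the top_k argument and the reshaped result list are assumptions, not the author's code):

if INTdesc_input is not None and mapSBS_button:
    # One query embedding -> semantic_search returns a one-element outer list;
    # its inner list holds the ranked hits {"corpus_id": int, "score": float}.
    HF_model_results = util.semantic_search(
        INTdesc_embedding, SBScorpus_embeddings, top_k=numMAPPINGS_input
    )
    # Hits are already sorted by score, so take the top-N hits directly.
    HF_model_results_displayed = HF_model_results[0][:numMAPPINGS_input]

With this shape each displayed result is a single hit dict, so the loop that builds dictA would read result["score"] and result["corpus_id"] directly instead of result[0]["score"] and result[0]["corpus_id"].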