Spaces:

georad
/

sbsmapper

Running

App Files Files Community

georad committed on May 3

Commit

08afd6a

verified ·

1 Parent(s): a8dcdde

Update pages/type_text.py

Browse files

Files changed (1) hide show

pages/type_text.py +33 -30

pages/type_text.py CHANGED Viewed

@@ -9,7 +9,7 @@ from sentence_transformers import SentenceTransformer, util
 import os
 os.getenv("HF_TOKEN")
-st.header("map internal descriptions to SBS codes using Sentence Transformer + Reasoning Models")
 st.subheader("Select specific Chapter for quicker results")
 #df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
 df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows_with_total.csv")
@@ -105,7 +105,7 @@ numMAPPINGS_input = 5
 #st.button("Clear text", on_click=on_click)
-## Define the SentTrans models
 st_models = {
     'original model for general domain, fastest: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
     'fine-tuned model for medical domain: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
@@ -119,34 +119,13 @@ st_models = {
 #model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
 #model = SentenceTransformer('clips/mfaq')
-## Create the select box
 selected_st_model = st.selectbox('Choose a Sentence Transformer model:', list(st_models.keys()))
 st.write("Current selection:", selected_st_model)
-## Get the selected model
 SentTrans_model = st_models[selected_st_model]
-## Use the model...
-@st.cache_resource
-def load_model():
-    model = SentenceTransformer(SentTrans_model)
-    return model
-model = load_model()
-#mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
-INTdesc_embedding = model.encode(INTdesc_input)
-# Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
-SBScorpus_embeddings = model.encode(SBScorpus)
-#if len(chapter_rows_indexes_list) >1:
-if INTdesc_input is not None:
-    #my_model_results = pipeline("ner", model= "checkpoint-92")
-    HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
-    HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
-    HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
 ## Define the Reasoning models
 rs_models = {
@@ -156,25 +135,49 @@ rs_models = {
     'fine-tuned model for medical domain: Qwen/Qwen2-1.5B-Instruct': 'Qwen/Qwen2-1.5B-Instruct',
 }
-## Create the select box
 selected_rs_model = st.selectbox('Choose a Reasoning model:', list(rs_models.keys()))
 st.write("Current selection:", selected_rs_model)
-## Get the selected model
 Reasoning_model = rs_models[selected_rs_model]
-## Use the model as pipeline ...
 @st.cache_resource
 def load_pipe():
     pipe = pipeline("text-generation", model=Reasoning_model, device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
     return pipe
 pipe = load_pipe()
-mapSBS_button = st.button("Map to SBS codes", key="user_clickedSBS")
 dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
 dfALL = pd.DataFrame.from_dict(dictA)
 if INTdesc_input is not None and mapSBS_button == True:
     for i, result in enumerate(HF_model_results_displayed):
         dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})

 import os
 os.getenv("HF_TOKEN")
+st.header("Map internal descriptions to SBS codes using Sentence Transformer + Reasoning Models")
 st.subheader("Select specific Chapter for quicker results")
 #df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
 df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows_with_total.csv")
 #st.button("Clear text", on_click=on_click)
+## Define the Sentence Transformer models
 st_models = {
     'original model for general domain, fastest: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
     'fine-tuned model for medical domain: all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
 #model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
 #model = SentenceTransformer('clips/mfaq')
+## Create the select Sentence Transformer box
 selected_st_model = st.selectbox('Choose a Sentence Transformer model:', list(st_models.keys()))
 st.write("Current selection:", selected_st_model)
+## Get the selected SentTrans model
 SentTrans_model = st_models[selected_st_model]
 ## Define the Reasoning models
 rs_models = {
     'fine-tuned model for medical domain: Qwen/Qwen2-1.5B-Instruct': 'Qwen/Qwen2-1.5B-Instruct',
 }
+## Create the select Reasoning box
 selected_rs_model = st.selectbox('Choose a Reasoning model:', list(rs_models.keys()))
 st.write("Current selection:", selected_rs_model)
+## Get the selected Reasoning model
 Reasoning_model = rs_models[selected_rs_model]
+## Load the Sentence Transformer model ...
+@st.cache_resource
+def load_model():
+    model = SentenceTransformer(SentTrans_model)
+    return model
+model = load_model()
+## Load the Reasoning model as pipeline ...
 @st.cache_resource
 def load_pipe(model_name=None):
     """Build the text-generation pipeline, cached once per distinct model name.
     The name is a parameter so that it takes part in the st.cache_resource
     cache key; a zero-argument loader keeps returning the first pipeline it
     cached even after a different entry is chosen in the select box.
     Args:
         model_name: model identifier passed to pipeline(). When omitted,
             the module-level Reasoning_model is used (old call style).
     Returns:
         The pipeline object created for model_name.
     """
     if model_name is None:
         model_name = Reasoning_model
     pipe = pipeline("text-generation", model=model_name, device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
     return pipe
 # Pass the selection explicitly so a new choice loads the matching pipeline.
 pipe = load_pipe(Reasoning_model)
+#mapSBS_button = st.button("Map to SBS codes", on_click=on_click, key="user_clickedSBS")
+mapSBS_button = st.button("Map to SBS codes") #, key="user_clickedSBS")
+INTdesc_embedding = model.encode(INTdesc_input)
+# Semantic search, Compute cosine similarity between INTdesc_embedding and SBS descriptions
+SBScorpus_embeddings = model.encode(SBScorpus)
 dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
 dfALL = pd.DataFrame.from_dict(dictA)
+if INTdesc_input is not None and if st.button(...):
+    #my_model_results = pipeline("ner", model= "checkpoint-92")
+    HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
+    HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
+    HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
 if INTdesc_input is not None and mapSBS_button == True:
     for i, result in enumerate(HF_model_results_displayed):
         dictA.update({"Score": "%.4f" % result[0]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[0]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[0]["corpus_id"]]})