Update pages/type_text.py
Browse files- pages/type_text.py +28 -15
pages/type_text.py
CHANGED
@@ -111,10 +111,16 @@ st_models = {
|
|
111 |
'original model for general domain, best performance: all-mpnet-base-v2': 'all-mpnet-base-v2',
|
112 |
'fine-tuned model for medical domain: all-mpnet-base-v2': 'all-mpnet-base-v2',
|
113 |
}
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
## Create the select box
|
116 |
-
selected_st_model = st.selectbox('Choose a model:', list(st_models.keys()))
|
117 |
-
st.write("
|
118 |
|
119 |
## Get the selected model
|
120 |
SentTrans_model = st_models[selected_st_model]
|
@@ -126,14 +132,6 @@ def load_model():
|
|
126 |
return model
|
127 |
model = load_model()
|
128 |
|
129 |
-
#model = SentenceTransformer('all-MiniLM-L6-v2') # fastest
|
130 |
-
#model = SentenceTransformer('all-mpnet-base-v2') # best performance
|
131 |
-
#model = SentenceTransformers('all-distilroberta-v1')
|
132 |
-
#model = SentenceTransformer('sentence-transformers/msmarco-bert-base-dot-v5')
|
133 |
-
#model = SentenceTransformer('clips/mfaq')
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
|
138 |
|
139 |
INTdesc_embedding = model.encode(INTdesc_input)
|
@@ -148,13 +146,28 @@ HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
|
148 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
149 |
HF_model_results_displayed = HF_model_results_sorted[0:numMAPPINGS_input]
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
@st.cache_resource
|
152 |
def load_pipe():
|
153 |
-
pipe = pipeline("text-generation", model=
|
154 |
return pipe
|
155 |
pipe = load_pipe()
|
156 |
|
157 |
-
#pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct", device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
|
158 |
|
159 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
160 |
dfALL = pd.DataFrame.from_dict(dictA)
|
@@ -174,13 +187,13 @@ if INTdesc_input is not None and createSBScodes_clicked == True:
|
|
174 |
|
175 |
st.dataframe(data=dfALL, hide_index=True)
|
176 |
|
177 |
-
display_format = "ask REASONING MODEL: Which, if any, of the
|
178 |
#st.write(display_format)
|
179 |
question = "Which one, if any, of the following Saudi Billing System descriptions A, B, C, D, or E corresponds best to " + INTdesc_input +"? "
|
180 |
shortlist = [SBScorpus[result[0]["corpus_id"]], SBScorpus[result[1]["corpus_id"]], SBScorpus[result[2]["corpus_id"]], SBScorpus[result[3]["corpus_id"]], SBScorpus[result[4]["corpus_id"]]]
|
181 |
prompt = question + " " +"A: "+ shortlist[0] + " " +"B: " + shortlist[1] + " " + "C: " + shortlist[2] + " " + "D: " + shortlist[3] + " " + "E: " + shortlist[4]
|
182 |
st.write(prompt)
|
183 |
-
|
184 |
messages = [
|
185 |
{"role": "system", "content": "You are a knowledgable AI assistant who always answers truthfully and precisely!"},
|
186 |
{"role": "user", "content": prompt},
|
|
|
111 |
'original model for general domain, best performance: all-mpnet-base-v2': 'all-mpnet-base-v2',
|
112 |
'fine-tuned model for medical domain: all-mpnet-base-v2': 'all-mpnet-base-v2',
|
113 |
}
|
114 |
+
|
115 |
+
# NOTE: other Sentence Transformer checkpoints tried during development:
# all-MiniLM-L6-v2 (fastest), all-mpnet-base-v2 (best performance),
# all-distilroberta-v1, msmarco-bert-base-dot-v5, clips/mfaq.
# (Dead commented-out code removed; one line also had a
# `SentenceTransformers` typo that would have raised NameError.)

## Create the select box for the Sentence Transformer model
selected_st_model = st.selectbox('Choose a Sentence Transformer model:', list(st_models.keys()))
st.write("Current selection:", selected_st_model)

## Get the selected model (checkpoint id looked up from the chosen label)
SentTrans_model = st_models[selected_st_model]
|
|
|
132 |
return model
|
133 |
model = load_model()
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
|
137 |
# Embed the free-text intervention description with the cached Sentence
# Transformer built by load_model() above. NOTE(review): assumes
# INTdesc_input is a non-None string here — confirm the caller guards it.
INTdesc_embedding = model.encode(INTdesc_input)
|
|
|
146 |
# Rank the semantic-search results by descending score and keep only the
# first numMAPPINGS_input entries for display.
# NOTE(review): the key `hit[1]` indexes the *second element* of each
# per-query result list, not a score field — confirm this is the intended
# sort key against util.semantic_search's return shape.
HF_model_results_sorted = sorted(HF_model_results, key=lambda hit: hit[1], reverse=True)
HF_model_results_displayed = HF_model_results_sorted[:numMAPPINGS_input]
|
148 |
|
149 |
+
## Define the Reasoning models (display label -> Hugging Face checkpoint id)
rs_models = {
    'original model for general domain, faster: meta-llama/Llama-3.2-1B-Instruct': 'meta-llama/Llama-3.2-1B-Instruct',
    'fine-tuned model for medical domain: meta-llama/Llama-3.2-1B-Instruct': 'meta-llama/Llama-3.2-1B-Instruct',
    'original model for general domain, slower: Qwen/Qwen2-1.5B-Instruct': 'Qwen/Qwen2-1.5B-Instruct',
    'fine-tuned model for medical domain: Qwen/Qwen2-1.5B-Instruct': 'Qwen/Qwen2-1.5B-Instruct',
}

## Create the select box for the Reasoning model
# BUG FIX: the options must come from rs_models, not st_models. With
# st_models.keys() every selectable label was a Sentence-Transformer label,
# so the lookup rs_models[selected_rs_model] below raised KeyError on any
# selection.
selected_rs_model = st.selectbox('Choose a Reasoning model:', list(rs_models.keys()))
st.write("Current selection:", selected_rs_model)

## Get the selected model (checkpoint id looked up from the chosen label)
Reasoning_model = rs_models[selected_rs_model]

## Use the model as a text-generation pipeline, cached so Streamlit reruns
## reuse the already-loaded weights instead of reloading them.
@st.cache_resource
def load_pipe():
    pipe = pipeline("text-generation", model=Reasoning_model, device_map=device,) # device_map="auto", torch_dtype=torch.bfloat16
    return pipe
pipe = load_pipe()
|
170 |
|
|
|
171 |
|
172 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
173 |
dfALL = pd.DataFrame.from_dict(dictA)
|
|
|
187 |
|
188 |
st.dataframe(data=dfALL, hide_index=True)
|
189 |
|
190 |
+
# Debug preview of the reasoning request (not rendered by default).
display_format = "ask REASONING MODEL: Which, if any, of the following SBS descriptions corresponds best to " + INTdesc_input +"? "
#st.write(display_format)
question = "Which one, if any, of the following Saudi Billing System descriptions A, B, C, D, or E corresponds best to " + INTdesc_input +"? "
# Top-5 candidate descriptions, in semantic-search ranking order.
shortlist = [SBScorpus[result[i]["corpus_id"]] for i in range(5)]
# Label the candidates A..E and append them to the question.
labelled_options = " ".join(letter + ": " + text for letter, text in zip("ABCDE", shortlist))
prompt = question + " " + labelled_options
st.write(prompt)
|
196 |
+
|
197 |
messages = [
|
198 |
{"role": "system", "content": "You are a knowledgable AI assistant who always answers truthfully and precisely!"},
|
199 |
{"role": "user", "content": prompt},
|