Update pages/type_text.py
Browse files- pages/type_text.py +44 -29
pages/type_text.py
CHANGED
@@ -23,6 +23,12 @@ os.getenv("HF_TOKEN")
|
|
23 |
|
24 |
#st.title("Map internal descriptions to SBS codes with Sentence Transformer + Reasoning Models")
|
25 |
#st.subheader("Select specific Chapter for quicker results")
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
#df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
|
27 |
df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows_with_total.csv")
|
28 |
|
@@ -77,6 +83,7 @@ df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False
|
|
77 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
78 |
|
79 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
|
|
80 |
dfALL = pd.DataFrame.from_dict(dictA)
|
81 |
|
82 |
|
@@ -171,8 +178,6 @@ pipe = load_pipe()
|
|
171 |
INTdesc_embedding = model.encode(INTdesc_input)
|
172 |
SBScorpus_embeddings = model.encode(SBScorpus)
|
173 |
|
174 |
-
|
175 |
-
|
176 |
if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model"):
|
177 |
HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
178 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
@@ -189,22 +194,27 @@ if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model
|
|
189 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
190 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
191 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
st.dataframe(data=dfALL, hide_index=True)
|
194 |
|
195 |
-
# JavaScript component to automatically scroll to bottom of page
|
196 |
-
st.components.v1.html(
|
197 |
-
"""
|
198 |
-
<script>
|
199 |
-
window.onload = function() {
|
200 |
-
window.scrollTo(0, document.body.scrollHeight);
|
201 |
-
}
|
202 |
-
// For immediate execution as well
|
203 |
-
window.scrollTo(0, document.body.scrollHeight);
|
204 |
-
</script>
|
205 |
-
""",
|
206 |
-
height=0
|
207 |
-
)
|
208 |
|
209 |
display_format = "ask REASONING MODEL: Which, if any, of the following SBS descriptions corresponds best to " + INTdesc_input +"? "
|
210 |
#st.write(display_format)
|
@@ -229,19 +239,24 @@ if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model
|
|
229 |
)
|
230 |
st.write(outputs[0]["generated_text"][-1]["content"])
|
231 |
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
247 |
with b1:
|
|
|
23 |
|
24 |
#st.title("Map internal descriptions to SBS codes with Sentence Transformer + Reasoning Models")
|
25 |
#st.subheader("Select specific Chapter for quicker results")
|
26 |
+
|
27 |
+
# Initialize session state if not already done
|
28 |
+
if 'should_scroll' not in st.session_state:
|
29 |
+
st.session_state.should_scroll = False
|
30 |
+
|
31 |
+
|
32 |
#df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
|
33 |
df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows_with_total.csv")
|
34 |
|
|
|
83 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
84 |
|
85 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
86 |
+
st.session_state.should_scroll = True
|
87 |
dfALL = pd.DataFrame.from_dict(dictA)
|
88 |
|
89 |
|
|
|
178 |
INTdesc_embedding = model.encode(INTdesc_input)
|
179 |
SBScorpus_embeddings = model.encode(SBScorpus)
|
180 |
|
|
|
|
|
181 |
if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model"):
|
182 |
HF_model_results = util.semantic_search(INTdesc_embedding, SBScorpus_embeddings)
|
183 |
HF_model_results_sorted = sorted(HF_model_results, key=lambda x: x[1], reverse=True)
|
|
|
194 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
195 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
196 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
197 |
+
|
198 |
+
if st.session_state.should_scroll:
|
199 |
+
# Reset the flag for next run
|
200 |
+
st.session_state.should_scroll = False
|
201 |
+
# Add the JavaScript for scrolling
|
202 |
+
st.components.v1.html(
|
203 |
+
"""
|
204 |
+
<script>
|
205 |
+
// Scroll to bottom with highest priority
|
206 |
+
document.addEventListener("DOMContentLoaded", function(event) {
|
207 |
+
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
208 |
+
});
|
209 |
+
// Also try immediately
|
210 |
+
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
211 |
+
</script>
|
212 |
+
""",
|
213 |
+
height=0
|
214 |
+
)
|
215 |
+
|
216 |
st.dataframe(data=dfALL, hide_index=True)
|
217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
|
219 |
display_format = "ask REASONING MODEL: Which, if any, of the following SBS descriptions corresponds best to " + INTdesc_input +"? "
|
220 |
#st.write(display_format)
|
|
|
239 |
)
|
240 |
st.write(outputs[0]["generated_text"][-1]["content"])
|
241 |
|
242 |
+
if st.session_state.should_scroll:
|
243 |
+
# Reset the flag for next run
|
244 |
+
st.session_state.should_scroll = False
|
245 |
+
# Add the JavaScript for scrolling
|
246 |
+
st.components.v1.html(
|
247 |
+
"""
|
248 |
+
<script>
|
249 |
+
// Scroll to bottom with highest priority
|
250 |
+
document.addEventListener("DOMContentLoaded", function(event) {
|
251 |
+
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
252 |
+
});
|
253 |
+
// Also try immediately
|
254 |
+
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
255 |
+
</script>
|
256 |
+
""",
|
257 |
+
height=0
|
258 |
+
)
|
259 |
+
|
260 |
|
261 |
bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
262 |
with b1:
|