Update pages/type_text.py
Browse files- pages/type_text.py +60 -66
pages/type_text.py
CHANGED
@@ -24,9 +24,62 @@ os.getenv("HF_TOKEN")
|
|
24 |
#st.title("Map internal descriptions to SBS codes with Sentence Transformer + Reasoning Models")
|
25 |
#st.subheader("Select specific Chapter for quicker results")
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
|
32 |
#df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
|
@@ -83,35 +136,12 @@ df_SBS = pd.read_csv("SBS_V2_0/Code_Sheet.csv", header=0, skip_blank_lines=False
|
|
83 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
84 |
|
85 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
86 |
-
st.session_state.should_scroll = True
|
87 |
dfALL = pd.DataFrame.from_dict(dictA)
|
88 |
|
89 |
|
90 |
-
def get_device_map() -> str:
|
91 |
-
return 'cuda' if torch.cuda.is_available() else 'cpu'
|
92 |
-
device = get_device_map() # 'cpu'
|
93 |
-
|
94 |
-
def on_click():
|
95 |
-
st.session_state.user_input = ""
|
96 |
-
|
97 |
-
def make_spinner(text = "In progress..."):
|
98 |
-
with st.spinner(text):
|
99 |
-
yield
|
100 |
-
|
101 |
-
#@st.cache
|
102 |
-
def convert_df(df:pd.DataFrame):
|
103 |
-
return df.to_csv(index=False).encode('utf-8')
|
104 |
-
|
105 |
-
#@st.cache
|
106 |
-
def convert_json(df:pd.DataFrame):
|
107 |
-
result = df.to_json(orient="index")
|
108 |
-
parsed = json.loads(result)
|
109 |
-
json_string = json.dumps(parsed)
|
110 |
-
#st.json(json_string, expanded=True)
|
111 |
-
return json_string
|
112 |
-
|
113 |
#INTdesc_input = st.text_input("Type internal description", key="user_input")
|
114 |
INTdesc_input = st.text_input(r"$\textsf{\Large Type internal description}$", key="user_input")
|
|
|
115 |
|
116 |
placeholder, centerL_column, centerR_column, right_column = st.columns(4)
|
117 |
#placeholder_clicked = placeholder.button("Perform some action", key="user_placeholder")
|
@@ -194,27 +224,9 @@ if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model
|
|
194 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
195 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
196 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
197 |
-
|
198 |
-
if st.session_state.should_scroll:
|
199 |
-
# Reset the flag for next run
|
200 |
-
st.session_state.should_scroll = False
|
201 |
-
# Add the JavaScript for scrolling
|
202 |
-
st.components.v1.html(
|
203 |
-
"""
|
204 |
-
<script>
|
205 |
-
// Scroll to bottom with highest priority
|
206 |
-
document.addEventListener("DOMContentLoaded", function(event) {
|
207 |
-
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
208 |
-
});
|
209 |
-
// Also try immediately
|
210 |
-
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
211 |
-
</script>
|
212 |
-
""",
|
213 |
-
height=0
|
214 |
-
)
|
215 |
-
|
216 |
st.dataframe(data=dfALL, hide_index=True)
|
217 |
-
|
218 |
|
219 |
display_format = "ask REASONING MODEL: Which, if any, of the following SBS descriptions corresponds best to " + INTdesc_input +"? "
|
220 |
#st.write(display_format)
|
@@ -238,25 +250,7 @@ if INTdesc_input is not None and st.button("Map to SBS codes", key="run_st_model
|
|
238 |
max_new_tokens=256,
|
239 |
)
|
240 |
st.write(outputs[0]["generated_text"][-1]["content"])
|
241 |
-
|
242 |
-
if st.session_state.should_scroll:
|
243 |
-
# Reset the flag for next run
|
244 |
-
st.session_state.should_scroll = False
|
245 |
-
# Add the JavaScript for scrolling
|
246 |
-
st.components.v1.html(
|
247 |
-
"""
|
248 |
-
<script>
|
249 |
-
// Scroll to bottom with highest priority
|
250 |
-
document.addEventListener("DOMContentLoaded", function(event) {
|
251 |
-
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
252 |
-
});
|
253 |
-
// Also try immediately
|
254 |
-
window.parent.scrollTo(0, window.parent.document.body.scrollHeight);
|
255 |
-
</script>
|
256 |
-
""",
|
257 |
-
height=0
|
258 |
-
)
|
259 |
-
|
260 |
|
261 |
bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
262 |
with b1:
|
|
|
24 |
#st.title("Map internal descriptions to SBS codes with Sentence Transformer + Reasoning Models")
|
25 |
#st.subheader("Select specific Chapter for quicker results")
|
26 |
|
27 |
+
def get_device_map() -> str:
    """Return the name of the torch device the models should run on.

    Returns:
        ``'cuda'`` when ``torch.cuda.is_available()`` reports a usable GPU,
        otherwise ``'cpu'``.
    """
    return 'cuda' if torch.cuda.is_available() else 'cpu'
|
29 |
+
device = get_device_map() # 'cpu'
|
30 |
+
|
31 |
+
def on_click():
    """Reset the ``user_input`` entry of the Streamlit session state to "".

    ``user_input`` is the key given to the "Type internal description"
    text input in this page, so assigning the empty string clears that box.
    NOTE(review): presumably registered as a button ``on_click`` callback;
    the registration is not visible here - confirm against the caller.
    """
    st.session_state.user_input = ""
|
33 |
+
|
34 |
+
def make_spinner(text = "In progress..."):
    """Generator that keeps an ``st.spinner`` open while it is suspended.

    Calling the function only creates the generator; nothing is shown yet.
    The first ``next()`` enters ``st.spinner(text)`` and pauses at the
    ``yield``. Advancing it again leaves the ``with`` block, which closes
    the spinner, and the generator finishes.

    Args:
        text: Message passed to ``st.spinner``.

    NOTE(review): no ``contextlib.contextmanager`` decorator is applied, so
    this cannot be used directly in a ``with`` statement. Confirm how callers
    drive it before changing that.
    """
    with st.spinner(text):
        yield
|
37 |
+
|
38 |
+
#@st.cache
|
39 |
+
def convert_df(df: pd.DataFrame) -> bytes:
    """Serialize *df* to CSV text and return it encoded as UTF-8 bytes.

    Args:
        df: The frame to export.

    Returns:
        The CSV document, header row included and index column omitted
        (``index=False``), as ``bytes``.
    """
    return df.to_csv(index=False).encode('utf-8')
|
41 |
+
|
42 |
+
#@st.cache
|
43 |
+
def convert_json(df: pd.DataFrame) -> str:
    """Serialize *df* to a JSON string keyed by row index label.

    Args:
        df: The frame to export.

    Returns:
        A JSON object string of the form ``{"<index>": {"<column>": value}}``
        (``orient="index"``).
    """
    # Round-trip through the json module so the final text is produced by
    # json.dumps (its default formatting) rather than by pandas directly.
    rows_by_index = json.loads(df.to_json(orient="index"))
    return json.dumps(rows_by_index)
|
49 |
+
|
50 |
+
def auto_scroll_to_bottom():
    """Inject a zero-height HTML component whose script scrolls the app down.

    The script reaches the hosting page through ``window.parent``. It looks
    for an element matching ``.main`` and sets its ``scrollTop`` to its
    ``scrollHeight``. If no such element exists it falls back to scrolling
    the parent window to the body's ``scrollHeight``. The scroll is attempted
    immediately and again after 200 ms and 500 ms.

    NOTE(review): the ``.main`` selector depends on the DOM that the installed
    Streamlit version renders - verify it still matches the scroll container.
    """
    # JavaScript to scroll to bottom
    js_code = """
    <script>
        // Wait for the page to fully render
        const scrollToBottom = () => {
            // Get the main Streamlit iframe
            const streamlitDoc = window.parent.document;
            // Get the app container
            const appContainer = streamlitDoc.querySelector('.main');
            if (appContainer) {
                // Scroll the app container to the bottom
                appContainer.scrollTop = appContainer.scrollHeight;
            } else {
                // Fallback to scrolling the entire page
                window.parent.scrollTo(0, streamlitDoc.body.scrollHeight);
            }
        };

        // Try immediately
        scrollToBottom();

        // Also try after a short delay to ensure content is rendered
        setTimeout(scrollToBottom, 200);

        // And after a longer delay just to be safe
        setTimeout(scrollToBottom, 500);
    </script>
    """

    # Render the JavaScript code inside a hidden wrapper; height=0 keeps the
    # component from taking up visible space in the page.
    html = f'<div style="display:none">{js_code}</div>'
    st.components.v1.html(html, height=0)
|
83 |
|
84 |
|
85 |
#df_chapters = pd.read_csv("SBS_V2_0/Chapter_Index_Rows.csv")
|
|
|
136 |
SBScorpus = df_SBS['Long_Description'].values.tolist()
|
137 |
|
138 |
dictA = {"Score": [], "SBS Code": [], "SBS Description V2.0": []}
|
|
|
139 |
dfALL = pd.DataFrame.from_dict(dictA)
|
140 |
|
141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
#INTdesc_input = st.text_input("Type internal description", key="user_input")
|
143 |
INTdesc_input = st.text_input(r"$\textsf{\Large Type internal description}$", key="user_input")
|
144 |
+
auto_scroll_to_bottom()
|
145 |
|
146 |
placeholder, centerL_column, centerR_column, right_column = st.columns(4)
|
147 |
#placeholder_clicked = placeholder.button("Perform some action", key="user_placeholder")
|
|
|
224 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
225 |
dictA.update({"Score": "%.4f" % result[4]["score"], "SBS Code": df_SBS.loc[df_SBS["Long_Description"] == SBScorpus[result[4]["corpus_id"]],"SBS_Code_Hyphenated"].values[0], "SBS Description V2.0": SBScorpus[result[4]["corpus_id"]]})
|
226 |
dfALL = pd.concat([dfALL, pd.DataFrame([dictA])], ignore_index=True)
|
227 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
st.dataframe(data=dfALL, hide_index=True)
|
229 |
+
auto_scroll_to_bottom()
|
230 |
|
231 |
display_format = "ask REASONING MODEL: Which, if any, of the following SBS descriptions corresponds best to " + INTdesc_input +"? "
|
232 |
#st.write(display_format)
|
|
|
250 |
max_new_tokens=256,
|
251 |
)
|
252 |
st.write(outputs[0]["generated_text"][-1]["content"])
|
253 |
+
auto_scroll_to_bottom()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
bs, b1, b2, b3, bLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
256 |
with b1:
|