wenjun99 committed on
Commit
785d3cd
·
verified ·
1 Parent(s): a26308f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -54
app.py CHANGED
@@ -4,25 +4,25 @@ import numpy as np
4
  import pandas as pd
5
  from streamlit_cropper import st_cropper
6
 
7
- # # Mutation site headers removed 3614,
8
- # mutation_site_headers = [
9
- # 3244, 3297, 3350, 3399, 3455, 3509, 3562,
10
- # 3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
11
- # 4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
12
- # 4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882
13
- # ]
14
-
15
- # # Thresholds for each mutation site removed 3614: 0.091557752,
16
- # thresholds = pd.Series({
17
- # 3244: 1.094293328, 3297: 0.924916122, 3350: 0.664586629, 3399: 0.91573613,
18
- # 3455: 1.300869714, 3509: 1.821975901, 3562: 1.178862418,
19
- # 3665: 0.298697327, 3720: 0.58379781, 3773: 0.891088481, 3824: 1.145509641,
20
- # 3879: 0.81833191, 3933: 2.93084335, 3985: 1.593758847, 4039: 0.966055013,
21
- # 4089: 1.465671338, 4145: 0.30309335, 4190: 1.321615138, 4245: 1.709752495,
22
- # 4298: 0.868534701, 4349: 1.222907645, 4402: 0.58873557, 4455: 1.185522985,
23
- # 4510: 1.266797682, 4561: 1.109913024, 4615: 1.181106084, 4668: 1.408533949,
24
- # 4720: 0.714151142, 4773: 1.471959437, 4828: 0.95879943, 4882: 1.464503885
25
- # })
26
 
27
  # Mutation site headers reordered: 4402 to 3244, 4882 to 4455
28
  mutation_site_headers = [
@@ -172,6 +172,14 @@ with tab1:
172
  st.dataframe(df_31)
173
  st.download_button("Download as CSV", df_31.to_csv(index=False), "text_32_binary_labels.csv")
174
 
 
 
 
 
 
 
 
 
175
  # st.subheader("Binary Labels (27-bit groups)")
176
  # groups = []
177
  # for i in range(0, len(binary_labels), 27):
@@ -210,56 +218,99 @@ with tab2:
210
  st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
211
 
212
  # Tab 3: EF → Binary
213
- with tab3:
214
  st.write("Upload an Editing Frequency CSV or enter manually:")
215
- st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.")
216
  ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
217
-
 
 
218
  if ef_file:
219
- # Read CSV without headers and assign mutation site headers
220
  ef_df = pd.read_csv(ef_file, header=None)
221
- ef_df.columns = [str(site) for site in mutation_site_headers]
222
  else:
223
- ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers])
224
-
225
 
226
  edited_df = st.data_editor(ef_df, num_rows="dynamic")
227
 
228
  if st.button("Convert to Binary Labels"):
229
- int_map = {str(k): k for k in thresholds.index}
230
- matching_cols = [col for col in edited_df.columns if col in int_map]
231
-
232
  binary_part = pd.DataFrame()
233
- for col in matching_cols:
234
- col_threshold = thresholds[int_map[col]]
235
- binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int)
 
236
 
237
- non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore')
238
- binary_df = pd.concat([non_binary_part, binary_part], axis=1)
239
 
240
  def color_binary(val):
241
  if val == 1: return "background-color: lightgreen"
242
  if val == 0: return "background-color: lightcoral"
243
  return ""
244
 
245
- st.subheader("Binary Labels")
246
- styled = binary_df.style.applymap(color_binary, subset=matching_cols)
247
  st.dataframe(styled)
248
- st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv")
249
-
250
- # Convert to bitstrings and strings
251
- binary_strings = []
252
- decoded_strings = []
253
- for _, row in binary_part.iterrows():
254
- bitlist = row.values.tolist()
255
- bitstring = ''.join(str(b) for b in bitlist)
256
- binary_strings.append(bitstring)
257
- decoded_strings.append(binary_labels_to_string(bitlist))
258
-
259
- st.subheader("Binary as Bitstrings")
260
- for b in binary_strings:
261
- st.code(b)
262
-
263
- st.subheader("Decoded Voyager Strings")
264
- for s in decoded_strings:
265
- st.write(s)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import pandas as pd
5
  from streamlit_cropper import st_cropper
6
 
7
+ # Mutation site headers in ascending order (site 3614 removed)
8
+ mutation_site_headers_actual = [
9
+ 3244, 3297, 3350, 3399, 3455, 3509, 3562,
10
+ 3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
11
+ 4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
12
+ 4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882
13
+ ]
14
+
15
+ # Thresholds for each mutation site (site 3614, threshold 0.091557752, removed)
16
+ thresholds_actual = pd.Series({
17
+ 3244: 1.094293328, 3297: 0.924916122, 3350: 0.664586629, 3399: 0.91573613,
18
+ 3455: 1.300869714, 3509: 1.821975901, 3562: 1.178862418,
19
+ 3665: 0.298697327, 3720: 0.58379781, 3773: 0.891088481, 3824: 1.145509641,
20
+ 3879: 0.81833191, 3933: 2.93084335, 3985: 1.593758847, 4039: 0.966055013,
21
+ 4089: 1.465671338, 4145: 0.30309335, 4190: 1.321615138, 4245: 1.709752495,
22
+ 4298: 0.868534701, 4349: 1.222907645, 4402: 0.58873557, 4455: 1.185522985,
23
+ 4510: 1.266797682, 4561: 1.109913024, 4615: 1.181106084, 4668: 1.408533949,
24
+ 4720: 0.714151142, 4773: 1.471959437, 4828: 0.95879943, 4882: 1.464503885
25
+ })
26
 
27
  # Mutation site headers reordered: 4402 to 3244, 4882 to 4455
28
  mutation_site_headers = [
 
172
  st.dataframe(df_31)
173
  st.download_button("Download as CSV", df_31.to_csv(index=False), "text_32_binary_labels.csv")
174
 
175
+ # Additional table with ascending mutation site headers (3244 to 4455)
176
+ ascending_headers = sorted([h for h in mutation_site_headers if h <= 4455])
177
+ df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
178
+ st.subheader("Binary Labels (Ascending Order 3244 → 4455)")
179
+ st.dataframe(df_sorted)
180
+ st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
181
+
182
+
183
  # st.subheader("Binary Labels (27-bit groups)")
184
  # groups = []
185
  # for i in range(0, len(binary_labels), 27):
 
218
  st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
219
 
220
  # Tab 3: EF → Binary
221
+ with st.tabs(["Text to Binary Labels", "Image to Binary Labels", "EF → Binary"])[2]:
222
  st.write("Upload an Editing Frequency CSV or enter manually:")
223
+ st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4455.")
224
  ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
225
+
226
+ ascending_input_headers = sorted([h for h in mutation_site_headers if 3244 <= h <= 4455])
227
+
228
  if ef_file:
 
229
  ef_df = pd.read_csv(ef_file, header=None)
230
+ ef_df.columns = [str(site) for site in ascending_input_headers]
231
  else:
232
+ ef_df = pd.DataFrame(columns=[str(site) for site in ascending_input_headers])
 
233
 
234
  edited_df = st.data_editor(ef_df, num_rows="dynamic")
235
 
236
  if st.button("Convert to Binary Labels"):
237
+ # Use ascending headers to create binary first
 
 
238
  binary_part = pd.DataFrame()
239
+ for col in ascending_input_headers:
240
+ col_str = str(col)
241
+ threshold = thresholds[col]
242
+ binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
243
 
244
+ # Rearranged for output: custom order from mutation_site_headers
245
+ binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
246
 
247
  def color_binary(val):
248
  if val == 1: return "background-color: lightgreen"
249
  if val == 0: return "background-color: lightcoral"
250
  return ""
251
 
252
+ st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
253
+ styled = binary_reordered.style.applymap(color_binary)
254
  st.dataframe(styled)
255
+ st.download_button("Download CSV", binary_reordered.to_csv(index=False), "ef_binary_labels.csv")
256
+
257
+ # Reconstruct original string from binary values (flatten row-wise)
258
+ for i, row in binary_reordered.iterrows():
259
+ binary_sequence = row.tolist()
260
+ text = binary_labels_to_string(binary_sequence)
261
+ st.write(f"Row {i+1} decoded string: {text}")
262
+
263
+ # # Tab 3: EF → Binary
264
+ # with tab3:
265
+ # st.write("Upload an Editing Frequency CSV or enter manually:")
266
+ # st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.")
267
+ # ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
268
+
269
+ # if ef_file:
270
+ # # Read CSV without headers and assign mutation site headers
271
+ # ef_df = pd.read_csv(ef_file, header=None)
272
+ # ef_df.columns = [str(site) for site in mutation_site_headers]
273
+ # else:
274
+ # ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers])
275
+
276
+
277
+ # edited_df = st.data_editor(ef_df, num_rows="dynamic")
278
+
279
+ # if st.button("Convert to Binary Labels"):
280
+ # int_map = {str(k): k for k in thresholds.index}
281
+ # matching_cols = [col for col in edited_df.columns if col in int_map]
282
+
283
+ # binary_part = pd.DataFrame()
284
+ # for col in matching_cols:
285
+ # col_threshold = thresholds[int_map[col]]
286
+ # binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int)
287
+
288
+ # non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore')
289
+ # binary_df = pd.concat([non_binary_part, binary_part], axis=1)
290
+
291
+ # def color_binary(val):
292
+ # if val == 1: return "background-color: lightgreen"
293
+ # if val == 0: return "background-color: lightcoral"
294
+ # return ""
295
+
296
+ # st.subheader("Binary Labels")
297
+ # styled = binary_df.style.applymap(color_binary, subset=matching_cols)
298
+ # st.dataframe(styled)
299
+ # st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv")
300
+
301
+ # # Convert to bitstrings and strings
302
+ # binary_strings = []
303
+ # decoded_strings = []
304
+ # for _, row in binary_part.iterrows():
305
+ # bitlist = row.values.tolist()
306
+ # bitstring = ''.join(str(b) for b in bitlist)
307
+ # binary_strings.append(bitstring)
308
+ # decoded_strings.append(binary_labels_to_string(bitlist))
309
+
310
+ # st.subheader("Binary as Bitstrings")
311
+ # for b in binary_strings:
312
+ # st.code(b)
313
+
314
+ # st.subheader("Decoded Voyager Strings")
315
+ # for s in decoded_strings:
316
+ # st.write(s)