wenjun99 committed on
Commit
223c1c6
·
verified ·
1 Parent(s): 3afbb18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -146
app.py CHANGED
@@ -33,16 +33,8 @@ mutation_site_headers = [
33
  ]
34
 
35
  # Thresholds reordered accordingly
36
- thresholds = pd.Series({
37
- 4402: 0.58873557, 4349: 1.222907645, 4298: 0.868534701, 4245: 1.709752495,
38
- 4190: 1.321615138, 4145: 0.30309335, 4089: 1.465671338, 4039: 0.966055013,
39
- 3985: 1.593758847, 3933: 2.93084335, 3879: 0.81833191, 3824: 1.145509641,
40
- 3773: 0.891088481, 3720: 0.58379781, 3665: 0.298697327,
41
- 3562: 1.178862418, 3509: 1.821975901, 3455: 1.300869714, 3399: 0.91573613,
42
- 3350: 0.664586629, 3297: 0.924916122, 3244: 1.094293328,
43
- 4882: 1.464503885, 4828: 0.95879943, 4773: 1.471959437, 4720: 0.714151142,
44
- 4668: 1.408533949, 4615: 1.181106084, 4561: 1.109913024, 4510: 1.266797682, 4455: 1.185522985
45
- })
46
  # === Utility functions ===
47
 
48
  # Voyager ASCII 6-bit conversion table
@@ -58,8 +50,6 @@ voyager_table = {
58
  }
59
  reverse_voyager_table = {v: k for k, v in voyager_table.items()}
60
 
61
- # === Utility functions ===
62
-
63
  def string_to_binary_labels(s: str) -> list[int]:
64
  bits = []
65
  for char in s:
@@ -77,25 +67,7 @@ def binary_labels_to_string(bits: list[int]) -> str:
77
  val = sum(b << (5 - j) for j, b in enumerate(chunk))
78
  chars.append(voyager_table.get(val, '?'))
79
  return ''.join(chars)
80
-
81
- # def string_to_binary_labels(s: str) -> list[int]:
82
- # bits = []
83
- # for char in s:
84
- # ascii_code = ord(char)
85
- # char_bits = [(ascii_code >> bit) & 1 for bit in range(7, -1, -1)]
86
- # bits.extend(char_bits)
87
- # return bits
88
-
89
- # def binary_labels_to_string(bits: list[int]) -> str:
90
- # chars = []
91
- # for i in range(0, len(bits), 8):
92
- # byte = bits[i:i+8]
93
- # if len(byte) < 8:
94
- # byte += [0] * (8 - len(byte))
95
- # ascii_val = sum(b << (7 - j) for j, b in enumerate(byte))
96
- # chars.append(chr(ascii_val))
97
- # return ''.join(chars)
98
-
99
  def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
100
  img = img.convert("RGB")
101
  if img.width < min_size or img.height < min_size:
@@ -172,52 +144,32 @@ with tab1:
172
  st.dataframe(df)
173
  st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
174
 
175
- # Additional table with ascending mutation site headers (3244 to 4455)
176
- ascending_headers = sorted([h for h in mutation_site_headers if h <= 4455])
177
  df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
178
- st.subheader("Binary Labels (Ascending Order 3244 → 4455)")
179
  st.dataframe(df_sorted)
180
  st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
181
 
182
-
183
- # st.subheader("Binary Labels (27-bit groups)")
184
- # groups = []
185
- # for i in range(0, len(binary_labels), 27):
186
- # group = binary_labels[i:i+27]
187
- # group += [0] * (27 - len(group))
188
- # groups.append(group + [sum(group)])
189
-
190
- # df_27 = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
191
- # st.dataframe(df_27)
192
- # st.download_button("Download as CSV", df_27.to_csv(index=False), "text_27_binary_labels.csv")
193
-
194
- # Tab 3: EF → Binary
195
  with tab2:
196
  st.write("Upload an Editing Frequency CSV or enter manually:")
197
- st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4455.")
198
  ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
199
 
200
- ascending_input_headers = sorted([h for h in mutation_site_headers if 3244 <= h <= 4402])
201
- high_index_headers = sorted([h for h in mutation_site_headers if h >= 4455])
202
-
203
  if ef_file:
204
  ef_df = pd.read_csv(ef_file, header=None)
205
- ef_df.columns = [str(site) for site in ascending_input_headers]
206
- for h in high_index_headers:
207
- ef_df[str(h)] = 0 # add dummy columns for high index as 0
208
  else:
209
- ef_df = pd.DataFrame(columns=[str(site) for site in ascending_input_headers + high_index_headers])
210
 
211
  edited_df = st.data_editor(ef_df, num_rows="dynamic")
212
 
213
  if st.button("Convert to Binary Labels"):
214
  binary_part = pd.DataFrame()
215
- for col in ascending_input_headers:
216
  col_str = str(col)
217
- threshold = thresholds[col]
218
  binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
219
- for col in high_index_headers:
220
- binary_part[str(col)] = 0
221
 
222
  binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
223
 
@@ -236,90 +188,6 @@ with tab2:
236
  st.subheader("Decoded String (continuous across rows)")
237
  st.write(decoded_string)
238
 
239
- binary_ascending = binary_part[[str(h) for h in ascending_input_headers if str(h) in binary_part.columns]]
240
- st.subheader("Binary Labels (Ascending 3244→4455)")
241
- st.dataframe(binary_ascending)
242
- st.download_button("Download Ascending Order CSV", binary_ascending.to_csv(index=False), "ef_binary_labels_ascending.csv")
243
-
244
-
245
- # # Tab 3: EF → Binary
246
- # with tab3:
247
- # st.write("Upload an Editing Frequency CSV or enter manually:")
248
- # st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.")
249
- # ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
250
-
251
- # if ef_file:
252
- # # Read CSV without headers and assign mutation site headers
253
- # ef_df = pd.read_csv(ef_file, header=None)
254
- # ef_df.columns = [str(site) for site in mutation_site_headers]
255
- # else:
256
- # ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers])
257
-
258
-
259
- # edited_df = st.data_editor(ef_df, num_rows="dynamic")
260
-
261
- # if st.button("Convert to Binary Labels"):
262
- # int_map = {str(k): k for k in thresholds.index}
263
- # matching_cols = [col for col in edited_df.columns if col in int_map]
264
-
265
- # binary_part = pd.DataFrame()
266
- # for col in matching_cols:
267
- # col_threshold = thresholds[int_map[col]]
268
- # binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int)
269
-
270
- # non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore')
271
- # binary_df = pd.concat([non_binary_part, binary_part], axis=1)
272
-
273
- # def color_binary(val):
274
- # if val == 1: return "background-color: lightgreen"
275
- # if val == 0: return "background-color: lightcoral"
276
- # return ""
277
-
278
- # st.subheader("Binary Labels")
279
- # styled = binary_df.style.applymap(color_binary, subset=matching_cols)
280
- # st.dataframe(styled)
281
- # st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv")
282
-
283
- # # Convert to bitstrings and strings
284
- # binary_strings = []
285
- # decoded_strings = []
286
- # for _, row in binary_part.iterrows():
287
- # bitlist = row.values.tolist()
288
- # bitstring = ''.join(str(b) for b in bitlist)
289
- # binary_strings.append(bitstring)
290
- # decoded_strings.append(binary_labels_to_string(bitlist))
291
-
292
- # st.subheader("Binary as Bitstrings")
293
- # for b in binary_strings:
294
- # st.code(b)
295
-
296
- # st.subheader("Decoded Voyager Strings")
297
- # for s in decoded_strings:
298
- # st.write(s)
299
-
300
-
301
- # # Tab 2: Image to Binary
302
- # with tab2:
303
- # uploaded = st.file_uploader("Upload an image (jpg/png)", type=["jpg", "jpeg", "png"])
304
- # if uploaded:
305
- # img = Image.open(uploaded)
306
- # st.image(img, caption="Original", use_column_width=True)
307
- # cropped = st_cropper(img, realtime_update=True, box_color="blue", aspect_ratio=None)
308
- # st.image(cropped, caption="Cropped", use_column_width=True)
309
-
310
- # max_pixels = st.slider("Max pixels to encode", 32, 1024, 256, 32)
311
- # binary_labels = image_to_binary_labels_rgb(cropped, max_pixels=max_pixels)
312
-
313
- # st.subheader("Binary Labels from Image")
314
- # groups = []
315
- # for i in range(0, len(binary_labels), 32):
316
- # group = binary_labels[i:i+32]
317
- # group += [0] * (32 - len(group))
318
- # groups.append(group + [sum(group)])
319
- # df = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
320
- # st.dataframe(df)
321
-
322
- # st.subheader("Reconstructed Image")
323
- # recon = binary_labels_to_rgb_image(binary_labels)
324
- # st.image(recon, caption="Reconstructed", use_column_width=True)
325
- # st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
 
33
  ]
34
 
35
  # Thresholds reordered accordingly
36
+ thresholds = pd.Series({h: thresholds_actual[h] for h in mutation_site_headers})
37
+
 
 
 
 
 
 
 
 
38
  # === Utility functions ===
39
 
40
  # Voyager ASCII 6-bit conversion table
 
50
  }
51
  reverse_voyager_table = {v: k for k, v in voyager_table.items()}
52
 
 
 
53
  def string_to_binary_labels(s: str) -> list[int]:
54
  bits = []
55
  for char in s:
 
67
  val = sum(b << (5 - j) for j, b in enumerate(chunk))
68
  chars.append(voyager_table.get(val, '?'))
69
  return ''.join(chars)
70
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
72
  img = img.convert("RGB")
73
  if img.width < min_size or img.height < min_size:
 
144
  st.dataframe(df)
145
  st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
146
 
147
+ ascending_headers = sorted(mutation_site_headers_actual)
 
148
  df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
149
+ st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
150
  st.dataframe(df_sorted)
151
  st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
152
 
153
+ # Tab 2: EF → Binary
 
 
 
 
 
 
 
 
 
 
 
 
154
  with tab2:
155
  st.write("Upload an Editing Frequency CSV or enter manually:")
156
+ st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
157
  ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
158
 
 
 
 
159
  if ef_file:
160
  ef_df = pd.read_csv(ef_file, header=None)
161
+ ef_df.columns = [str(site) for site in sorted(mutation_site_headers_actual)]
 
 
162
  else:
163
+ ef_df = pd.DataFrame(columns=[str(site) for site in sorted(mutation_site_headers_actual)])
164
 
165
  edited_df = st.data_editor(ef_df, num_rows="dynamic")
166
 
167
  if st.button("Convert to Binary Labels"):
168
  binary_part = pd.DataFrame()
169
+ for col in sorted(mutation_site_headers_actual):
170
  col_str = str(col)
171
+ threshold = thresholds_actual[col]
172
  binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
 
 
173
 
174
  binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
175
 
 
188
  st.subheader("Decoded String (continuous across rows)")
189
  st.write(decoded_string)
190
 
191
+ st.subheader("Binary Labels (Ascending 3244→4882)")
192
+ st.dataframe(binary_part)
193
+ st.download_button("Download Ascending Order CSV", binary_part.to_csv(index=False), "ef_binary_labels_ascending.csv")