Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -33,16 +33,8 @@ mutation_site_headers = [
|
|
33 |
]
|
34 |
|
35 |
# Thresholds reordered accordingly
|
36 |
-
thresholds = pd.Series({
|
37 |
-
|
38 |
-
4190: 1.321615138, 4145: 0.30309335, 4089: 1.465671338, 4039: 0.966055013,
|
39 |
-
3985: 1.593758847, 3933: 2.93084335, 3879: 0.81833191, 3824: 1.145509641,
|
40 |
-
3773: 0.891088481, 3720: 0.58379781, 3665: 0.298697327,
|
41 |
-
3562: 1.178862418, 3509: 1.821975901, 3455: 1.300869714, 3399: 0.91573613,
|
42 |
-
3350: 0.664586629, 3297: 0.924916122, 3244: 1.094293328,
|
43 |
-
4882: 1.464503885, 4828: 0.95879943, 4773: 1.471959437, 4720: 0.714151142,
|
44 |
-
4668: 1.408533949, 4615: 1.181106084, 4561: 1.109913024, 4510: 1.266797682, 4455: 1.185522985
|
45 |
-
})
|
46 |
# === Utility functions ===
|
47 |
|
48 |
# Voyager ASCII 6-bit conversion table
|
@@ -58,8 +50,6 @@ voyager_table = {
|
|
58 |
}
|
59 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
60 |
|
61 |
-
# === Utility functions ===
|
62 |
-
|
63 |
def string_to_binary_labels(s: str) -> list[int]:
|
64 |
bits = []
|
65 |
for char in s:
|
@@ -77,25 +67,7 @@ def binary_labels_to_string(bits: list[int]) -> str:
|
|
77 |
val = sum(b << (5 - j) for j, b in enumerate(chunk))
|
78 |
chars.append(voyager_table.get(val, '?'))
|
79 |
return ''.join(chars)
|
80 |
-
|
81 |
-
# def string_to_binary_labels(s: str) -> list[int]:
|
82 |
-
# bits = []
|
83 |
-
# for char in s:
|
84 |
-
# ascii_code = ord(char)
|
85 |
-
# char_bits = [(ascii_code >> bit) & 1 for bit in range(7, -1, -1)]
|
86 |
-
# bits.extend(char_bits)
|
87 |
-
# return bits
|
88 |
-
|
89 |
-
# def binary_labels_to_string(bits: list[int]) -> str:
|
90 |
-
# chars = []
|
91 |
-
# for i in range(0, len(bits), 8):
|
92 |
-
# byte = bits[i:i+8]
|
93 |
-
# if len(byte) < 8:
|
94 |
-
# byte += [0] * (8 - len(byte))
|
95 |
-
# ascii_val = sum(b << (7 - j) for j, b in enumerate(byte))
|
96 |
-
# chars.append(chr(ascii_val))
|
97 |
-
# return ''.join(chars)
|
98 |
-
|
99 |
def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
|
100 |
img = img.convert("RGB")
|
101 |
if img.width < min_size or img.height < min_size:
|
@@ -172,52 +144,32 @@ with tab1:
|
|
172 |
st.dataframe(df)
|
173 |
st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
|
174 |
|
175 |
-
|
176 |
-
ascending_headers = sorted([h for h in mutation_site_headers if h <= 4455])
|
177 |
df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
|
178 |
-
st.subheader("Binary Labels (Ascending Order 3244 → 4455)")
|
179 |
st.dataframe(df_sorted)
|
180 |
st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
|
181 |
|
182 |
-
|
183 |
-
# st.subheader("Binary Labels (27-bit groups)")
|
184 |
-
# groups = []
|
185 |
-
# for i in range(0, len(binary_labels), 27):
|
186 |
-
# group = binary_labels[i:i+27]
|
187 |
-
# group += [0] * (27 - len(group))
|
188 |
-
# groups.append(group + [sum(group)])
|
189 |
-
|
190 |
-
# df_27 = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
|
191 |
-
# st.dataframe(df_27)
|
192 |
-
# st.download_button("Download as CSV", df_27.to_csv(index=False), "text_27_binary_labels.csv")
|
193 |
-
|
194 |
-
# Tab 3: EF → Binary
|
195 |
with tab2:
|
196 |
st.write("Upload an Editing Frequency CSV or enter manually:")
|
197 |
-
st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4402.")
|
198 |
ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
199 |
|
200 |
-
ascending_input_headers = sorted([h for h in mutation_site_headers if 3244 <= h <= 4402])
|
201 |
-
high_index_headers = sorted([h for h in mutation_site_headers if h >= 4455])
|
202 |
-
|
203 |
if ef_file:
|
204 |
ef_df = pd.read_csv(ef_file, header=None)
|
205 |
-
ef_df.columns = [str(site) for site in ascending_input_headers]
|
206 |
-
for h in high_index_headers:
|
207 |
-
ef_df[str(h)] = 0 # add dummy columns for high index as 0
|
208 |
else:
|
209 |
-
ef_df = pd.DataFrame(columns=[str(site) for site in ascending_input_headers])
|
210 |
|
211 |
edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
212 |
|
213 |
if st.button("Convert to Binary Labels"):
|
214 |
binary_part = pd.DataFrame()
|
215 |
-
for col in ascending_input_headers:
|
216 |
col_str = str(col)
|
217 |
-
threshold = thresholds[col]
|
218 |
binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
|
219 |
-
for col in high_index_headers:
|
220 |
-
binary_part[str(col)] = 0
|
221 |
|
222 |
binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
|
223 |
|
@@ -236,90 +188,6 @@ with tab2:
|
|
236 |
st.subheader("Decoded String (continuous across rows)")
|
237 |
st.write(decoded_string)
|
238 |
|
239 |
-
|
240 |
-
st.
|
241 |
-
st.dataframe(binary_ascending)
|
242 |
-
st.download_button("Download Ascending Order CSV", binary_ascending.to_csv(index=False), "ef_binary_labels_ascending.csv")
|
243 |
-
|
244 |
-
|
245 |
-
# # Tab 3: EF → Binary
|
246 |
-
# with tab3:
|
247 |
-
# st.write("Upload an Editing Frequency CSV or enter manually:")
|
248 |
-
# st.write("**Note:** Please upload CSV files **without column headers**. Just the 31 editing frequencies per row.")
|
249 |
-
# ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
250 |
-
|
251 |
-
# if ef_file:
|
252 |
-
# # Read CSV without headers and assign mutation site headers
|
253 |
-
# ef_df = pd.read_csv(ef_file, header=None)
|
254 |
-
# ef_df.columns = [str(site) for site in mutation_site_headers]
|
255 |
-
# else:
|
256 |
-
# ef_df = pd.DataFrame(columns=[str(site) for site in mutation_site_headers])
|
257 |
-
|
258 |
-
|
259 |
-
# edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
260 |
-
|
261 |
-
# if st.button("Convert to Binary Labels"):
|
262 |
-
# int_map = {str(k): k for k in thresholds.index}
|
263 |
-
# matching_cols = [col for col in edited_df.columns if col in int_map]
|
264 |
-
|
265 |
-
# binary_part = pd.DataFrame()
|
266 |
-
# for col in matching_cols:
|
267 |
-
# col_threshold = thresholds[int_map[col]]
|
268 |
-
# binary_part[col] = (edited_df[col].astype(float) >= col_threshold).astype(int)
|
269 |
-
|
270 |
-
# non_binary_part = edited_df.drop(columns=matching_cols, errors='ignore')
|
271 |
-
# binary_df = pd.concat([non_binary_part, binary_part], axis=1)
|
272 |
-
|
273 |
-
# def color_binary(val):
|
274 |
-
# if val == 1: return "background-color: lightgreen"
|
275 |
-
# if val == 0: return "background-color: lightcoral"
|
276 |
-
# return ""
|
277 |
-
|
278 |
-
# st.subheader("Binary Labels")
|
279 |
-
# styled = binary_df.style.applymap(color_binary, subset=matching_cols)
|
280 |
-
# st.dataframe(styled)
|
281 |
-
# st.download_button("Download CSV", binary_df.to_csv(index=False), "ef_binary_labels.csv")
|
282 |
-
|
283 |
-
# # Convert to bitstrings and strings
|
284 |
-
# binary_strings = []
|
285 |
-
# decoded_strings = []
|
286 |
-
# for _, row in binary_part.iterrows():
|
287 |
-
# bitlist = row.values.tolist()
|
288 |
-
# bitstring = ''.join(str(b) for b in bitlist)
|
289 |
-
# binary_strings.append(bitstring)
|
290 |
-
# decoded_strings.append(binary_labels_to_string(bitlist))
|
291 |
-
|
292 |
-
# st.subheader("Binary as Bitstrings")
|
293 |
-
# for b in binary_strings:
|
294 |
-
# st.code(b)
|
295 |
-
|
296 |
-
# st.subheader("Decoded Voyager Strings")
|
297 |
-
# for s in decoded_strings:
|
298 |
-
# st.write(s)
|
299 |
-
|
300 |
-
|
301 |
-
# # Tab 2: Image to Binary
|
302 |
-
# with tab2:
|
303 |
-
# uploaded = st.file_uploader("Upload an image (jpg/png)", type=["jpg", "jpeg", "png"])
|
304 |
-
# if uploaded:
|
305 |
-
# img = Image.open(uploaded)
|
306 |
-
# st.image(img, caption="Original", use_column_width=True)
|
307 |
-
# cropped = st_cropper(img, realtime_update=True, box_color="blue", aspect_ratio=None)
|
308 |
-
# st.image(cropped, caption="Cropped", use_column_width=True)
|
309 |
-
|
310 |
-
# max_pixels = st.slider("Max pixels to encode", 32, 1024, 256, 32)
|
311 |
-
# binary_labels = image_to_binary_labels_rgb(cropped, max_pixels=max_pixels)
|
312 |
-
|
313 |
-
# st.subheader("Binary Labels from Image")
|
314 |
-
# groups = []
|
315 |
-
# for i in range(0, len(binary_labels), 32):
|
316 |
-
# group = binary_labels[i:i+32]
|
317 |
-
# group += [0] * (32 - len(group))
|
318 |
-
# groups.append(group + [sum(group)])
|
319 |
-
# df = pd.DataFrame(groups, columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"])
|
320 |
-
# st.dataframe(df)
|
321 |
-
|
322 |
-
# st.subheader("Reconstructed Image")
|
323 |
-
# recon = binary_labels_to_rgb_image(binary_labels)
|
324 |
-
# st.image(recon, caption="Reconstructed", use_column_width=True)
|
325 |
-
# st.download_button("Download CSV", df.to_csv(index=False), "image_binary_labels.csv")
|
|
|
33 |
]
|
34 |
|
35 |
# Thresholds reordered accordingly
|
36 |
+
thresholds = pd.Series({h: thresholds_actual[h] for h in mutation_site_headers})
|
37 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# === Utility functions ===
|
39 |
|
40 |
# Voyager ASCII 6-bit conversion table
|
|
|
50 |
}
|
51 |
reverse_voyager_table = {v: k for k, v in voyager_table.items()}
|
52 |
|
|
|
|
|
53 |
def string_to_binary_labels(s: str) -> list[int]:
|
54 |
bits = []
|
55 |
for char in s:
|
|
|
67 |
val = sum(b << (5 - j) for j, b in enumerate(chunk))
|
68 |
chars.append(voyager_table.get(val, '?'))
|
69 |
return ''.join(chars)
|
70 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
|
72 |
img = img.convert("RGB")
|
73 |
if img.width < min_size or img.height < min_size:
|
|
|
144 |
st.dataframe(df)
|
145 |
st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")
|
146 |
|
147 |
+
ascending_headers = sorted(mutation_site_headers_actual)
|
|
|
148 |
df_sorted = df[[str(h) for h in ascending_headers if str(h) in df.columns]]
|
149 |
+
st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
|
150 |
st.dataframe(df_sorted)
|
151 |
st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")
|
152 |
|
153 |
+
# Tab 2: EF → Binary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
with tab2:
|
155 |
st.write("Upload an Editing Frequency CSV or enter manually:")
|
156 |
+
st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
|
157 |
ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")
|
158 |
|
|
|
|
|
|
|
159 |
if ef_file:
|
160 |
ef_df = pd.read_csv(ef_file, header=None)
|
161 |
+
ef_df.columns = [str(site) for site in sorted(mutation_site_headers_actual)]
|
|
|
|
|
162 |
else:
|
163 |
+
ef_df = pd.DataFrame(columns=[str(site) for site in sorted(mutation_site_headers_actual)])
|
164 |
|
165 |
edited_df = st.data_editor(ef_df, num_rows="dynamic")
|
166 |
|
167 |
if st.button("Convert to Binary Labels"):
|
168 |
binary_part = pd.DataFrame()
|
169 |
+
for col in sorted(mutation_site_headers_actual):
|
170 |
col_str = str(col)
|
171 |
+
threshold = thresholds_actual[col]
|
172 |
binary_part[col_str] = (edited_df[col_str].astype(float) >= threshold).astype(int)
|
|
|
|
|
173 |
|
174 |
binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]
|
175 |
|
|
|
188 |
st.subheader("Decoded String (continuous across rows)")
|
189 |
st.write(decoded_string)
|
190 |
|
191 |
+
st.subheader("Binary Labels (Ascending 3244→4882)")
|
192 |
+
st.dataframe(binary_part)
|
193 |
+
st.download_button("Download Ascending Order CSV", binary_part.to_csv(index=False), "ef_binary_labels_ascending.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|