"""Streamlit app converting text, images and editing frequencies to binary labels.

Three tabs are offered:

* Tab 1 turns typed text into 8-bit-per-character binary labels.
* Tab 2 turns an uploaded (and cropped) image into 24-bit-per-pixel labels
  and rebuilds an image from those labels.
* Tab 3 thresholds a table of editing frequencies into 0/1 labels using the
  per-site values in ``thresholds``.
"""
from typing import Optional

import numpy as np
import pandas as pd
import streamlit as st
from PIL import Image, ImageFilter
from streamlit_cropper import st_cropper

# Predefined headers for the 32 mutation sites
mutation_site_headers = [
    3244, 3297, 3350, 3399, 3455, 3509, 3562, 3614,
    3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
    4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
    4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
]

# Per-site thresholds (hard-coded here, keyed by mutation site). Tab 3 labels
# a value 1 when it is >= the threshold of its site, otherwise 0.
thresholds = pd.Series({
    3244: 1.094293328,
    3297: 0.924916122,
    3350: 0.664586629,
    3399: 0.91573613,
    3455: 1.300869714,
    3509: 1.821975901,
    3562: 1.178862418,
    3614: 0.091557752,
    3665: 0.298697327,
    3720: 0.58379781,
    3773: 0.891088481,
    3824: 1.145509641,
    3879: 0.81833191,
    3933: 2.93084335,
    3985: 1.593758847,
    4039: 0.966055013,
    4089: 1.465671338,
    4145: 0.30309335,
    4190: 1.321615138,
    4245: 1.709752495,
    4298: 0.868534701,
    4349: 1.222907645,
    4402: 0.58873557,
    4455: 1.185522985,
    4510: 1.266797682,
    4561: 1.109913024,
    4615: 1.181106084,
    4668: 1.408533949,
    4720: 0.714151142,
    4773: 1.471959437,
    4828: 0.95879943,
    4882: 1.464503885,
})


# -----------------------------------------
# Utility functions
# -----------------------------------------
def string_to_binary_labels(s: str) -> list[int]:
    """Return the bits of every character of *s*, 8 bits each, MSB first.

    Only the lowest 8 bits of each code point are kept, so characters above
    U+00FF are truncated rather than rejected.
    """
    return [(ord(char) >> bit) & 1 for char in s for bit in range(7, -1, -1)]


def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
    """Return an RGB, lightly blurred copy of *img*.

    If either side is shorter than *min_size* the image is resized to a
    ``min_size`` x ``min_size`` square (the aspect ratio is not preserved).
    """
    img = img.convert("RGB")
    if img.width < min_size or img.height < min_size:
        img = img.resize((min_size, min_size))
    return img.filter(ImageFilter.GaussianBlur(radius=1))


def image_to_binary_labels_rgb(img: Image.Image, max_pixels: int = 256) -> list[int]:
    """Encode *img* as a flat list of bits, 24 per pixel (R, G, B; MSB first).

    The image is cleaned with ``clean_image`` and shrunk in place to fit a
    square whose side is ``floor(sqrt(max_pixels))``, so at most roughly
    *max_pixels* pixels are encoded.
    """
    img = clean_image(img)
    # Guard against a zero-sized thumbnail request for tiny max_pixels values.
    side = max(1, int(np.sqrt(max_pixels)))
    img.thumbnail((side, side))
    channels = np.asarray(img, dtype=np.uint8).reshape(-1)
    # unpackbits emits the most significant bit first, matching the
    # per-channel order used by binary_labels_to_rgb_image; tolist() yields
    # plain Python ints as promised by the return annotation.
    return np.unpackbits(channels).tolist()


def binary_labels_to_rgb_image(
    binary_labels: list[int],
    width: Optional[int] = None,
    height: Optional[int] = None,
) -> Image.Image:
    """Rebuild an RGB image from a flat list of 0/1 labels (24 bits per pixel).

    When *width* or *height* is missing, a square just large enough for
    ``len(binary_labels) // 24`` pixels is used (at least 1x1). Missing bits
    are treated as 0 and surplus bits are ignored. The input list is left
    untouched.
    """
    total_pixels = len(binary_labels) // 24
    if width is None or height is None:
        side = max(1, int(np.ceil(np.sqrt(total_pixels))))
        width = height = side

    needed_bits = width * height * 24
    usable = min(len(binary_labels), needed_bits)
    # Work on a zero-padded copy instead of extending the caller's list.
    bits = np.zeros(needed_bits, dtype=np.uint8)
    bits[:usable] = np.asarray(binary_labels[:usable], dtype=np.uint8)

    # packbits folds each run of 8 bits (MSB first) into one channel byte.
    array = np.packbits(bits).reshape((height, width, 3))
    # An (h, w, 3) uint8 array is interpreted as RGB without an explicit mode.
    return Image.fromarray(array)


def _binary_labels_to_table(binary_labels: list[int]) -> pd.DataFrame:
    """Lay out labels in rows of one bit per mutation site plus a row total.

    The last row is zero-padded to full width. The extra "Edited Sites"
    column holds the sum of the bits in the row.
    """
    group_size = len(mutation_site_headers)
    rows = []
    for start in range(0, len(binary_labels), group_size):
        group = list(binary_labels[start:start + group_size])
        group += [0] * (group_size - len(group))
        rows.append(group + [sum(group)])
    columns = [str(h) for h in mutation_site_headers] + ["Edited Sites"]
    return pd.DataFrame(rows, columns=columns)


def _site_key(label) -> Optional[int]:
    """Return the integer mutation site named by a column label, or None.

    Accepts ints, integral floats and their string forms ("3244", "3244.0"),
    so headers read from a CSV (strings) match the integer threshold index.
    """
    if isinstance(label, (int, np.integer)) and not isinstance(label, bool):
        return int(label)
    try:
        value = float(str(label).strip())
    except ValueError:
        return None
    return int(value) if value.is_integer() else None


def _ef_to_binary_labels(
    ef_df: pd.DataFrame, site_thresholds: pd.Series
) -> tuple[pd.DataFrame, list]:
    """Threshold the site columns of *ef_df* into 0/1 labels.

    Returns a copy of *ef_df* (column order preserved) in which every column
    whose label names a site in *site_thresholds* is replaced by
    ``value >= threshold`` as 0/1, together with the list of those columns.
    Values that cannot be read as numbers, including missing ones, become 0.
    """
    binary_df = ef_df.copy()
    site_cols = []
    for col in ef_df.columns:
        site = _site_key(col)
        if site is None or site not in site_thresholds.index:
            continue
        values = pd.to_numeric(ef_df[col], errors="coerce")
        binary_df[col] = values.ge(site_thresholds[site]).astype(int)
        site_cols.append(col)
    return binary_df, site_cols


# -----------------------------------------
# Streamlit App
# -----------------------------------------
st.title("ASCII & Binary Label Converter")
tab1, tab2, tab3 = st.tabs(["Text to Binary Labels", "Image to Binary Labels", "EF → Binary"])

# ================= Tab 1 ===================
with tab1:
    st.write("Enter text to see its ASCII codes and corresponding binary labels:")
    user_input = st.text_input("Text Input", value="DNA")

    if user_input:
        ascii_codes = [ord(c) for c in user_input]
        binary_labels = string_to_binary_labels(user_input)

        if any(code > 0xFF for code in ascii_codes):
            # string_to_binary_labels keeps only 8 bits per character.
            st.warning(
                "Some characters are outside the 8-bit range; "
                "only their lowest 8 bits are encoded."
            )

        st.subheader("ASCII Codes")
        st.write(ascii_codes)

        st.subheader("Binary Labels per Character")
        grouped_chars = [binary_labels[i:i + 8] for i in range(0, len(binary_labels), 8)]
        for char, bits in zip(user_input, grouped_chars):
            st.write(f"'{char}' → {bits}")

        st.subheader("Binary Labels (32-bit groups)")
        df = _binary_labels_to_table(binary_labels)
        st.dataframe(df)

        st.download_button(
            label="Download Binary Labels Table as CSV",
            data=df.to_csv(index=False),
            file_name="binary_labels_table.csv",
            mime="text/csv",
        )

# ================= Tab 2 ===================
with tab2:
    st.write("Upload an image (JPG or PNG) to convert it into binary labels:")
    uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        img = Image.open(uploaded_file)
        # NOTE(review): recent Streamlit releases deprecate use_column_width
        # in favour of use_container_width -- confirm the target version.
        st.image(img, caption="Uploaded Image", use_column_width=True)

        st.subheader("Crop the image with drag and select (Free aspect ratio)")
        cropped_img = st_cropper(img, realtime_update=True, box_color='blue', aspect_ratio=None)
        st.image(cropped_img, caption="Cropped Image", use_column_width=True)

        max_pixels = st.slider(
            "Max number of pixels to encode",
            min_value=32,
            max_value=1024,
            value=256,
            step=32,
        )
        binary_labels = image_to_binary_labels_rgb(cropped_img, max_pixels=max_pixels)

        st.subheader("Binary Labels from Image")
        df = _binary_labels_to_table(binary_labels)
        st.dataframe(df)

        st.subheader("Reconstructed RGB Image")
        reconstructed_img = binary_labels_to_rgb_image(binary_labels)
        st.image(reconstructed_img, caption="Reconstructed Image", use_column_width=True)

        st.download_button(
            label="Download Image Binary Labels Table as CSV",
            data=df.to_csv(index=False),
            file_name="image_binary_labels_table.csv",
            mime="text/csv",
        )

# ================= Tab 3 ===================
with tab3:
    st.write("Upload an Editing Frequency CSV or fill in manually:")
    ef_file = st.file_uploader("Upload Editing Frequency CSV", type=["csv"], key="ef")

    if ef_file:
        ef_df = pd.read_csv(ef_file)
        # Drop the index columns pandas names "Unnamed: N" when re-reading a CSV.
        ef_df = ef_df.loc[:, ~ef_df.columns.str.contains('^Unnamed')]
    else:
        # Float-typed template with string headers, matching what read_csv
        # produces, so manually entered values are numeric.
        ef_df = pd.DataFrame(columns=[str(site) for site in thresholds.index], dtype=float)

    edited_df = st.data_editor(ef_df, num_rows="dynamic")

    if st.button("Convert to Binary Labels"):
        binary_df, site_cols = _ef_to_binary_labels(edited_df, thresholds)

        def highlight_binary(val):
            """Return the cell CSS: green for 1, coral for 0, nothing otherwise."""
            if val == 1:
                return 'background-color: lightgreen'
            elif val == 0:
                return 'background-color: lightcoral'
            else:
                return ''

        st.subheader("Binary Labels")
        if site_cols:
            styler = binary_df.style
            # Styler.applymap was renamed to Styler.map in newer pandas;
            # use whichever this installation provides.
            apply_elementwise = getattr(styler, "map", None) or styler.applymap
            st.dataframe(apply_elementwise(highlight_binary, subset=site_cols))
        else:
            st.warning("No column matches a known mutation site; nothing was thresholded.")
            st.dataframe(binary_df)

        st.download_button(
            label="Download Binary Labels Table as CSV",
            data=binary_df.to_csv(index=False),
            file_name="ef_binary_labels_table.csv",
            mime="text/csv",
        )