# Streamlit app that converts text to per-mutation-site binary labels using a
# 6-bit character table, converts editing-frequency (EF) tables to binary
# labels via per-site thresholds, decodes binary labels back to text, and
# generates robot pipetting scripts from the binary label tables.
import streamlit as st
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Mutation-site layouts
# ---------------------------------------------------------------------------

# 31-site layout: mutation site headers in ascending order, with site 3614
# removed.
mutation_site_headers_actual = [
    3244, 3297, 3350, 3399, 3455, 3509, 3562, 3665, 3720, 3773, 3824,
    3879, 3933, 3985, 4039, 4089, 4145, 4190, 4245, 4298, 4349, 4402,
    4455, 4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
]

# Thresholds for each mutation site of the 31-site layout
# (removed entry was 3614: 0.091557752).
thresholds_actual = pd.Series({
    3244: 1.096910677,
    3297: 0.923658795,
    3350: 0.668939037,
    3399: 0.914305214,
    3455: 1.297392984,
    3509: 1.812636208,
    3562: 1.185047484,
    3665: 0.298007308,
    3720: 0.58857544,
    3773: 0.882561082,
    3824: 1.149082617,
    3879: 0.816050702,
    3933: 2.936517653,
    3985: 1.597166791,
    4039: 0.962108082,
    4089: 1.479783497,
    4145: 0.305853225,
    4190: 1.311869541,
    4245: 1.707556905,
    4298: 0.875013076,
    4349: 1.227704526,
    4402: 0.593206446,
    4455: 1.179633137,
    4510: 1.272477799,
    4561: 1.293841573,
    4615: 1.16821885,
    4668: 1.40306,
    4720: 0.706530878,
    4773: 1.483114072,
    4828: 0.954939873,
    4882: 1.47524328,
})

# 31-site headers reordered: 4402 down to 3244, then 4882 down to 4455.
mutation_site_headers = [
    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039, 3985, 3933, 3879,
    3824, 3773, 3720, 3665, 3562, 3509, 3455, 3399, 3350, 3297, 3244,  # 1–22
    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,  # 23–31
]

# Thresholds reordered accordingly.
thresholds = pd.Series({h: thresholds_actual[h] for h in mutation_site_headers})

# 32-site layout: mutation site headers in ascending order, including 3614.
mutation_site_headers_actual_3614 = [
    3244, 3297, 3350, 3399, 3455, 3509, 3562, 3614, 3665, 3720, 3773,
    3824, 3879, 3933, 3985, 4039, 4089, 4145, 4190, 4245, 4298, 4349,
    4402, 4455, 4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
]

# Thresholds for each mutation site of the 32-site layout (3614 included).
thresholds_actual_3614 = pd.Series({
    3244: 1.096910677,
    3297: 0.923658795,
    3350: 0.668939037,
    3399: 0.914305214,
    3455: 1.297392984,
    3509: 1.812636208,
    3562: 1.185047484,
    3614: 0.157969131375,
    3665: 0.298007308,
    3720: 0.58857544,
    3773: 0.882561082,
    3824: 1.149082617,
    3879: 0.816050702,
    3933: 2.936517653,
    3985: 1.597166791,
    4039: 0.962108082,
    4089: 1.479783497,
    4145: 0.305853225,
    4190: 1.311869541,
    4245: 1.707556905,
    4298: 0.875013076,
    4349: 1.227704526,
    4402: 0.593206446,
    4455: 1.179633137,
    4510: 1.272477799,
    4561: 1.293841573,
    4615: 1.16821885,
    4668: 1.40306,
    4720: 0.706530878,
    4773: 1.483114072,
    4828: 0.954939873,
    4882: 1.47524328,
})

# 32-site headers reordered: 4402 down to 3244, then 4882 down to 4455.
mutation_site_headers_3614 = [
    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039, 3985, 3933, 3879, 3824,
    3773, 3720, 3665, 3614, 3562, 3509, 3455, 3399, 3350, 3297, 3244,  # 1–23
    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,  # 24–32
]

# Thresholds reordered accordingly.
thresholds_3614 = pd.Series(
    {h: thresholds_actual_3614[h] for h in mutation_site_headers_3614}
)

# ---------------------------------------------------------------------------
# Robot script settings
# ---------------------------------------------------------------------------

ROBOT_TEMPLATE_PATH = "/home/user/app/Robot.csv"
ROBOT_TEMPLATE_COLUMNS = [
    'Labware', 'Source', 'Labware_2', 'Destination', 'Volume', 'Tool', 'Name',
]
# Number of source wells the robot script maps onto the binary columns.
SOURCE_WELL_COUNT = 32
# Volume (µl) shared equally between all donors of one sample.
TOTAL_DONOR_VOLUME_UL = 32

# ---------------------------------------------------------------------------
# Utility functions
# ---------------------------------------------------------------------------

# Voyager ASCII 6-bit conversion table (code -> character).
# Defined exactly once so that every tab encodes and decodes with the same
# codes; a second definition lacking ',' would shift every symbol after '.'.
voyager_table = {
    i: ch for i, ch in enumerate([
        ' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        '.', ',', '(', ')', '+', '-', '*', '/', '=', '$', '!', ':', '%',
        '"', '#', '@', "'", '?', '&',
    ])
}
reverse_voyager_table = {v: k for k, v in voyager_table.items()}


def string_to_binary_labels(s: str) -> list[int]:
    """Encode *s* as a flat list of bits, six bits per character.

    Each character is upper-cased and looked up in ``reverse_voyager_table``;
    characters missing from the table are encoded as code 0. Bits are emitted
    most-significant first.
    """
    bits = []
    for char in s:
        val = reverse_voyager_table.get(char.upper(), 0)
        bits.extend((val >> bit) & 1 for bit in range(5, -1, -1))
    return bits


def binary_labels_to_string(bits: list[int]) -> str:
    """Decode a flat list of bits into text, six bits per character.

    A trailing chunk shorter than six bits is right-padded with zeros. Codes
    that are not in ``voyager_table`` decode to ``'?'``.
    """
    chars = []
    for i in range(0, len(bits), 6):
        chunk = bits[i:i + 6]
        if len(chunk) < 6:
            chunk += [0] * (6 - len(chunk))
        val = sum(b << (5 - j) for j, b in enumerate(chunk))
        chars.append(voyager_table.get(val, '?'))
    return ''.join(chars)


def group_bits(bits: list[int], width: int) -> list[list[int]]:
    """Split *bits* into rows of *width* bits plus a trailing count of ones.

    The last row is right-padded with zeros to *width*. Every returned row
    has ``width + 1`` entries; the final entry is the sum of the row's bits.
    """
    rows = []
    for start in range(0, len(bits), width):
        row = bits[start:start + width]
        row += [0] * (width - len(row))
        rows.append(row + [sum(row)])
    return rows


def load_source_wells(count: int = SOURCE_WELL_COUNT) -> list:
    """Return exactly *count* source wells read from the robot template CSV.

    The unique values of the template's ``Source`` column are used in order
    of appearance. If there are fewer than *count*, the list is padded with
    ``Fake<i>`` placeholders; if there are more, it is truncated.

    Raises:
        OSError: if the template file cannot be read.
        ValueError: if the template cannot be parsed or does not have the
            expected seven columns.
    """
    template = pd.read_csv(ROBOT_TEMPLATE_PATH, skiprows=3)
    template.columns = ROBOT_TEMPLATE_COLUMNS
    wells = template['Source'].unique().tolist()
    if len(wells) < count:
        wells += [f"Fake{i}" for i in range(count - len(wells))]
    return wells[:count]


def load_source_wells_or_report(count: int = SOURCE_WELL_COUNT):
    """Like ``load_source_wells`` but show an error and return ``None`` on failure.

    Reporting instead of raising keeps a missing or malformed template from
    aborting the script run, which would stop the remaining tabs rendering.
    """
    try:
        return load_source_wells(count)
    except (OSError, ValueError) as err:
        st.error(f"Could not load robot template '{ROBOT_TEMPLATE_PATH}': {err}")
        return None


def build_robot_script(binary_df: pd.DataFrame, source_wells: list) -> pd.DataFrame:
    """Build the robot transfer table for a table of binary labels.

    Args:
        binary_df: one row per sample and one 0/1 column per site; it must
            contain only the binary columns. It is not modified.
        source_wells: source well for each column of *binary_df*, by position.

    Returns:
        A frame with columns ``Source``, ``Destination`` and ``Volume``: one
        row per (column, sample) pair whose label is 1, ordered by column and
        then by sample. ``Destination`` is ``A<sample number>`` (1-based) and
        ``Volume`` is 32 µl divided by the sample's number of set labels,
        rounded to two decimals.

    Raises:
        ValueError: if there are fewer source wells than binary columns.
    """
    if len(source_wells) < len(binary_df.columns):
        raise ValueError(
            f"need {len(binary_df.columns)} source wells, got {len(source_wells)}"
        )
    donors_per_sample = binary_df.sum(axis=1)
    steps = []
    for well, site in zip(source_wells, binary_df.columns):
        labelled = zip(binary_df[site], donors_per_sample)
        # Sample numbers come from enumerate, so they stay integers and the
        # destination renders as "A1" rather than "A1.0".
        for sample_no, (label, n_donors) in enumerate(labelled, start=1):
            if label == 1:
                steps.append({
                    'Source': well,
                    'Destination': f"A{sample_no}",
                    'Volume': round(TOTAL_DONOR_VOLUME_UL / n_donors, 2),
                })
    return pd.DataFrame(steps, columns=['Source', 'Destination', 'Volume'])


def color_binary(val):
    """Return the cell CSS for a binary label: green for 1, red for 0."""
    if val == 1:
        return "background-color: lightgreen"
    if val == 0:
        return "background-color: lightcoral"
    return ""


def style_binary(frame: pd.DataFrame):
    """Return a Styler for *frame* with ``color_binary`` applied to each cell."""
    styler = frame.style
    # Styler.applymap is deprecated in favour of Styler.map (pandas >= 2.1);
    # fall back to applymap only when map is not available.
    cellwise = getattr(styler, "map", None) or styler.applymap
    return cellwise(color_binary)


def render_ef_tab(ascending_sites, site_thresholds, reordered_sites, *,
                  uploader_key, button_key, reordered_key, ascending_key):
    """Render an "EF → Binary" tab for one site layout.

    Args:
        ascending_sites: site numbers in ascending order; they name the
            columns of the uploaded or manually entered EF table.
        site_thresholds: Series mapping site number to its threshold; a value
            greater than or equal to the threshold becomes label 1.
        reordered_sites: site numbers in the reordered column layout.
        uploader_key, button_key, reordered_key, ascending_key: Streamlit
            widget keys for the uploader, the convert button and the two
            download buttons.
    """
    st.write("Upload an Editing Frequency CSV or enter manually:")
    st.write("**Note:** Please upload CSV files **without column headers**, in ascending order from 3244 to 4882.")
    ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key=uploader_key)

    columns = [str(site) for site in ascending_sites]
    ef_df = pd.DataFrame(columns=columns)
    if ef_file:
        uploaded = pd.read_csv(ef_file, header=None)
        if uploaded.shape[1] == len(columns):
            uploaded.columns = columns
            ef_df = uploaded
        else:
            # Assigning the headers to a table of the wrong width would raise.
            st.warning(f"⚠️ CSV must have exactly {len(columns)} columns.")

    edited_df = st.data_editor(ef_df, num_rows="dynamic")

    if st.button("Convert to Binary Labels", key=button_key):
        binary_part = pd.DataFrame({
            str(site): (
                edited_df[str(site)].astype(float) >= site_thresholds[site]
            ).astype(int)
            for site in ascending_sites
        })
        binary_reordered = binary_part[[str(h) for h in reordered_sites]]

        st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
        st.dataframe(style_binary(binary_reordered))
        st.download_button("Download CSV", binary_reordered.to_csv(index=False), "ef_binary_labels.csv", key=reordered_key)
        st.subheader("Decoded String (continuous across rows)")
        st.write(binary_labels_to_string(binary_reordered.values.flatten().tolist()))

        st.subheader("Binary Labels (Ascending 3244→4882)")
        st.dataframe(style_binary(binary_part))
        st.download_button("Download Ascending Order CSV", binary_part.to_csv(index=False), "ef_binary_labels_ascending.csv", key=ascending_key)
        st.subheader("Decoded String (continuous across rows)")
        st.write(binary_labels_to_string(binary_part.values.flatten().tolist()))


# ---------------------------------------------------------------------------
# Streamlit App
# ---------------------------------------------------------------------------

st.title("ASCII & Binary Label Converter")
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "Text to Binary Labels (31)",
    "EF → Binary → String (31)",
    "Text to Binary Labels (32)",
    "EF → Binary (32)",
    "Binary → String",
])

# Tab 1: Text to Binary (31 sites)
with tab1:
    user_input = st.text_input("Enter text", value="DNA", key="input_text_31")
    if user_input:
        binary_labels = string_to_binary_labels(user_input)

        st.subheader("Binary Labels per Character")
        for i, char in enumerate(user_input):
            st.write(f"'{char}' → {binary_labels[6 * i:6 * i + 6]}")

        st.subheader("Binary Labels (31-bit groups)")
        df = pd.DataFrame(
            group_bits(binary_labels, 31),
            columns=[str(h) for h in mutation_site_headers] + ["Edited Sites"],
        )
        st.dataframe(df)
        st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv", key="download_csv_tab1_31csv")

        df_sorted = df[[str(h) for h in sorted(mutation_site_headers_actual)]].copy()
        # The 31-site layout has no 3614 column; add an all-zero one after
        # 3562 so the ascending table has the same 32 columns as the 32-site
        # layout.
        df_sorted.insert(df_sorted.columns.get_loc("3562") + 1, "3614", 0)
        st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
        st.dataframe(df_sorted)
        st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv", key="download_csv_tab1_ascend")

        # === Robot Preparation Script Generation ===
        st.subheader("Robot Preparation Script")
        source_wells = load_source_wells_or_report()
        if source_wells is not None:
            st.write(f"Number of source wells: {len(source_wells)}")
            st.write(f"Number of binary columns: {len(df_sorted.columns)}")
            robot_script_df = build_robot_script(df_sorted, source_wells)
            st.dataframe(robot_script_df)
            st.download_button("Download Robot Script CSV", robot_script_df.to_csv(index=False), "robot_script.csv", key="download_csv_tab1_robot")

            # === Robot Preparation Script (Based on Ascending Order 3244 → 4882) ===
            # NOTE(review): both robot scripts in this tab are derived from the
            # same ascending table, so their contents are identical — confirm
            # whether the first one should use the reordered layout instead.
            st.subheader("Robot Preparation Script (Based on Ascending Order 3244 → 4882)")
            robot_script_ascend_df = build_robot_script(df_sorted, source_wells)
            st.dataframe(robot_script_ascend_df)
            st.download_button("Download Ascending Robot Script CSV", robot_script_ascend_df.to_csv(index=False), "robot_script_ascending.csv", key="download_csv_tab1_robot_ascend")

# Tab 2: EF → Binary (31 sites)
with tab2:
    render_ef_tab(
        sorted(mutation_site_headers_actual),
        thresholds_actual,
        mutation_site_headers,
        uploader_key="ef",
        button_key="convert_button_tab2",
        reordered_key="download_csv_tab2_csv",
        ascending_key="download_csv_tab2_ascend",
    )

# Tab 3: Text to Binary (32 sites)
with tab3:
    user_input_32 = st.text_input("Enter text", value="DNA", key="input_text_32")
    if user_input_32:
        binary_labels = string_to_binary_labels(user_input_32)

        st.subheader("ASCII Codes")
        st.write([ord(c) for c in user_input_32])

        st.subheader("Binary Labels per Character")
        for i, char in enumerate(user_input_32):
            st.write(f"'{char}' → {binary_labels[6 * i:6 * i + 6]}")

        st.subheader("Binary Labels (32-bit groups)")
        df = pd.DataFrame(
            group_bits(binary_labels, 32),
            columns=[str(h) for h in mutation_site_headers_3614] + ["Edited Sites"],
        )
        st.dataframe(df)
        st.download_button("Download as CSV", df.to_csv(index=False), "text_32_binary_labels.csv", key="download_csv_tab3_csv")

        df_sorted = df[[str(h) for h in sorted(mutation_site_headers_actual_3614)]].copy()
        st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
        st.dataframe(df_sorted)
        st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv", key="download_csv_tab3_ascend")

        # === Robot Preparation Script Generation ===
        st.subheader("Robot Preparation Script")
        source_wells = load_source_wells_or_report()
        if source_wells is not None:
            robot_script_df = build_robot_script(df_sorted, source_wells)
            st.dataframe(robot_script_df)
            st.download_button("Download Robot Script CSV", robot_script_df.to_csv(index=False), "robot_script.csv", key="download_csv_tab3_robot")

# Tab 4: EF → Binary (32 sites)
with tab4:
    render_ef_tab(
        sorted(mutation_site_headers_actual_3614),
        thresholds_actual_3614,
        mutation_site_headers_3614,
        uploader_key="ef2",
        button_key="convert_button_tab4",
        reordered_key="download_csv_tab4_csv",
        ascending_key="download_csv_tab4_ascend",
    )

# Tab 5: Binary → String
with tab5:
    st.header("Decode Binary Labels to String")

    st.subheader("🔘 Option 1: 32-bit Binary per Row")
    st.write("Upload CSV with 32 columns (0 or 1), no headers, from EF Binary format.")
    binary32_file = st.file_uploader("Upload 32-bit Binary CSV", type=["csv"], key="binary_32")
    if binary32_file:
        df_32 = pd.read_csv(binary32_file, header=None)
        if df_32.shape[1] != 32:
            st.warning("⚠️ CSV must have exactly 32 columns.")
        else:
            # Reordered: 4402 → 3244, 4882 → 4455
            df_32.columns = [str(h) for h in mutation_site_headers_3614]
            decoded_reordered = binary_labels_to_string(
                df_32.values.flatten().astype(int).tolist()
            )
            st.subheader("Decoded String (Reordered 4402→3244, 4882→4455)")
            st.write(decoded_reordered)
            st.download_button("Download Reordered CSV", df_32.to_csv(index=False), "decoded_binary_32_reordered.csv", key="download_csv_tab5_32_reordered")

            # Ascending: 3244 → 4882
            df_ascending = df_32[[str(h) for h in mutation_site_headers_actual_3614]]
            decoded_asc = binary_labels_to_string(
                df_ascending.values.flatten().astype(int).tolist()
            )
            st.subheader("Decoded String (Ascending 3244→4882)")
            st.write(decoded_asc)
            st.download_button("Download Ascending CSV", df_ascending.to_csv(index=False), "decoded_binary_32_ascending.csv", key="download_csv_tab5_32_ascend")

    st.markdown("---")

    st.subheader("🔘 Option 2: 31-bit Binary Grouped per Row")
    st.write("Upload CSV with 31 columns per row (each row = one 6-bit ASCII chunk group).")
    binary31_file = st.file_uploader("Upload 31-bit Group CSV", type=["csv"], key="binary_31")
    if binary31_file:
        df_31 = pd.read_csv(binary31_file, header=None)
        # Each row is decoded on its own from its first 31 non-empty cells.
        # NOTE(review): tab 1 splits one continuous bit stream into 31-bit
        # rows, so characters can straddle rows there — confirm per-row
        # decoding is the intended inverse for multi-row input.
        decoded_rows = [
            binary_labels_to_string(row.dropna().astype(int).tolist()[:31])
            for _, row in df_31.iterrows()
        ]
        st.subheader("Decoded String from 31-bit Chunks")
        full_decoded = "".join(decoded_rows)
        st.write(full_decoded)
        st.download_button("Download Concatenated Output", full_decoded, "decoded_31bit_string.txt", key="download_csv_tab5_31")