Spaces:

wenjun99
/

bitconverter

Sleeping

App Files Files Community

bitconverter / app.py

wenjun99

Update app.py

08696fc verified 5 months ago

raw

history blame

7.89 kB

	import streamlit as st
	from PIL import Image, ImageFilter
	import numpy as np
	import pandas as pd
	from streamlit_cropper import st_cropper

	# Mutation site headers in ascending order. Site 3614 has been removed,
	# which leaves 31 sites.
	mutation_site_headers_actual = [
	    3244, 3297, 3350, 3399, 3455, 3509, 3562,
	    3665, 3720, 3773, 3824, 3879, 3933, 3985, 4039,
	    4089, 4145, 4190, 4245, 4298, 4349, 4402, 4455,
	    4510, 4561, 4615, 4668, 4720, 4773, 4828, 4882,
	]

	# Threshold for each mutation site, indexed by site header.
	# (The removed 3614 entry was 0.091557752.)
	thresholds_actual = pd.Series({
	    3244: 1.094293328,
	    3297: 0.924916122,
	    3350: 0.664586629,
	    3399: 0.91573613,
	    3455: 1.300869714,
	    3509: 1.821975901,
	    3562: 1.178862418,
	    3665: 0.298697327,
	    3720: 0.58379781,
	    3773: 0.891088481,
	    3824: 1.145509641,
	    3879: 0.81833191,
	    3933: 2.93084335,
	    3985: 1.593758847,
	    4039: 0.966055013,
	    4089: 1.465671338,
	    4145: 0.30309335,
	    4190: 1.321615138,
	    4245: 1.709752495,
	    4298: 0.868534701,
	    4349: 1.222907645,
	    4402: 0.58873557,
	    4455: 1.185522985,
	    4510: 1.266797682,
	    4561: 1.109913024,
	    4615: 1.181106084,
	    4668: 1.408533949,
	    4720: 0.714151142,
	    4773: 1.471959437,
	    4828: 0.95879943,
	    4882: 1.464503885,
	})

	# Mutation site headers in the reordered layout: 4402 down to 3244,
	# followed by 4882 down to 4455.
	mutation_site_headers = [
	    4402, 4349, 4298, 4245, 4190, 4145, 4089, 4039,
	    3985, 3933, 3879, 3824, 3773, 3720, 3665,
	    3562, 3509, 3455, 3399, 3350, 3297, 3244,  # positions 1–22
	    4882, 4828, 4773, 4720, 4668, 4615, 4561, 4510, 4455,  # positions 23–31
	]

	# The same thresholds, arranged in the reordered layout.
	thresholds = thresholds_actual.loc[mutation_site_headers]

	# === Utility functions ===

	# Voyager ASCII 6-bit conversion table, mapping code -> character.
	# A character's position in the alphabet string below is its code, so
	# codes 0 through 54 are assigned.
	voyager_table = dict(enumerate(
	    " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "0123456789"
	    ".()+-*/=$!:%\"#@'?&"
	))
	# Inverse lookup, mapping character -> code.
	reverse_voyager_table = {char: code for code, char in voyager_table.items()}

	def string_to_binary_labels(s: str) -> list[int]:
	    """Encode text as a flat list of bits, six bits per character.

	    Each character is upper-cased and looked up in ``reverse_voyager_table``;
	    characters missing from the table are encoded as code 0. Bits are
	    emitted most-significant first.
	    """
	    labels = []
	    for symbol in s:
	        code = reverse_voyager_table.get(symbol.upper(), 0)
	        # Walk bit positions 5..0 so the high bit comes first.
	        labels += [(code >> shift) & 1 for shift in (5, 4, 3, 2, 1, 0)]
	    return labels

	def binary_labels_to_string(bits: list[int]) -> str:
	    """Decode a flat list of bits into text, six bits per character.

	    The bits are consumed in groups of six (most-significant bit first).
	    A short final group is right-padded with zeros. Codes that are not in
	    ``voyager_table`` decode to '?'.
	    """
	    decoded = []
	    for start in range(0, len(bits), 6):
	        group = bits[start:start + 6]
	        missing = 6 - len(group)
	        if missing > 0:
	            group += [0] * missing
	        # Horner evaluation: the first bit ends up with weight 2**5.
	        code = 0
	        for bit in group:
	            code = code * 2 + bit
	        decoded.append(voyager_table.get(code, '?'))
	    return ''.join(decoded)

	def clean_image(img: Image.Image, min_size: int = 256) -> Image.Image:
	    """Return an RGB, lightly blurred version of *img*.

	    If either dimension is below *min_size*, the image is first resized to
	    exactly ``min_size`` x ``min_size``. A Gaussian blur of radius 1 is then
	    applied.
	    """
	    rgb = img.convert("RGB")
	    too_small = rgb.width < min_size or rgb.height < min_size
	    if too_small:
	        rgb = rgb.resize((min_size, min_size))
	    return rgb.filter(ImageFilter.GaussianBlur(radius=1))

	def image_to_binary_labels_rgb(img: Image.Image, max_pixels: int = 256) -> list[int]:
	    """Convert an image into a flat list of bits, 24 bits per pixel.

	    The image is passed through ``clean_image`` and then shrunk with
	    ``thumbnail`` to fit a box whose side is ``int(sqrt(max_pixels))``.
	    Every pixel contributes its three channels in stored order, each as
	    eight bits with the most-significant bit first.
	    """
	    side = int(np.sqrt(max_pixels))
	    prepared = clean_image(img)
	    # thumbnail() is called for its in-place effect; its result is not used.
	    prepared.thumbnail((side, side))
	    channels = np.array(prepared).reshape(-1, 3)

	    return [
	        (value >> shift) & 1
	        for rgb in channels
	        for value in rgb
	        for shift in range(7, -1, -1)
	    ]

	def binary_labels_to_rgb_image(binary_labels: list[int], width: int = None, height: int = None) -> Image.Image:
	    """Build an RGB image from a flat list of bits, 24 bits per pixel.

	    Args:
	        binary_labels: Bits in R, G, B order, eight bits per channel with
	            the most-significant bit first. The list is not modified.
	        width: Image width in pixels. If *width* or *height* is ``None``,
	            both are replaced by the side of the smallest square that holds
	            ``len(binary_labels) // 24`` pixels.
	        height: Image height in pixels (see *width*).

	    Returns:
	        A ``height`` x ``width`` RGB image. If there are fewer bits than the
	        image needs, the remainder is filled with zero bits; surplus bits
	        are ignored. When the size is derived automatically, trailing bits
	        that do not make up a whole pixel are dropped.
	    """
	    if width is None or height is None:
	        total_pixels = len(binary_labels) // 24
	        side = int(np.ceil(np.sqrt(total_pixels)))
	        width = height = side

	    needed_bits = width * height * 24

	    # Pad a private copy: ``binary_labels += [...]`` would extend the
	    # caller's list in place and leak the zero padding back to the caller.
	    bits = list(binary_labels[:needed_bits])
	    bits.extend([0] * (needed_bits - len(bits)))

	    def _byte(start: int) -> int:
	        """Combine the eight bits beginning at *start* into one value."""
	        return sum(bit << (7 - offset) for offset, bit in enumerate(bits[start:start + 8]))

	    pixels = [
	        (_byte(base), _byte(base + 8), _byte(base + 16))
	        for base in range(0, needed_bits, 24)
	    ]

	    array = np.array(pixels, dtype=np.uint8).reshape((height, width, 3))
	    return Image.fromarray(array, mode='RGB')

	# === Streamlit App ===

	st.title("ASCII & Binary Label Converter")
	tab1, tab2 = st.tabs(["Text to Binary Labels (31)", "EF → Binary (31)"])

	# Tab 1: Text to Binary
	# Encodes the entered text with string_to_binary_labels and shows the bits
	# per character, in rows of 31 (reordered site layout), and in ascending
	# site order.
	with tab1:
	    user_input = st.text_input("Enter text", value="DNA")
	    if user_input:
	        binary_labels = string_to_binary_labels(user_input)

	        st.subheader("ASCII Codes")
	        st.write([ord(c) for c in user_input])

	        st.subheader("Binary Labels per Character")
	        # Six bits were produced per character, so the groups line up
	        # one-to-one with the characters of the input.
	        per_char = [binary_labels[k:k + 6] for k in range(0, len(binary_labels), 6)]
	        for symbol, symbol_bits in zip(user_input, per_char):
	            st.write(f"'{symbol}' → {symbol_bits}")

	        st.subheader("Binary Labels (31-bit groups)")
	        rows = []
	        for start in range(0, len(binary_labels), 31):
	            row = binary_labels[start:start + 31]
	            # Zero-fill a short final row up to 31 bits.
	            row.extend([0] * (31 - len(row)))
	            # The extra trailing cell is the number of 1-bits in the row.
	            rows.append([*row, sum(row)])

	        site_columns = [str(h) for h in mutation_site_headers]
	        df = pd.DataFrame(rows, columns=site_columns + ["Edited Sites"])
	        st.dataframe(df)
	        st.download_button("Download as CSV", df.to_csv(index=False), "text_31_binary_labels.csv")

	        # Same bits with the site columns in ascending order; the
	        # "Edited Sites" column is not part of this view.
	        ascending_columns = [
	            name
	            for name in map(str, sorted(mutation_site_headers_actual))
	            if name in df.columns
	        ]
	        df_sorted = df[ascending_columns]
	        st.subheader("Binary Labels (Ascending Order 3244 → 4882)")
	        st.dataframe(df_sorted)
	        st.download_button("Download Ascending Order CSV", df_sorted.to_csv(index=False), "text_binary_labels_ascending.csv")

	# Tab 2: EF → Binary
	# Takes one editing-frequency value per mutation site (uploaded or typed
	# in), turns each into a 0/1 label by comparing it with that site's
	# threshold, and decodes the reordered labels back to text.
	with tab2:
	    st.write("Upload an Editing Frequency CSV or enter manually:")
	    st.write("Note: Please upload CSV files without column headers, in ascending order from 3244 to 4882.")
	    ef_file = st.file_uploader("Upload EF CSV", type=["csv"], key="ef")

	    ascending_columns = [str(site) for site in sorted(mutation_site_headers_actual)]

	    # Start from an empty table and replace it only when the upload is
	    # usable, so a bad file shows a message instead of a traceback.
	    ef_df = pd.DataFrame(columns=ascending_columns)
	    if ef_file:
	        try:
	            uploaded_df = pd.read_csv(ef_file, header=None)
	        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
	            st.error(f"Could not read the uploaded CSV: {exc}")
	        else:
	            if uploaded_df.shape[1] == len(ascending_columns):
	                uploaded_df.columns = ascending_columns
	                ef_df = uploaded_df
	            else:
	                # Assigning the column names would raise on a length mismatch.
	                st.error(
	                    f"Expected {len(ascending_columns)} columns but the uploaded "
	                    f"CSV has {uploaded_df.shape[1]}."
	                )

	    edited_df = st.data_editor(ef_df, num_rows="dynamic")

	    if st.button("Convert to Binary Labels"):
	        try:
	            ef_values = edited_df[ascending_columns].astype(float)
	        except (ValueError, TypeError) as exc:
	            # Typed-in cells can hold text that does not parse as a number.
	            st.error(f"Editing frequencies must be numeric: {exc}")
	        else:
	            # A value at or above the site's threshold becomes 1, otherwise 0.
	            binary_part = pd.DataFrame()
	            for col in sorted(mutation_site_headers_actual):
	                col_str = str(col)
	                threshold = thresholds_actual[col]
	                binary_part[col_str] = (ef_values[col_str] >= threshold).astype(int)

	            binary_reordered = binary_part[[str(h) for h in mutation_site_headers if str(h) in binary_part.columns]]

	            def color_binary(val):
	                """Return the cell CSS for a label: green for 1, red for 0."""
	                if val == 1:
	                    return "background-color: lightgreen"
	                if val == 0:
	                    return "background-color: lightcoral"
	                return ""

	            def style_binary(frame):
	                """Apply color_binary to every cell of *frame*."""
	                styler = frame.style
	                # Styler.applymap was renamed to Styler.map in pandas 2.1;
	                # fall back to the old name on older pandas.
	                apply_cells = getattr(styler, "map", None) or styler.applymap
	                return apply_cells(color_binary)

	            st.subheader("Binary Labels (Reordered 4402→3244, 4882→4455)")
	            st.dataframe(style_binary(binary_reordered))
	            st.download_button("Download CSV", binary_reordered.to_csv(index=False), "ef_binary_labels.csv")

	            # Rows are concatenated before decoding, so a character may span
	            # the boundary between two rows.
	            all_bits = binary_reordered.values.flatten().tolist()
	            decoded_string = binary_labels_to_string(all_bits)
	            st.subheader("Decoded String (continuous across rows)")
	            st.write(decoded_string)

	            st.subheader("Binary Labels (Ascending 3244→4882)")
	            st.dataframe(style_binary(binary_part))
	            st.download_button("Download Ascending Order CSV", binary_part.to_csv(index=False), "ef_binary_labels_ascending.csv")