Spaces:
Sleeping
Sleeping
File size: 2,840 Bytes
5cd21b9 b57c1d3 26343ed b57c1d3 5296403 b57c1d3 5296403 5223b02 5296403 128ce67 b533841 5296403 a0d5a6c 5296403 5223b02 5296403 5223b02 128ce67 5223b02 5296403 128ce67 5223b02 5296403 128ce67 5296403 128ce67 5223b02 5296403 b533841 5296403 5223b02 5296403 a0d5a6c 5223b02 a0d5a6c 128ce67 a0d5a6c 5223b02 5296403 a0d5a6c 5296403 a0d5a6c 5296403 a0d5a6c 5296403 26343ed 128ce67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import importlib
import importlib.util
import os
import subprocess
import sys
from io import BytesIO

# Best-effort dependency bootstrap.
#
# Streamlit re-executes this script on every user interaction, so an
# unconditional `pip install` would run on each rerun. Only packages that are
# not importable are installed, using the pip that belongs to the running
# interpreter (`sys.executable -m pip`) and an argument list instead of a
# shell string. A failed install is not fatal here (check=False); the imports
# below will raise ImportError if a package is still missing.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl")
_missing_packages = [
    name for name in _REQUIRED_PACKAGES if importlib.util.find_spec(name) is None
]
if _missing_packages:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing_packages],
        check=False,
    )
    # Make freshly installed packages visible to the import system.
    importlib.invalidate_caches()

import streamlit as st  # noqa: E402  (must follow the bootstrap above)
import pandas as pd  # noqa: E402
def find_changed_repeats(df1, df2):
    """Return the repeat frequencies that differ between two protein tables.

    The first two columns of ``df1`` are taken as the ID and name columns;
    every further column is a repeat column. Each row of ``df1`` is paired
    with the first row of ``df2`` that has the same ID and name. Rows of
    ``df1`` without such a partner are ignored.

    Only repeat columns present in both tables are compared. Cells that are
    empty or cannot be interpreted as numbers are skipped.

    Args:
        df1: First table (ID column, name column, then repeat columns).
        df2: Second table; must contain the ID and name columns of ``df1``.

    Returns:
        A DataFrame with the ID column, the name column, "Repeat",
        "Frequency File 1", "Frequency File 2" and "Difference" (absolute
        difference), sorted by "Difference" in descending order. It is empty
        when nothing changed.

    Raises:
        ValueError: If ``df1`` has fewer than two columns, or ``df2`` lacks
            the ID or name column of ``df1``.
    """
    # Work on renamed copies so the caller's frames are left untouched and
    # column labels compare as strings on both sides.
    df1 = df1.rename(columns=str)
    df2 = df2.rename(columns=str)

    if df1.shape[1] < 2:
        raise ValueError(
            "The first file needs at least an ID column and a name column."
        )

    id_col, name_col = df1.columns[0], df1.columns[1]
    missing_keys = [col for col in (id_col, name_col) if col not in df2.columns]
    if missing_keys:
        raise ValueError(
            "The second file is missing the column(s): " + ", ".join(missing_keys)
        )

    # dict.fromkeys drops duplicate labels while keeping the order.
    result_columns = list(
        dict.fromkeys(
            [
                id_col,
                name_col,
                "Repeat",
                "Frequency File 1",
                "Frequency File 2",
                "Difference",
            ]
        )
    )

    # A repeat column missing from the second table cannot be compared.
    repeat_columns = [col for col in df1.columns[2:] if col in df2.columns]
    if not repeat_columns:
        return pd.DataFrame(columns=result_columns)

    # Coerce to numbers so text cells become NaN (and are skipped below)
    # instead of raising a TypeError during subtraction.
    freqs1 = df1[repeat_columns].apply(pd.to_numeric, errors="coerce")
    freqs2 = df2[repeat_columns].apply(pd.to_numeric, errors="coerce")
    values1 = {col: freqs1[col].to_numpy() for col in repeat_columns}
    values2 = {col: freqs2[col].to_numpy() for col in repeat_columns}

    ids2 = df2[id_col]
    names2 = df2[name_col]

    differences = []
    for pos, (entry_id, protein_name) in enumerate(
        zip(df1[id_col], df1[name_col])
    ):
        hits = ((ids2 == entry_id) & (names2 == protein_name)).to_numpy().nonzero()[0]
        if hits.size == 0:
            continue
        match_pos = hits[0]  # first matching row of the second table wins

        for repeat_col in repeat_columns:
            freq1 = values1[repeat_col][pos]
            freq2 = values2[repeat_col][match_pos]
            if pd.isna(freq1) or pd.isna(freq2) or freq1 == freq2:
                continue
            differences.append({
                id_col: entry_id,
                name_col: protein_name,
                "Repeat": repeat_col,
                "Frequency File 1": freq1,
                "Frequency File 2": freq2,
                "Difference": abs(freq1 - freq2),
            })

    result_df = pd.DataFrame(differences, columns=result_columns)
    return result_df.sort_values(by="Difference", ascending=False)


def main():
    """Render the Streamlit page: upload two workbooks, offer the diff as a download."""
    st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
    st.title("🧬 Protein Repeat Comparator")
    st.write("Upload two Excel files with protein data. Frequency values should start from the second row.")

    uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
    uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
    if not (uploaded_file1 and uploaded_file2):
        return

    # Top-level UI boundary: any failure is reported to the user in the page.
    try:
        # header=1: the second spreadsheet row holds the column names.
        df1 = pd.read_excel(uploaded_file1, header=1)
        df2 = pd.read_excel(uploaded_file2, header=1)

        # Ensure column names are strings
        df1.columns = df1.columns.astype(str)
        df2.columns = df2.columns.astype(str)

        # Tell the user which repeat columns cannot be compared instead of
        # dropping them silently.
        skipped = [col for col in df1.columns[2:] if col not in df2.columns]
        if skipped:
            st.warning(
                "Repeat columns missing from the second file were skipped: "
                + ", ".join(skipped)
            )

        result_df = find_changed_repeats(df1, df2)
        if result_df.empty:
            st.info("No changes in repeat frequencies were found.")
            return

        output = BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            result_df.to_excel(writer, index=False)
        output.seek(0)

        st.success("✅ Comparison complete. Showing only changed repeats.")
        st.download_button(
            label="📥 Download Excel",
            data=output,
            file_name="changed_repeats.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
    except Exception as e:
        st.error(f"⚠️ Error: {e}")


# `streamlit run` executes the script as the `__main__` module, so the page is
# still rendered on every run, while importing this file (e.g. from a test)
# does not touch the Streamlit API.
if __name__ == "__main__":
    main()