# Source: app.py (2.46 kB), commit b533841 "Update app.py" by Jayesh13 (verified).
import os
# Install the app's dependencies by shelling out to pip whenever this script runs.
# The exit status of the command is discarded, so a failed install is not reported here.
# NOTE(review): installing at run time is presumably a workaround for a missing
# requirements.txt; confirm against the deployment setup and move it there if possible.
os.system("pip install streamlit pandas xlsxwriter openpyxl")
import streamlit as st
import pandas as pd
from io import BytesIO
# --- Page setup: browser-tab title, centered layout, heading and usage hint ---
st.set_page_config(
    layout="centered",
    page_title="Protein Repeat Comparator",
)
st.title("🧬 Protein Repeat Comparator")
st.write(
    "Upload two Excel files. "
    "Only changed repeat frequencies will be shown in the result."
)

# --- Inputs: the two .xlsx workbooks to compare -------------------------------
# The comparison further down only runs once both values are truthy.
uploaded_file1 = st.file_uploader(
    "Upload First Excel File",
    type=["xlsx"],
)
uploaded_file2 = st.file_uploader(
    "Upload Second Excel File",
    type=["xlsx"],
)
def _find_changed_repeats(df1, df2):
    """Collect every repeat value that differs between two protein tables.

    The first two columns of ``df1`` are taken as the identifier and the
    protein name; all remaining columns are treated as repeat frequencies.
    Rows are paired on (identifier, name). When ``df2`` holds several rows for
    the same pair, the first one is used. Rows of ``df1`` without a partner in
    ``df2`` are skipped.

    Args:
        df1: Table read from the first file.
        df2: Table read from the second file.

    Returns:
        A list of dicts, one per changed (row, repeat) combination, with the
        identifier, the protein name, the repeat column label, both values and
        their absolute difference.

    Raises:
        ValueError: If ``df1`` has fewer than two columns, or ``df2`` lacks a
            column that ``df1`` has.
    """
    if len(df1.columns) < 2:
        raise ValueError(
            "The first file needs an ID column and a protein name column."
        )
    id_col, name_col = df1.columns[0], df1.columns[1]
    repeat_cols = list(df1.columns[2:])

    missing = [col for col in df1.columns if col not in df2.columns]
    if missing:
        raise ValueError(
            "The second file is missing column(s): "
            + ", ".join(str(col) for col in missing)
        )

    # Index the second file once instead of scanning it for every row of the
    # first file. Rows with a blank key are dropped because a blank never
    # compares equal to anything; keep="first" mirrors "use the first match".
    keys = [id_col, name_col]
    candidates = df2.dropna(subset=keys).drop_duplicates(subset=keys, keep="first")
    partners = {
        (row[id_col], row[name_col]): row for _, row in candidates.iterrows()
    }

    records = []
    for _, row1 in df1.iterrows():
        entry_id = row1[id_col]
        protein_name = row1[name_col]
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue  # a blank key cannot match any row of the second file
        row2 = partners.get((entry_id, protein_name))
        if row2 is None:
            continue

        for repeat in repeat_cols:
            freq1 = row1[repeat]
            freq2 = row2[repeat]
            # NaN != NaN is always True, so a cell that is blank in both
            # files must be filtered out explicitly or it shows up as changed.
            if pd.isna(freq1) and pd.isna(freq2):
                continue
            if freq1 != freq2:
                records.append({
                    id_col: entry_id,
                    name_col: protein_name,
                    "Repeat": repeat,
                    "Frequency File 1": freq1,
                    "Frequency File 2": freq2,
                    "Difference": abs(freq1 - freq2),
                })
    return records


if uploaded_file1 and uploaded_file2:
    # Top-level UI boundary: any failure while reading or comparing the
    # workbooks is shown to the user instead of crashing the app.
    try:
        # Read both Excel files, assuming the header is in the second row.
        df1 = pd.read_excel(uploaded_file1, header=1)
        df2 = pd.read_excel(uploaded_file2, header=1)

        records = _find_changed_repeats(df1, df2)

        if not records:
            # Without this guard the sort below fails on an empty frame,
            # because it has no "Difference" column.
            st.info("No changed repeat frequencies were found between the two files.")
        else:
            result_df = pd.DataFrame(records)
            result_df = result_df.sort_values(by="Difference", ascending=False)

            # Build the workbook in memory so it can be offered as a download.
            output = BytesIO()
            with pd.ExcelWriter(output, engine="openpyxl") as writer:
                result_df.to_excel(writer, index=False)
            output.seek(0)

            st.success("✅ Comparison complete! Showing only changed repeats.")
            st.download_button(
                label="📥 Download Changed Repeats Excel",
                data=output,
                file_name="changed_protein_repeats.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
    except Exception as e:
        st.error(f"⚠️ Error: {e}")