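# Page header captured together with the file (not part of the program):
# uploader: Jayesh13 | commit message: "Update app.py" | commit: 5223b02 (verified)
# viewer links: raw / history / blame | file size: 2.53 kB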
import os
os.system("pip install streamlit pandas xlsxwriter openpyxl")
import streamlit as st
import pandas as pd
from io import BytesIO
# Columns that identify a protein. Every other column of the first file is
# treated as a repeat-frequency column.
ID_COL = "Entry ID"
NAME_COL = "Protein Name"

# Column layout of the result sheet. Declared up front so that an empty result
# still has a "Difference" column to sort on.
RESULT_COLUMNS = [
    "Entry ID",
    "Protein Name",
    "Repeat",
    "Frequency File 1",
    "Frequency File 2",
    "Difference",
]


def _require_key_columns(df: pd.DataFrame, label: str) -> None:
    """Raise ValueError when *df* lacks one of the two key columns."""
    missing = [col for col in (ID_COL, NAME_COL) if col not in df.columns]
    if missing:
        raise ValueError(
            f"{label} is missing required column(s): {', '.join(missing)}"
        )


def _compare_repeats(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Return the repeat frequencies that differ between two sheets.

    Rows are paired on the combination of ``Entry ID`` and ``Protein Name``.
    Rows of ``df1`` without a partner in ``df2`` are skipped. When ``df2``
    holds several rows with the same key, the first one is used.

    Args:
        df1: Sheet from the first upload; its non-key columns define which
            repeats are compared.
        df2: Sheet from the second upload.

    Returns:
        A frame with the columns in ``RESULT_COLUMNS``, one row per changed
        repeat, sorted by ``Difference`` in descending order. The frame is
        empty (but still has all columns) when nothing changed.

    Raises:
        ValueError: If either sheet lacks a key column.
        KeyError: If a matched row of ``df2`` lacks a repeat column of ``df1``.
        TypeError: If two differing values cannot be subtracted.
    """
    _require_key_columns(df1, "First file")
    _require_key_columns(df2, "Second file")

    repeat_cols = [col for col in df1.columns if col not in (ID_COL, NAME_COL)]

    # Index the second sheet once instead of re-scanning it for every row of
    # the first sheet. setdefault keeps the first row seen for each key.
    second_by_key = {}
    for _, row2 in df2.iterrows():
        second_by_key.setdefault((row2[ID_COL], row2[NAME_COL]), row2)

    records = []
    for _, row1 in df1.iterrows():
        entry_id = row1[ID_COL]
        protein_name = row1[NAME_COL]

        # A missing key never compares equal to anything, so such rows can
        # have no partner in the second sheet.
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue

        row2 = second_by_key.get((entry_id, protein_name))
        if row2 is None:
            continue

        for repeat in repeat_cols:
            freq1 = row1[repeat]
            freq2 = row2[repeat]

            # NaN != NaN is True, so a cell that is blank in both sheets
            # would otherwise be reported as a change.
            if pd.isna(freq1) and pd.isna(freq2):
                continue

            if freq1 != freq2:
                records.append({
                    "Entry ID": entry_id,
                    "Protein Name": protein_name,
                    "Repeat": repeat,
                    "Frequency File 1": freq1,
                    "Frequency File 2": freq2,
                    "Difference": abs(freq1 - freq2),
                })

    result_df = pd.DataFrame(records, columns=RESULT_COLUMNS)
    return result_df.sort_values(by="Difference", ascending=False)


st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
st.title("🧬 Protein Repeat Comparator")
st.write("Upload two Excel files. Only changed repeat frequencies will be shown in the result.")

uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])

if uploaded_file1 and uploaded_file2:
    # Top-level boundary of the app: any failure while reading, comparing or
    # exporting is shown to the user instead of crashing the page.
    try:
        # Read both Excel files, assuming the header is in the second row
        df1 = pd.read_excel(uploaded_file1, header=1)
        df2 = pd.read_excel(uploaded_file2, header=1)

        result_df = _compare_repeats(df1, df2)

        if result_df.empty:
            # Previously this case failed with "Error: 'Difference'".
            st.info("No changed repeat frequencies were found between the two files.")
        else:
            # Build the workbook in memory so nothing is written to disk
            output = BytesIO()
            with pd.ExcelWriter(output, engine="openpyxl") as writer:
                result_df.to_excel(writer, index=False)
            output.seek(0)

            st.success("✅ Comparison complete! Showing only changed repeats.")
            st.download_button(
                label="📥 Download Changed Repeats Excel",
                data=output,
                file_name="changed_protein_repeats.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
    except Exception as e:
        st.error(f"⚠️ Error: {e}")