# Commit 5a2a00f (verified) by Jayesh13: "Update app.py" - file size 3.19 kB.
import os
# Install the runtime dependencies via pip before the third-party imports
# below are executed.
# NOTE(review): this shells out on every execution of the script and the exit
# status returned by os.system is discarded, so a failed install only shows up
# as an ImportError further down. Declaring the packages in the deployment's
# dependency manifest is presumably the cleaner fix - confirm. xlsxwriter is
# installed here although the visible code writes Excel with engine='openpyxl'.
os.system("pip install streamlit pandas xlsxwriter openpyxl")
import streamlit as st
import pandas as pd
from io import BytesIO
# Page chrome: tab title and centered layout, then the heading and the
# one-line usage instructions.
st.set_page_config(
    layout="centered",
    page_title="Protein Repeat Comparator",
)
st.title("🧬 Protein Repeat Comparator")
st.write("Upload two Excel files with protein data. Frequency values should start from the second row.")

# Two .xlsx-only upload widgets, created in display order (first file, then
# second file) and bound to the two module-level names in one unpacking step.
uploaded_file1, uploaded_file2 = (
    st.file_uploader(label, type=["xlsx"])
    for label in ("Upload First Excel File", "Upload Second Excel File")
)
def _color_pct(val):
    """Return the CSS rule for one "%age Change" cell.

    Positive values are coloured green, negative values red; any other value
    gets no styling.
    """
    if val > 0:
        return "color: green"
    if val < 0:
        return "color: red"
    return ""


def _find_changed_repeats(df1, df2):
    """Collect every repeat whose frequency differs between the two sheets.

    The first column of ``df1`` is used as the entry ID, the second as the
    protein name, and every remaining column as a repeat frequency. A row of
    ``df1`` is paired with the first row of ``df2`` that has the same ID and
    the same name; rows without a partner are ignored.

    A cell pair is skipped when either value is missing or when the file-1
    value is 0 (the percentage change would divide by zero).
    NOTE(review): the zero-baseline skip also hides repeats that are 0 in
    file 1 and non-zero in file 2 - confirm that this is intended.

    Args:
        df1: DataFrame read from the first workbook.
        df2: DataFrame read from the second workbook.

    Returns:
        A list of dicts, one per changed cell, holding the ID, the protein
        name, the repeat column name, both frequencies, the absolute
        difference and the percentage change relative to file 1.

    Raises:
        ValueError: if ``df1`` has fewer than two columns, or if ``df2`` lacks
            any column that ``df1`` has.
    """
    if len(df1.columns) < 2:
        raise ValueError(
            "The first file must contain at least an ID column and a name column."
        )
    id_col = df1.columns[0]
    name_col = df1.columns[1]
    repeat_columns = list(df1.columns[2:])

    # Fail with a readable message instead of a bare KeyError on lookup.
    missing = [str(col) for col in df1.columns if col not in df2.columns]
    if missing:
        raise ValueError(
            "The second file is missing column(s): " + ", ".join(missing)
        )

    # Position of the first df2 row for each (ID, name) pair, so that every
    # df1 row is matched with one dict lookup rather than a scan of df2.
    first_row_at = {}
    for position, key in enumerate(zip(df2[id_col], df2[name_col])):
        first_row_at.setdefault(key, position)

    differences = []
    for _, row1 in df1.iterrows():
        entry_id = row1[id_col]
        protein_name = row1[name_col]
        # A missing ID or name never compares equal to anything, so such rows
        # cannot have a partner in df2.
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue
        position = first_row_at.get((entry_id, protein_name))
        if position is None:
            continue
        row2 = df2.iloc[position]

        for repeat_col in repeat_columns:
            freq1 = row1[repeat_col]
            freq2 = row2[repeat_col]
            if pd.isna(freq1) or pd.isna(freq2) or freq1 == 0:
                continue
            if freq1 != freq2:
                differences.append({
                    id_col: entry_id,
                    name_col: protein_name,
                    "Repeat": repeat_col,
                    "Frequency File 1": freq1,
                    "Frequency File 2": freq2,
                    "Difference": abs(freq1 - freq2),
                    "%age Change": ((freq2 - freq1) / freq1) * 100,
                })
    return differences


def _styled_excel_bytes(result_df):
    """Write ``result_df`` to an in-memory .xlsx with coloured percentages.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds a workbook with a
        single sheet named "Changed Repeats".
    """
    styler = result_df.style
    # Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
    # use the new name when it exists and fall back for older releases.
    cellwise = styler.map if hasattr(styler, "map") else styler.applymap
    styled_df = cellwise(_color_pct, subset=["%age Change"])

    output = BytesIO()
    with pd.ExcelWriter(output, engine="openpyxl") as writer:
        styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats")
    output.seek(0)
    return output


# Run the comparison only once both workbooks have been uploaded.
if uploaded_file1 is not None and uploaded_file2 is not None:
    try:
        df1 = pd.read_excel(uploaded_file1, header=0)
        df2 = pd.read_excel(uploaded_file2, header=0)
        # Header cells may be read as numbers; compare column names as text.
        df1.columns = df1.columns.astype(str)
        df2.columns = df2.columns.astype(str)

        differences = _find_changed_repeats(df1, df2)
        if differences:
            # Largest absolute change first.
            result_df = pd.DataFrame(differences).sort_values(
                by="Difference", ascending=False
            )
            output = _styled_excel_bytes(result_df)
            st.success("✅ Comparison complete. Showing only changed repeats.")
            st.download_button(
                label="📥 Download Excel",
                data=output,
                file_name="changed_repeats_with_percentage.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
        else:
            st.info("No changes in repeat frequencies were found.")
    except Exception as e:
        # Top-level UI boundary: show any failure in the page instead of
        # letting the script crash.
        st.error(f"⚠ Error: {e}")