"""Streamlit app that compares protein repeat frequencies between two Excel files.

Both workbooks are read with the header on the second row. The first two
columns of the first file identify a protein (ID and name); every remaining
column is treated as a repeat whose frequency is compared between the files.
Only repeats whose frequency differs are written to the downloadable result.
"""

import importlib
import importlib.util
import os  # noqa: F401  (retained from the original import list)
import subprocess
import sys
from io import BytesIO

import pandas as pd

# Importable module names; each is also the name pip installs it under.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl")


def _ensure_dependencies() -> None:
    """Install any required package that is not importable yet.

    The check keeps Streamlit reruns from invoking pip every time, and using
    ``sys.executable -m pip`` with an argument list targets the running
    interpreter without going through a shell.
    """
    missing = [
        package
        for package in _REQUIRED_PACKAGES
        if importlib.util.find_spec(package) is None
    ]
    if not missing:
        return
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *missing],
        check=False,
    )
    # Make freshly installed packages visible to the import system.
    importlib.invalidate_caches()


def _abs_difference(freq1, freq2) -> float:
    """Return ``abs(freq1 - freq2)``, or NaN when the values are not numeric."""
    try:
        return abs(freq1 - freq2)
    except TypeError:
        return float("nan")


def find_changed_repeats(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Return the repeats whose frequency differs between ``df1`` and ``df2``.

    Rows are matched on the first two columns of ``df1`` (ID and name). When
    the second frame contains several rows with the same key, the first one is
    used. Rows of ``df1`` without a match are skipped. A cell that is empty in
    both frames is not reported as a change.

    Args:
        df1: First table; columns are ID, name, then one column per repeat.
        df2: Second table; must contain every column of ``df1``.

    Returns:
        A frame with the ID and name columns plus ``Repeat``,
        ``Frequency File 1``, ``Frequency File 2`` and ``Difference``, sorted
        by ``Difference`` in descending order. The frame is empty (but keeps
        its columns) when nothing changed.

    Raises:
        ValueError: If ``df1`` has fewer than two columns or ``df2`` lacks a
            column present in ``df1``.
    """
    if len(df1.columns) < 2:
        raise ValueError(
            "The first file must contain at least an ID column and a name column."
        )

    id_col, name_col = df1.columns[0], df1.columns[1]
    repeat_cols = list(df1.columns[2:])
    columns = [id_col, name_col, *repeat_cols]

    missing = [column for column in columns if column not in df2.columns]
    if missing:
        raise ValueError(
            "The second file is missing column(s): "
            + ", ".join(str(column) for column in missing)
        )

    # Index the second file once instead of scanning it for every row of the
    # first file. ``setdefault`` keeps the first row seen for each key.
    second_by_key = {}
    for entry_id, protein_name, *freqs in df2[columns].itertuples(
        index=False, name=None
    ):
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue  # a missing key never matches anything
        second_by_key.setdefault((entry_id, protein_name), freqs)

    records = []
    for entry_id, protein_name, *freqs1 in df1.itertuples(index=False, name=None):
        if pd.isna(entry_id) or pd.isna(protein_name):
            continue
        freqs2 = second_by_key.get((entry_id, protein_name))
        if freqs2 is None:
            continue
        for repeat, freq1, freq2 in zip(repeat_cols, freqs1, freqs2):
            # NaN != NaN, so blanks in both files need an explicit check.
            if freq1 == freq2 or (pd.isna(freq1) and pd.isna(freq2)):
                continue
            records.append(
                {
                    id_col: entry_id,
                    name_col: protein_name,
                    "Repeat": repeat,
                    "Frequency File 1": freq1,
                    "Frequency File 2": freq2,
                    "Difference": _abs_difference(freq1, freq2),
                }
            )

    # Explicit columns keep "Difference" present even when nothing changed;
    # dict.fromkeys drops a duplicate if an input header collides with one.
    result_columns = list(
        dict.fromkeys(
            [
                id_col,
                name_col,
                "Repeat",
                "Frequency File 1",
                "Frequency File 2",
                "Difference",
            ]
        )
    )
    result = pd.DataFrame(records, columns=result_columns)
    # mergesort is stable, so ties keep the order of the first file.
    result = result.sort_values(by="Difference", ascending=False, kind="mergesort")
    return result.reset_index(drop=True)


def main() -> None:
    """Render the upload form and offer the changed repeats as an Excel file."""
    _ensure_dependencies()

    # Imported here so the comparison helpers above stay importable without
    # the UI dependency.
    import streamlit as st

    st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
    st.title("🧬 Protein Repeat Comparator")
    st.write(
        "Upload two Excel files. Only changed repeat frequencies will be shown in the result."
    )

    uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
    uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])

    if not (uploaded_file1 and uploaded_file2):
        return

    # Top-level UI boundary: any failure is shown to the user rather than
    # surfacing as a traceback.
    try:
        # Read both Excel files, assuming the header is in the second row.
        df1 = pd.read_excel(uploaded_file1, header=1)
        df2 = pd.read_excel(uploaded_file2, header=1)

        result_df = find_changed_repeats(df1, df2)

        if result_df.empty:
            st.info("No changed repeat frequencies were found between the two files.")
            return

        output = BytesIO()
        with pd.ExcelWriter(output, engine="openpyxl") as writer:
            result_df.to_excel(writer, index=False)
        output.seek(0)

        st.success("✅ Comparison complete! Showing only changed repeats.")
        st.download_button(
            label="📥 Download Changed Repeats Excel",
            data=output,
            file_name="changed_protein_repeats.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    except Exception as e:
        st.error(f"⚠️ Error: {e}")


# `streamlit run` executes this file as `__main__`, so the app still renders,
# while a plain import only defines the helpers.
if __name__ == "__main__":
    main()