import os os.system("pip install streamlit pandas xlsxwriter openpyxl") import streamlit as st import pandas as pd from io import BytesIO st.set_page_config(page_title="Protein Repeat Comparator", layout="centered") st.title("🧬 Protein Repeat Comparator") st.write("Upload two Excel files with protein data. Frequency values should start from the first row (header).") uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"]) uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"]) if uploaded_file1 and uploaded_file2: try: df1 = pd.read_excel(uploaded_file1, header=0) df2 = pd.read_excel(uploaded_file2, header=0) df1.columns = df1.columns.astype(str) df2.columns = df2.columns.astype(str) id_col = df1.columns[0] name_col = df1.columns[1] repeat_columns = df1.columns[2:] differences = [] for _, row1 in df1.iterrows(): entry_id = row1[id_col] protein_name = row1[name_col] row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)] if row2_match.empty: continue row2 = row2_match.iloc[0] for repeat_col in repeat_columns: freq1 = row1[repeat_col] freq2 = row2[repeat_col] if pd.isna(freq1) or pd.isna(freq2): continue if freq1 != freq2: if freq1 == 0: pct_change = "Infinity" else: pct_change = ((freq2 - freq1) / freq1) * 100 pct_change = round(pct_change, 2) diff = abs(freq1 - freq2) differences.append({ id_col: entry_id, name_col: protein_name, "Repeat": repeat_col, "Frequency File 1": freq1, "Frequency File 2": freq2, "Difference": diff, "%age Change": pct_change }) if differences: result_df = pd.DataFrame(differences) result_df = result_df.sort_values(by="Difference", ascending=False) # Show DataFrame in Streamlit app st.subheader("🔍 View Changed Repeats") st.dataframe(result_df, use_container_width=True) # Apply styling def color_pct(val): if isinstance(val, str) and val == "Infinity": return 'color: green' elif isinstance(val, (int, float)): if val > 0: return 'color: green' elif val < 0: return 'color: red' return '' styled_df = result_df.style.applymap(color_pct, subset=["%age Change"]) # Save styled output output = BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats") output.seek(0) st.download_button( label="📥 Download Excel File", data=output, file_name="changed_repeats_with_percentage.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) else: st.info("No changes in repeat frequencies were found.") except Exception as e: st.error(f"⚠ Error: {e}")