Spaces:

Jayesh13
/

Protein_repeat_comparator

Sleeping

App Files Files Community

Jayesh13 committed on Apr 10

Commit

5223b02

verified ·

1 Parent(s): 26343ed

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -39

app.py CHANGED Viewed

@@ -7,54 +7,65 @@ from io import BytesIO
 st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
 st.title("🧬 Protein Repeat Comparator")
-st.write("Upload two Excel files containing protein repeat frequencies. The tool will compare the values and return a sorted Excel file based on frequency differences.")
-# File upload
 uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
 uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
 if uploaded_file1 and uploaded_file2:
     try:
-        # Read both Excel files
-        df1 = pd.read_excel(uploaded_file1)
-        df2 = pd.read_excel(uploaded_file2)
-        # Ensure structure compatibility
-        common_cols = df1.columns.intersection(df2.columns)
-        df1 = df1[common_cols]
-        df2 = df2[common_cols]
-        # Merge on Entry ID and Protein Name
-        merged = pd.merge(df1, df2, on=["Entry ID", "Protein Name"], suffixes=('_file1', '_file2'))
-        # Compute differences
-        repeat_cols = common_cols[2:]
-        diff_data = {
-            "Entry ID": merged["Entry ID"],
-            "Protein Name": merged["Protein Name"]
-        }
-        for col in repeat_cols:
-            diff_data[col + "_diff"] = (merged[col + "_file1"] - merged[col + "_file2"]).abs()
-        diff_df = pd.DataFrame(diff_data)
-        diff_df["Total Difference"] = diff_df.iloc[:, 2:].sum(axis=1)
-        sorted_diff = diff_df.sort_values(by="Total Difference", ascending=False)
-        # Save to in-memory buffer
-        output_buffer = BytesIO()
-        with pd.ExcelWriter(output_buffer, engine="openpyxl") as writer:
-            sorted_diff.to_excel(writer, index=False)
-        output_buffer.seek(0)
-        st.success("✅ Comparison complete!")
         st.download_button(
-            label="📥 Download Comparison Excel",
-            data=output_buffer,
-            file_name="protein_repeat_comparison.xlsx",
             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
         )
     except Exception as e:
         st.error(f"⚠️ Error: {e}")

 # Streamlit page: compare two repeat-frequency workbooks and export only the
 # (protein, repeat) pairs whose frequency differs between the two files.
 st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
 st.title("🧬 Protein Repeat Comparator")
 st.write("Upload two Excel files. Only changed repeat frequencies will be shown in the result.")
 uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
 uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
 if uploaded_file1 and uploaded_file2:
     try:
         # Read both Excel files, assuming header is in second row
         df1 = pd.read_excel(uploaded_file1, header=1)
         df2 = pd.read_excel(uploaded_file2, header=1)
         # Column names
         id_col = "Entry ID"
         name_col = "Protein Name"
         key_cols = [id_col, name_col]
         # Report missing key columns with a readable message rather than
         # letting a bare KeyError ("'Entry ID'") reach the user.
         for label, frame in (("first", df1), ("second", df2)):
             missing = [col for col in key_cols if col not in frame.columns]
             if missing:
                 raise ValueError(
                     f"The {label} file is missing required column(s): {', '.join(missing)}"
                 )
         # Compare only repeat columns that exist in both files; a column
         # present in just one file has nothing to be compared against.
         columns_in_file2 = set(df2.columns)
         repeat_cols = [
             col for col in df1.columns
             if col not in key_cols and col in columns_in_file2
         ]
         records = []
         for _, row1 in df1.iterrows():
             entry_id = row1[id_col]
             protein_name = row1[name_col]
             # Match protein in second file (first matching row wins)
             match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
             if match.empty:
                 continue
             row2 = match.iloc[0]
             for repeat in repeat_cols:
                 freq1 = row1[repeat]
                 freq2 = row2[repeat]
                 # NaN != NaN is True, so a cell that is empty in both files
                 # would otherwise be reported as a change.
                 if pd.isna(freq1) and pd.isna(freq2):
                     continue
                 if freq1 != freq2:
                     records.append({
                         "Entry ID": entry_id,
                         "Protein Name": protein_name,
                         "Repeat": repeat,
                         "Frequency File 1": freq1,
                         "Frequency File 2": freq2,
                         "Difference": abs(freq1 - freq2),
                     })
         if not records:
             # An empty DataFrame has no "Difference" column, so sorting it
             # would raise KeyError; tell the user there is nothing to export.
             st.info("No differing repeat frequencies were found between the two files.")
         else:
             result_df = pd.DataFrame(records).sort_values(by="Difference", ascending=False)
             # In-memory Excel
             output = BytesIO()
             with pd.ExcelWriter(output, engine="openpyxl") as writer:
                 result_df.to_excel(writer, index=False)
             output.seek(0)
             st.success("✅ Comparison complete! Showing only changed repeats.")
             st.download_button(
                 label="📥 Download Changed Repeats Excel",
                 data=output,
                 file_name="changed_protein_repeats.xlsx",
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
             )
     except Exception as e:
         # Top-level UI boundary: surface any failure in the page instead of a traceback.
         st.error(f"⚠️ Error: {e}")