Spaces:

Jayesh13
/

Repeat_analysis_with_Comparator

Sleeping

App Files Files Community

Jayesh13 commited on Apr 13

Commit

c46f460

verified ·

1 Parent(s): 428afaa

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -48

app.py CHANGED Viewed

@@ -227,56 +227,101 @@ if app_choice == "🔁 Protein Repeat Finder":
         result_df = pd.DataFrame(rows)
         st.dataframe(result_df)
 # ------------------- COMPARATOR FUNCTIONALITY -------------------
 elif app_choice == "📊 Protein Comparator":
-    st.write("Upload two Excel files with protein data to compare repeat frequencies.")
-    file1 = st.file_uploader("Upload First Excel File", type=["xlsx"], key="comp1")
-    file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"], key="comp2")
-    if file1 and file2:
-        df1 = pd.read_excel(file1, header=0)
-        df2 = pd.read_excel(file2, header=0)
-        df1.columns = df1.columns.astype(str)
-        df2.columns = df2.columns.astype(str)
-        id_col = df1.columns[0]
-        name_col = df1.columns[1]
-        repeat_columns = df1.columns[2:]
-        differences = []
-        for i in range(len(df1)):
-            row1 = df1.iloc[i]
-            row2 = df2.iloc[i] if i < len(df2) else None
-            if row2 is not None:
-                diff_row = {
-                    "Entry": row1[id_col],
-                    "Protein Name": row1[name_col]
-                }
-                for repeat in repeat_columns:
-                    val1 = row1.get(repeat, 0)
-                    val2 = row2.get(repeat, 0)
-                    diff_row[repeat] = abs(val1 - val2)
-                differences.append(diff_row)
-        result_df = pd.DataFrame(differences)
-        st.dataframe(result_df)
-        def to_excel(df):
-            output = BytesIO()
-            writer = pd.ExcelWriter(output, engine='xlsxwriter')
-            df.to_excel(writer, index=False, sheet_name='Comparison')
-            writer.close()
-            output.seek(0)
-            return output
-        excel_file = to_excel(result_df)
-        st.download_button(
-            label="Download Comparison Excel",
-            data=excel_file,
-            file_name="comparison_result.xlsx",
-            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
-        )

         result_df = pd.DataFrame(rows)
         st.dataframe(result_df)
+# ------------------- COMPARATOR FUNCTIONALITY -------------------
 # ------------------- COMPARATOR FUNCTIONALITY -------------------
 elif app_choice == "📊 Protein Comparator":
+    st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
+    st.title("🧬 Protein Repeat Comparator")
+    st.write("Upload two Excel files with protein data. Frequency values should start from the first row (header).")
+    uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"], key="comp1")
+    uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"], key="comp2")
+    if uploaded_file1 and uploaded_file2:
+        try:
+            df1 = pd.read_excel(uploaded_file1, header=0)
+            df2 = pd.read_excel(uploaded_file2, header=0)
+            df1.columns = df1.columns.astype(str)
+            df2.columns = df2.columns.astype(str)
+            id_col = df1.columns[0]
+            name_col = df1.columns[1]
+            repeat_columns = df1.columns[2:]
+            differences = []
+            for _, row1 in df1.iterrows():
+                entry_id = row1[id_col]
+                protein_name = row1[name_col]
+                row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
+                if row2_match.empty:
+                    continue
+                row2 = row2_match.iloc[0]
+                for repeat_col in repeat_columns:
+                    freq1 = row1[repeat_col]
+                    freq2 = row2[repeat_col]
+                    if pd.isna(freq1) or pd.isna(freq2):
+                        continue
+                    if freq1 != freq2:
+                        if freq1 == 0:
+                            pct_change = "Infinity"
+                        else:
+                            pct_change = ((freq2 - freq1) / freq1) * 100
+                            pct_change = round(pct_change, 2)
+                        diff = abs(freq1 - freq2)
+                        differences.append({
+                            id_col: entry_id,
+                            name_col: protein_name,
+                            "Repeat": repeat_col,
+                            "Frequency File 1": freq1,
+                            "Frequency File 2": freq2,
+                            "Difference": diff,
+                            "%age Change": pct_change
+                        })
+            if differences:
+                result_df = pd.DataFrame(differences)
+                result_df = result_df.sort_values(by="Difference", ascending=False)
+                # Show DataFrame in Streamlit app
+                st.subheader("🔍 View Changed Repeats")
+                st.dataframe(result_df, use_container_width=True)
+                # Apply styling
+                def color_pct(val):
+                    if isinstance(val, str) and val == "Infinity":
+                        return 'color: green'
+                    elif isinstance(val, (int, float)):
+                        if val > 0:
+                            return 'color: green'
+                        elif val < 0:
+                            return 'color: red'
+                    return ''
+                styled_df = result_df.style.applymap(color_pct, subset=["%age Change"])
+                # Save styled output
+                output = BytesIO()
+                with pd.ExcelWriter(output, engine='openpyxl') as writer:
+                    styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats")
+                output.seek(0)
+                st.download_button(
+                    label="📥 Download Excel File",
+                    data=output,
+                    file_name="changed_repeats_with_percentage.xlsx",
+                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                )
+            else:
+                st.info("No changes in repeat frequencies were found.")
+        except Exception as e:
+            st.error(f"⚠ Error: {e}")