Jayesh13 committed on
Commit
5223b02
·
verified ·
1 Parent(s): 26343ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -39
app.py CHANGED
@@ -7,54 +7,65 @@ from io import BytesIO
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
- st.write("Upload two Excel files containing protein repeat frequencies. The tool will compare the values and return a sorted Excel file based on frequency differences.")
11
 
12
- # File upload
13
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
14
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
15
 
16
  if uploaded_file1 and uploaded_file2:
17
  try:
18
- # Read both Excel files
19
- df1 = pd.read_excel(uploaded_file1)
20
- df2 = pd.read_excel(uploaded_file2)
21
-
22
- # Ensure structure compatibility
23
- common_cols = df1.columns.intersection(df2.columns)
24
- df1 = df1[common_cols]
25
- df2 = df2[common_cols]
26
-
27
- # Merge on Entry ID and Protein Name
28
- merged = pd.merge(df1, df2, on=["Entry ID", "Protein Name"], suffixes=('_file1', '_file2'))
29
-
30
- # Compute differences
31
- repeat_cols = common_cols[2:]
32
- diff_data = {
33
- "Entry ID": merged["Entry ID"],
34
- "Protein Name": merged["Protein Name"]
35
- }
36
-
37
- for col in repeat_cols:
38
- diff_data[col + "_diff"] = (merged[col + "_file1"] - merged[col + "_file2"]).abs()
39
-
40
- diff_df = pd.DataFrame(diff_data)
41
- diff_df["Total Difference"] = diff_df.iloc[:, 2:].sum(axis=1)
42
- sorted_diff = diff_df.sort_values(by="Total Difference", ascending=False)
43
-
44
- # Save to in-memory buffer
45
- output_buffer = BytesIO()
46
- with pd.ExcelWriter(output_buffer, engine="openpyxl") as writer:
47
- sorted_diff.to_excel(writer, index=False)
48
- output_buffer.seek(0)
49
-
50
- st.success(" Comparison complete!")
 
 
 
 
 
 
 
 
 
 
 
 
51
  st.download_button(
52
- label="📥 Download Comparison Excel",
53
- data=output_buffer,
54
- file_name="protein_repeat_comparison.xlsx",
55
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
56
  )
57
 
58
  except Exception as e:
59
  st.error(f"⚠️ Error: {e}")
60
-
 
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
+ st.write("Upload two Excel files. Only changed repeat frequencies will be shown in the result.")
11
 
 
12
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
13
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
14
 
15
  if uploaded_file1 and uploaded_file2:
16
  try:
17
+ # Read both Excel files, assuming header is in second row
18
+ df1 = pd.read_excel(uploaded_file1, header=1)
19
+ df2 = pd.read_excel(uploaded_file2, header=1)
20
+
21
+ # Column names
22
+ id_col = "Entry ID"
23
+ name_col = "Protein Name"
24
+ repeat_cols = [col for col in df1.columns if col not in [id_col, name_col]]
25
+
26
+ records = []
27
+
28
+ for _, row1 in df1.iterrows():
29
+ entry_id = row1[id_col]
30
+ protein_name = row1[name_col]
31
+
32
+ # Match protein in second file
33
+ match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
34
+ if match.empty:
35
+ continue
36
+ row2 = match.iloc[0]
37
+
38
+ for repeat in repeat_cols:
39
+ freq1 = row1[repeat]
40
+ freq2 = row2[repeat]
41
+ if freq1 != freq2:
42
+ diff = abs(freq1 - freq2)
43
+ records.append({
44
+ "Entry ID": entry_id,
45
+ "Protein Name": protein_name,
46
+ "Repeat": repeat,
47
+ "Frequency File 1": freq1,
48
+ "Frequency File 2": freq2,
49
+ "Difference": diff
50
+ })
51
+
52
+ result_df = pd.DataFrame(records)
53
+ result_df = result_df.sort_values(by="Difference", ascending=False)
54
+
55
+ # In-memory Excel
56
+ output = BytesIO()
57
+ with pd.ExcelWriter(output, engine="openpyxl") as writer:
58
+ result_df.to_excel(writer, index=False)
59
+ output.seek(0)
60
+
61
+ st.success("✅ Comparison complete! Showing only changed repeats.")
62
  st.download_button(
63
+ label="📥 Download Changed Repeats Excel",
64
+ data=output,
65
+ file_name="changed_protein_repeats.xlsx",
66
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
67
  )
68
 
69
  except Exception as e:
70
  st.error(f"⚠️ Error: {e}")
71
+