Jayesh13 committed on
Commit
128ce67
·
verified ·
1 Parent(s): a0d5a6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -19
app.py CHANGED
@@ -7,45 +7,51 @@ from io import BytesIO
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
- st.write("Upload two Excel files. Only changed repeat frequencies will be shown in the result.")
11
 
12
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
13
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
14
 
15
  if uploaded_file1 and uploaded_file2:
16
  try:
17
- # Read Excel files (header starts from second row)
18
  df1 = pd.read_excel(uploaded_file1, header=1)
19
  df2 = pd.read_excel(uploaded_file2, header=1)
20
 
21
- # Automatically get ID and Name column
 
 
 
 
22
  id_col = df1.columns[0]
23
  name_col = df1.columns[1]
24
- repeat_cols = df1.columns[2:]
25
-
26
- # Ensure all column names are strings
27
- repeat_cols = [str(col) for col in repeat_cols]
28
 
29
  records = []
30
 
31
- for _, row1 in df1.iterrows():
32
  entry_id = row1[id_col]
33
  protein_name = row1[name_col]
34
 
35
- match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
36
- if match.empty:
 
37
  continue
38
- row2 = match.iloc[0]
39
 
40
  for repeat in repeat_cols:
41
  freq1 = row1[repeat]
42
  freq2 = row2[repeat]
 
 
 
 
43
  if freq1 != freq2:
44
  diff = abs(freq1 - freq2)
45
  records.append({
46
  id_col: entry_id,
47
  name_col: protein_name,
48
- "Repeat": str(repeat),
49
  "Frequency File 1": freq1,
50
  "Frequency File 2": freq2,
51
  "Difference": diff
@@ -56,20 +62,19 @@ if uploaded_file1 and uploaded_file2:
56
  result_df = result_df.sort_values(by="Difference", ascending=False)
57
 
58
  output = BytesIO()
59
- with pd.ExcelWriter(output, engine="openpyxl") as writer:
60
  result_df.to_excel(writer, index=False)
61
  output.seek(0)
62
 
63
- st.success("✅ Comparison complete! Only changed repeats are included.")
64
  st.download_button(
65
- label="📥 Download Changed Repeats Excel",
66
  data=output,
67
- file_name="changed_protein_repeats.xlsx",
68
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
69
  )
70
  else:
71
- st.info("No changes in repeat frequencies were found.")
72
 
73
  except Exception as e:
74
- st.error(f"⚠️ Error: {e}")
75
-
 
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
+ st.write("Upload two Excel files (from 2nd row are frequencies). First column: Protein ID, Second column: Name.")
11
 
12
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
13
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
14
 
15
  if uploaded_file1 and uploaded_file2:
16
  try:
17
+ # Read both Excel files assuming header is in 2nd row (i.e., row index 1)
18
  df1 = pd.read_excel(uploaded_file1, header=1)
19
  df2 = pd.read_excel(uploaded_file2, header=1)
20
 
21
+ # Ensure columns are strings
22
+ df1.columns = df1.columns.astype(str)
23
+ df2.columns = df2.columns.astype(str)
24
+
25
+ # Get ID and Name columns
26
  id_col = df1.columns[0]
27
  name_col = df1.columns[1]
28
+ repeat_cols = df1.columns[2:] # all other columns are repeat names
 
 
 
29
 
30
  records = []
31
 
32
+ for idx, row1 in df1.iterrows():
33
  entry_id = row1[id_col]
34
  protein_name = row1[name_col]
35
 
36
+ # Get matching row from second file
37
+ row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
38
+ if row2_match.empty:
39
  continue
40
+ row2 = row2_match.iloc[0]
41
 
42
  for repeat in repeat_cols:
43
  freq1 = row1[repeat]
44
  freq2 = row2[repeat]
45
+
46
+ if pd.isna(freq1) or pd.isna(freq2):
47
+ continue # skip missing values
48
+
49
  if freq1 != freq2:
50
  diff = abs(freq1 - freq2)
51
  records.append({
52
  id_col: entry_id,
53
  name_col: protein_name,
54
+ "Repeat": repeat,
55
  "Frequency File 1": freq1,
56
  "Frequency File 2": freq2,
57
  "Difference": diff
 
62
  result_df = result_df.sort_values(by="Difference", ascending=False)
63
 
64
  output = BytesIO()
65
+ with pd.ExcelWriter(output, engine='openpyxl') as writer:
66
  result_df.to_excel(writer, index=False)
67
  output.seek(0)
68
 
69
+ st.success("✅ Comparison complete! Only differences are shown below.")
70
  st.download_button(
71
+ label="📥 Download Result Excel",
72
  data=output,
73
+ file_name="protein_repeat_diff.xlsx",
74
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
75
  )
76
  else:
77
+ st.info("No differences found between the two files.")
78
 
79
  except Exception as e:
80
+ st.error(f"⚠️ Error: {e}")