Jayesh13 committed on
Commit
5296403
·
verified ·
1 Parent(s): 128ce67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -7,58 +7,58 @@ from io import BytesIO
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
- st.write("Upload two Excel files (from 2nd row are frequencies). First column: Protein ID, Second column: Name.")
11
 
12
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
13
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
14
 
15
  if uploaded_file1 and uploaded_file2:
16
  try:
17
- # Read both Excel files assuming header is in 2nd row (i.e., row index 1)
18
  df1 = pd.read_excel(uploaded_file1, header=1)
19
  df2 = pd.read_excel(uploaded_file2, header=1)
20
 
21
- # Ensure columns are strings
22
  df1.columns = df1.columns.astype(str)
23
  df2.columns = df2.columns.astype(str)
24
 
25
  # Get ID and Name columns
26
  id_col = df1.columns[0]
27
  name_col = df1.columns[1]
28
- repeat_cols = df1.columns[2:] # all other columns are repeat names
29
 
30
- records = []
31
 
32
- for idx, row1 in df1.iterrows():
33
  entry_id = row1[id_col]
34
  protein_name = row1[name_col]
35
 
36
- # Get matching row from second file
37
  row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
38
  if row2_match.empty:
39
  continue
 
40
  row2 = row2_match.iloc[0]
41
 
42
- for repeat in repeat_cols:
43
- freq1 = row1[repeat]
44
- freq2 = row2[repeat]
45
 
46
  if pd.isna(freq1) or pd.isna(freq2):
47
- continue # skip missing values
48
 
49
  if freq1 != freq2:
50
  diff = abs(freq1 - freq2)
51
- records.append({
52
  id_col: entry_id,
53
  name_col: protein_name,
54
- "Repeat": repeat,
55
  "Frequency File 1": freq1,
56
  "Frequency File 2": freq2,
57
  "Difference": diff
58
  })
59
 
60
- if records:
61
- result_df = pd.DataFrame(records)
62
  result_df = result_df.sort_values(by="Difference", ascending=False)
63
 
64
  output = BytesIO()
@@ -66,15 +66,15 @@ if uploaded_file1 and uploaded_file2:
66
  result_df.to_excel(writer, index=False)
67
  output.seek(0)
68
 
69
- st.success("✅ Comparison complete! Only differences are shown below.")
70
  st.download_button(
71
- label="📥 Download Result Excel",
72
  data=output,
73
- file_name="protein_repeat_diff.xlsx",
74
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
75
  )
76
  else:
77
- st.info("No differences found between the two files.")
78
 
79
  except Exception as e:
80
  st.error(f"⚠️ Error: {e}")
 
7
 
8
  st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
9
  st.title("🧬 Protein Repeat Comparator")
10
+ st.write("Upload two Excel files with protein data. Frequency values should start from the second row.")
11
 
12
  uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"])
13
  uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"])
14
 
15
  if uploaded_file1 and uploaded_file2:
16
  try:
17
+ # Read files, header starts at second row
18
  df1 = pd.read_excel(uploaded_file1, header=1)
19
  df2 = pd.read_excel(uploaded_file2, header=1)
20
 
21
+ # Ensure column names are strings
22
  df1.columns = df1.columns.astype(str)
23
  df2.columns = df2.columns.astype(str)
24
 
25
  # Get ID and Name columns
26
  id_col = df1.columns[0]
27
  name_col = df1.columns[1]
28
+ repeat_columns = df1.columns[2:] # Repeat columns start from index 2
29
 
30
+ differences = []
31
 
32
+ for _, row1 in df1.iterrows():
33
  entry_id = row1[id_col]
34
  protein_name = row1[name_col]
35
 
 
36
  row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
37
  if row2_match.empty:
38
  continue
39
+
40
  row2 = row2_match.iloc[0]
41
 
42
+ for repeat_col in repeat_columns:
43
+ freq1 = row1[repeat_col]
44
+ freq2 = row2[repeat_col]
45
 
46
  if pd.isna(freq1) or pd.isna(freq2):
47
+ continue
48
 
49
  if freq1 != freq2:
50
  diff = abs(freq1 - freq2)
51
+ differences.append({
52
  id_col: entry_id,
53
  name_col: protein_name,
54
+ "Repeat": repeat_col,
55
  "Frequency File 1": freq1,
56
  "Frequency File 2": freq2,
57
  "Difference": diff
58
  })
59
 
60
+ if differences:
61
+ result_df = pd.DataFrame(differences)
62
  result_df = result_df.sort_values(by="Difference", ascending=False)
63
 
64
  output = BytesIO()
 
66
  result_df.to_excel(writer, index=False)
67
  output.seek(0)
68
 
69
+ st.success("✅ Comparison complete. Showing only changed repeats.")
70
  st.download_button(
71
+ label="📥 Download Excel",
72
  data=output,
73
+ file_name="changed_repeats.xlsx",
74
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
75
  )
76
  else:
77
+ st.info("No changes in repeat frequencies were found.")
78
 
79
  except Exception as e:
80
  st.error(f"⚠️ Error: {e}")