Jayesh13 committed on
Commit
c46f460
·
verified ·
1 Parent(s): 428afaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -48
app.py CHANGED
@@ -227,56 +227,101 @@ if app_choice == "🔍 Protein Repeat Finder":
227
  result_df = pd.DataFrame(rows)
228
  st.dataframe(result_df)
229
 
 
230
  # ------------------- COMPARATOR FUNCTIONALITY -------------------
231
  elif app_choice == "📊 Protein Comparator":
232
- st.write("Upload two Excel files with protein data to compare repeat frequencies.")
233
-
234
- file1 = st.file_uploader("Upload First Excel File", type=["xlsx"], key="comp1")
235
- file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"], key="comp2")
236
-
237
- if file1 and file2:
238
- df1 = pd.read_excel(file1, header=0)
239
- df2 = pd.read_excel(file2, header=0)
240
-
241
- df1.columns = df1.columns.astype(str)
242
- df2.columns = df2.columns.astype(str)
243
-
244
- id_col = df1.columns[0]
245
- name_col = df1.columns[1]
246
- repeat_columns = df1.columns[2:]
247
-
248
- differences = []
249
-
250
- for i in range(len(df1)):
251
- row1 = df1.iloc[i]
252
- row2 = df2.iloc[i] if i < len(df2) else None
253
- if row2 is not None:
254
- diff_row = {
255
- "Entry": row1[id_col],
256
- "Protein Name": row1[name_col]
257
- }
258
- for repeat in repeat_columns:
259
- val1 = row1.get(repeat, 0)
260
- val2 = row2.get(repeat, 0)
261
- diff_row[repeat] = abs(val1 - val2)
262
- differences.append(diff_row)
263
-
264
- result_df = pd.DataFrame(differences)
265
- st.dataframe(result_df)
266
 
267
- def to_excel(df):
268
- output = BytesIO()
269
- writer = pd.ExcelWriter(output, engine='xlsxwriter')
270
- df.to_excel(writer, index=False, sheet_name='Comparison')
271
- writer.close()
272
- output.seek(0)
273
- return output
274
 
275
- excel_file = to_excel(result_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
- st.download_button(
278
- label="Download Comparison Excel",
279
- data=excel_file,
280
- file_name="comparison_result.xlsx",
281
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
282
- )
 
227
  result_df = pd.DataFrame(rows)
228
  st.dataframe(result_df)
229
 
230
+ # ------------------- COMPARATOR FUNCTIONALITY -------------------
231
  # ------------------- COMPARATOR FUNCTIONALITY -------------------
232
  elif app_choice == "📊 Protein Comparator":
233
+ st.set_page_config(page_title="Protein Repeat Comparator", layout="centered")
234
+ st.title("🧬 Protein Repeat Comparator")
235
+ st.write("Upload two Excel files with protein data. Frequency values should start from the first row (header).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
+ uploaded_file1 = st.file_uploader("Upload First Excel File", type=["xlsx"], key="comp1")
238
+ uploaded_file2 = st.file_uploader("Upload Second Excel File", type=["xlsx"], key="comp2")
 
 
 
 
 
239
 
240
+ if uploaded_file1 and uploaded_file2:
241
+ try:
242
+ df1 = pd.read_excel(uploaded_file1, header=0)
243
+ df2 = pd.read_excel(uploaded_file2, header=0)
244
+
245
+ df1.columns = df1.columns.astype(str)
246
+ df2.columns = df2.columns.astype(str)
247
+
248
+ id_col = df1.columns[0]
249
+ name_col = df1.columns[1]
250
+ repeat_columns = df1.columns[2:]
251
+
252
+ differences = []
253
+
254
+ for _, row1 in df1.iterrows():
255
+ entry_id = row1[id_col]
256
+ protein_name = row1[name_col]
257
+
258
+ row2_match = df2[(df2[id_col] == entry_id) & (df2[name_col] == protein_name)]
259
+ if row2_match.empty:
260
+ continue
261
+
262
+ row2 = row2_match.iloc[0]
263
+
264
+ for repeat_col in repeat_columns:
265
+ freq1 = row1[repeat_col]
266
+ freq2 = row2[repeat_col]
267
+
268
+ if pd.isna(freq1) or pd.isna(freq2):
269
+ continue
270
+
271
+ if freq1 != freq2:
272
+ if freq1 == 0:
273
+ pct_change = "Infinity"
274
+ else:
275
+ pct_change = ((freq2 - freq1) / freq1) * 100
276
+ pct_change = round(pct_change, 2)
277
+
278
+ diff = abs(freq1 - freq2)
279
+ differences.append({
280
+ id_col: entry_id,
281
+ name_col: protein_name,
282
+ "Repeat": repeat_col,
283
+ "Frequency File 1": freq1,
284
+ "Frequency File 2": freq2,
285
+ "Difference": diff,
286
+ "%age Change": pct_change
287
+ })
288
+
289
+ if differences:
290
+ result_df = pd.DataFrame(differences)
291
+ result_df = result_df.sort_values(by="Difference", ascending=False)
292
+
293
+ # Show DataFrame in Streamlit app
294
+ st.subheader("🔍 View Changed Repeats")
295
+ st.dataframe(result_df, use_container_width=True)
296
+
297
+ # Apply styling
298
+ def color_pct(val):
299
+ if isinstance(val, str) and val == "Infinity":
300
+ return 'color: green'
301
+ elif isinstance(val, (int, float)):
302
+ if val > 0:
303
+ return 'color: green'
304
+ elif val < 0:
305
+ return 'color: red'
306
+ return ''
307
+
308
+ styled_df = result_df.style.applymap(color_pct, subset=["%age Change"])
309
+
310
+ # Save styled output
311
+ output = BytesIO()
312
+ with pd.ExcelWriter(output, engine='openpyxl') as writer:
313
+ styled_df.to_excel(writer, index=False, sheet_name="Changed Repeats")
314
+ output.seek(0)
315
+
316
+ st.download_button(
317
+ label="📥 Download Excel File",
318
+ data=output,
319
+ file_name="changed_repeats_with_percentage.xlsx",
320
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
321
+ )
322
+ else:
323
+ st.info("No changes in repeat frequencies were found.")
324
+
325
+ except Exception as e:
326
+ st.error(f"⚠ Error: {e}")
327