Jayesh13 committed on
Commit
d15c119
·
verified ·
1 Parent(s): 2f64609

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
# Columns that identify a protein row; rows are matched on these, never diffed.
_KEY_COLUMNS = ["Entry ID", "Protein Name"]


def _uploaded_path(upload):
    """Return the filesystem path behind an uploaded-file argument.

    Accepts either an object exposing the path as ``.name`` (what the
    original code required) or a plain path string, so the function works
    regardless of which form the UI layer passes in.
    """
    return getattr(upload, "name", upload)


def _is_number_column(series):
    """Return True when *series* holds numbers that can be subtracted."""
    dtype_checks = pd.api.types
    # Booleans count as "numeric" for pandas but do not support subtraction.
    return dtype_checks.is_numeric_dtype(series) and not dtype_checks.is_bool_dtype(series)


def _repeat_differences(df1, df2):
    """Build the per-protein absolute-difference table for two data frames.

    Rows are matched with an inner merge on ``Entry ID`` and
    ``Protein Name``; proteins present in only one frame are therefore not
    part of the result. Every other column that exists in both frames and
    is numeric in both is compared.

    Returns a DataFrame with the key columns, one ``<column>_diff`` column
    per compared column, and a ``Total Difference`` column, sorted by
    ``Total Difference`` in descending order.

    Raises:
        KeyError: if a key column is missing from either frame.
    """
    missing = [
        key for key in _KEY_COLUMNS
        if key not in df1.columns or key not in df2.columns
    ]
    if missing:
        raise KeyError(
            f"Both files must contain the column(s): {', '.join(missing)}"
        )

    # Select the compared columns by name rather than by position, so the
    # key columns may sit anywhere in the sheet. Order follows the first file.
    repeat_cols = [
        col for col in df1.columns
        if col in df2.columns
        and col not in _KEY_COLUMNS
        and _is_number_column(df1[col])
        and _is_number_column(df2[col])
    ]

    wanted = _KEY_COLUMNS + repeat_cols
    merged = pd.merge(
        df1[wanted],
        df2[wanted],
        on=_KEY_COLUMNS,
        suffixes=("_file1", "_file2"),
    )

    # Assemble all columns in a dict first and build the frame once.
    diff_data = {key: merged[key] for key in _KEY_COLUMNS}
    diff_names = []
    for col in repeat_cols:
        diff_name = f"{col}_diff"
        diff_data[diff_name] = (
            merged[f"{col}_file1"] - merged[f"{col}_file2"]
        ).abs()
        diff_names.append(diff_name)

    diff_df = pd.DataFrame(diff_data)
    diff_df["Total Difference"] = diff_df[diff_names].sum(axis=1)
    return diff_df.sort_values(by="Total Difference", ascending=False)


def compare_protein_repeat_frequencies(file1, file2):
    """Compare two Excel sheets of protein repeat data and export the result.

    Args:
        file1: First uploaded Excel file (object with ``.name`` or a path).
        file2: Second uploaded Excel file (object with ``.name`` or a path).

    Returns:
        Path of the generated ``comparison_output.xlsx`` workbook, holding
        the absolute per-column differences and their row total, largest
        total first.

    Raises:
        KeyError: if ``Entry ID`` or ``Protein Name`` is missing from
            either workbook.
    """
    import os
    import tempfile

    # Load both Excel files
    df1 = pd.read_excel(_uploaded_path(file1))
    df2 = pd.read_excel(_uploaded_path(file2))

    diff_df_sorted = _repeat_differences(df1, df2)

    # Write into a fresh directory per call: a single fixed file in the
    # working directory would be overwritten by overlapping requests.
    output_dir = tempfile.mkdtemp(prefix="protein_repeat_")
    output_path = os.path.join(output_dir, "comparison_output.xlsx")
    diff_df_sorted.to_excel(output_path, index=False)
    return output_path
36
+
37
# Gradio UI: two Excel uploads in, one downloadable comparison workbook out.
interface = gr.Interface(
    fn=compare_protein_repeat_frequencies,
    inputs=[
        gr.File(label="Upload First Excel File"),
        gr.File(label="Upload Second Excel File"),
    ],
    outputs=gr.File(label="Download Comparison Excel"),
    title="Protein Repeat Comparator",
    description="Upload two Excel files containing protein repeat data. The app will compare frequencies and return a sorted Excel file showing differences.",
)

# Start the web server only when this file is executed as a script, so that
# importing the module (for reuse or testing) does not launch it.
if __name__ == "__main__":
    interface.launch()