File size: 1,644 Bytes
d15c119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import tempfile

import gradio as gr
import pandas as pd

def _uploaded_path(upload):
    """Return the filesystem path behind a Gradio upload or a plain path."""
    if upload is None:
        raise ValueError("Both Excel files must be uploaded before comparing.")
    # Plain paths are used as-is. This check must come first because
    # pathlib.Path also has a ``.name`` attribute, but it holds only the
    # basename rather than the full path.
    if isinstance(upload, (str, os.PathLike)):
        return upload
    # File-like upload wrappers expose the on-disk location as ``.name``.
    return upload.name


def compare_protein_repeat_frequencies(file1, file2):
    """Compare per-protein repeat values between two Excel files.

    Both workbooks are read with ``pd.read_excel``. Only columns present in
    both files are used. Rows are matched on the ``"Entry ID"`` and
    ``"Protein Name"`` columns (inner join), and for every other shared
    column the absolute difference between the two files is computed. The
    differences are summed per row into ``"Total Difference"`` and the rows
    are sorted by that total, largest first.

    Args:
        file1: First upload. Either an object whose ``.name`` attribute is
            the path of the uploaded file, or a path (``str`` /
            ``os.PathLike``).
        file2: Second upload, same accepted forms as ``file1``.

    Returns:
        Path of the written ``comparison_output.xlsx`` file. The file is
        placed in a fresh temporary directory on every call.

    Raises:
        ValueError: If either upload is ``None``.
        KeyError: If ``"Entry ID"`` or ``"Protein Name"`` is missing from
            either file.
    """
    key_cols = ["Entry ID", "Protein Name"]

    # Resolve both paths first so a missing upload fails before any file I/O.
    path1 = _uploaded_path(file1)
    path2 = _uploaded_path(file2)

    df1 = pd.read_excel(path1)
    df2 = pd.read_excel(path2)

    # Restrict both frames to the columns they share.
    common_cols = df1.columns.intersection(df2.columns)
    missing_keys = [col for col in key_cols if col not in common_cols]
    if missing_keys:
        raise KeyError(
            f"Both files must contain the column(s) {missing_keys}"
        )

    # Select value columns by name rather than by position, so the key
    # columns may appear anywhere in the sheet.
    repeat_cols = [col for col in common_cols if col not in key_cols]

    merged = pd.merge(
        df1[common_cols],
        df2[common_cols],
        on=key_cols,
        suffixes=("_file1", "_file2"),
    )

    # f-strings keep this working for non-string headers (e.g. integers),
    # which pandas also stringifies when it appends the merge suffixes.
    diffs = {
        f"{col}_diff": (merged[f"{col}_file1"] - merged[f"{col}_file2"]).abs()
        for col in repeat_cols
    }
    diff_df = pd.DataFrame({**{col: merged[col] for col in key_cols}, **diffs})
    diff_df["Total Difference"] = diff_df[list(diffs)].sum(axis=1)

    # A stable sort keeps tied rows in merge order, so output is reproducible.
    diff_df_sorted = diff_df.sort_values(
        by="Total Difference", ascending=False, kind="mergesort"
    )

    # Write into a per-call directory: a fixed path in the working directory
    # would be overwritten by concurrent requests. The base name is kept so
    # the downloaded file name does not change.
    output_dir = tempfile.mkdtemp(prefix="protein_repeat_")
    output_path = os.path.join(output_dir, "comparison_output.xlsx")
    diff_df_sorted.to_excel(output_path, index=False)
    return output_path

# Gradio front end: two file-upload inputs are passed to
# compare_protein_repeat_frequencies, and the path it returns is offered
# back to the user as a downloadable file.
interface = gr.Interface(
    title="Protein Repeat Comparator",
    description=(
        "Upload two Excel files containing protein repeat data. "
        "The app will compare frequencies and return a sorted Excel file "
        "showing differences."
    ),
    fn=compare_protein_repeat_frequencies,
    inputs=[
        gr.File(label=upload_label)
        for upload_label in (
            "Upload First Excel File",
            "Upload Second Excel File",
        )
    ],
    outputs=gr.File(label="Download Comparison Excel"),
)

# Start the web server as soon as this module is executed.
interface.launch()