"""Gradio app that compares protein repeat frequencies between two Excel files."""

import os

import gradio as gr
import pandas as pd

# Columns that identify a protein; rows of the two files are matched on these.
KEY_COLUMNS = ["Entry ID", "Protein Name"]

# NOTE: every request writes this same file in the working directory, so
# concurrent comparisons overwrite each other's output.
OUTPUT_PATH = "comparison_output.xlsx"


def _upload_path(upload):
    """Return the filesystem path behind a Gradio file upload.

    Depending on the Gradio version and the ``type`` of the ``gr.File``
    component, the handler receives either a path string or a
    tempfile-like object exposing the path as ``.name``; both are accepted.

    Raises:
        gr.Error: if no file was uploaded (``upload`` is ``None``).
    """
    if upload is None:
        raise gr.Error("Please upload both Excel files before comparing.")
    # Check for path-likes first: ``pathlib.Path.name`` is only the basename.
    if isinstance(upload, (str, os.PathLike)):
        return upload
    return upload.name


def _diff_frames(df1, df2):
    """Return per-protein absolute differences between two repeat tables.

    Only columns present in both frames are compared. Rows are matched with
    an inner merge on ``KEY_COLUMNS``, so proteins that appear in only one
    frame are dropped. The result holds the key columns, one
    ``<column>_diff`` column per shared value column, and a
    ``Total Difference`` column, sorted by that total in descending order.

    Raises:
        KeyError: if either frame lacks one of ``KEY_COLUMNS``.
    """
    missing = [
        key
        for key in KEY_COLUMNS
        if key not in df1.columns or key not in df2.columns
    ]
    if missing:
        raise KeyError(
            "Both files must contain the column(s): "
            + ", ".join(repr(key) for key in missing)
        )

    common_cols = df1.columns.intersection(df2.columns)
    # Select value columns by name rather than by position: the key columns
    # are not guaranteed to be the first two entries of the intersection.
    value_cols = [col for col in common_cols if col not in KEY_COLUMNS]

    merged = pd.merge(
        df1[common_cols],
        df2[common_cols],
        on=KEY_COLUMNS,
        suffixes=("_file1", "_file2"),
    )

    diff_data = {key: merged[key] for key in KEY_COLUMNS}
    diff_names = []
    for col in value_cols:
        # f-strings keep this working for non-string headers (e.g. integers),
        # matching how pandas builds the suffixed names during the merge.
        diff_name = f"{col}_diff"
        diff_data[diff_name] = (
            merged[f"{col}_file1"] - merged[f"{col}_file2"]
        ).abs()
        diff_names.append(diff_name)

    diff_df = pd.DataFrame(diff_data)
    diff_df["Total Difference"] = diff_df[diff_names].sum(axis=1)
    # A stable sort keeps tied rows in merge order, so output is deterministic.
    return diff_df.sort_values(
        by="Total Difference", ascending=False, kind="stable"
    )


def compare_protein_repeat_frequencies(file1, file2):
    """Compare two uploaded Excel files and write the differences to Excel.

    Args:
        file1: First upload from ``gr.File`` (path string or object with
            a ``.name`` path attribute).
        file2: Second upload, same form as ``file1``.

    Returns:
        Path of the written workbook (``OUTPUT_PATH``), which contains one
        row per protein found in both files, sorted by total difference.

    Raises:
        gr.Error: if either upload is missing.
        KeyError: if either file lacks the "Entry ID" or "Protein Name"
            column.
    """
    df1 = pd.read_excel(_upload_path(file1))
    df2 = pd.read_excel(_upload_path(file2))

    diff_df_sorted = _diff_frames(df1, df2)

    diff_df_sorted.to_excel(OUTPUT_PATH, index=False)
    return OUTPUT_PATH


# Gradio UI
interface = gr.Interface(
    fn=compare_protein_repeat_frequencies,
    inputs=[
        gr.File(label="Upload First Excel File"),
        gr.File(label="Upload Second Excel File"),
    ],
    outputs=gr.File(label="Download Comparison Excel"),
    title="Protein Repeat Comparator",
    description="Upload two Excel files containing protein repeat data. The app will compare frequencies and return a sorted Excel file showing differences.",
)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse the comparison function) does not block on a web server.
if __name__ == "__main__":
    interface.launch()