Spaces:

Jayesh13
/

Protein_repeat_comparator

Sleeping

App Files Files Community

Protein_repeat_comparator / app.py

Jayesh13

Update app.py

5cd21b9 verified 3 months ago

raw

history blame

2.12 kB

	import os
	os.system("pip install streamlit pandas xlsxwriter openpyxl")

	import streamlit as st
	import pandas as pd

	# Page chrome: browser-tab title, centered layout, heading and intro text.
	st.set_page_config(layout="centered", page_title="Protein Repeat Comparator")
	st.title("🧬 Protein Repeat Comparator")
	st.write(
	    "Upload two Excel files containing protein repeat frequencies. "
	    "The tool will compare the values and return a sorted Excel file "
	    "based on frequency differences."
	)

	# Two upload widgets, created in order (first, then second); each accepts
	# only .xlsx files.
	uploaded_file1, uploaded_file2 = (
	    st.file_uploader(f"Upload {ordinal} Excel File", type=["xlsx"])
	    for ordinal in ("First", "Second")
	)

	def _compare_repeat_tables(df1, df2, key_cols=("Entry ID", "Protein Name")):
	    """Return per-protein absolute repeat-frequency differences, largest first.

	    Args:
	        df1: DataFrame read from the first workbook.
	        df2: DataFrame read from the second workbook.
	        key_cols: Columns that identify a protein; both frames must have them.

	    Returns:
	        A DataFrame holding the key columns, one ``<column>_diff`` column for
	        every non-key column shared by both inputs, and a ``Total Difference``
	        column (row-wise sum of the diff columns), sorted by
	        ``Total Difference`` in descending order. Only proteins present in
	        both inputs appear, because the frames are inner-joined on the keys.

	    Raises:
	        ValueError: If either input lacks one of ``key_cols``.
	    """
	    key_cols = list(key_cols)
	    for label, frame in (("first", df1), ("second", df2)):
	        missing = [col for col in key_cols if col not in frame.columns]
	        if missing:
	            raise ValueError(
	                f"The {label} file is missing required column(s): "
	                + ", ".join(missing)
	            )

	    # Restrict both frames to the columns they share so every non-key column
	    # receives a suffix in the merge below.
	    common_cols = df1.columns.intersection(df2.columns)
	    # Pick repeat columns by name, not by position: the key columns are not
	    # guaranteed to be the first two columns of the workbook.
	    repeat_cols = [col for col in common_cols if col not in key_cols]

	    merged = df1[common_cols].merge(
	        df2[common_cols], on=key_cols, suffixes=("_file1", "_file2")
	    )

	    diff_data = {col: merged[col] for col in key_cols}
	    for col in repeat_cols:
	        # f-strings (rather than ``col + "_diff"``) also work for headers
	        # that pandas parsed as numbers.
	        diff_data[f"{col}_diff"] = (
	            merged[f"{col}_file1"] - merged[f"{col}_file2"]
	        ).abs()

	    diff_df = pd.DataFrame(diff_data)
	    diff_df["Total Difference"] = diff_df.iloc[:, len(key_cols):].sum(axis=1)
	    return diff_df.sort_values(by="Total Difference", ascending=False)


	def _to_excel_bytes(df):
	    """Serialize ``df`` to .xlsx (without the index) and return the raw bytes."""
	    import io

	    # An in-memory buffer avoids writing a shared file into the working
	    # directory, which concurrent sessions would overwrite.
	    buffer = io.BytesIO()
	    df.to_excel(buffer, index=False)
	    return buffer.getvalue()


	# Run the comparison only once both workbooks have been uploaded.
	if uploaded_file1 and uploaded_file2:
	    try:
	        sorted_diff = _compare_repeat_tables(
	            pd.read_excel(uploaded_file1),
	            pd.read_excel(uploaded_file2),
	        )

	        if sorted_diff.empty:
	            # Nothing matched on the key columns; an empty download would be
	            # misleading, so tell the user instead.
	            st.warning(
	                "No proteins with matching Entry ID and Protein Name were "
	                "found in both files."
	            )
	        else:
	            output_file = "protein_repeat_comparison.xlsx"
	            excel_bytes = _to_excel_bytes(sorted_diff)

	            st.success("✅ Comparison complete!")
	            st.download_button(
	                label="📥 Download Comparison Excel",
	                data=excel_bytes,
	                file_name=output_file,
	                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	            )

	    except Exception as e:
	        # Top-level UI boundary: show any failure (unreadable workbook,
	        # missing columns, non-numeric data) to the user instead of a
	        # traceback.
	        st.error(f"⚠️ Error: {e}")