Spaces:

Jayesh13
/

BTP_Phase2

Build error

App Files Files Community

BTP_Phase2 / app.py

Jayesh13

Create app.py

3f486f6 verified 2 months ago

raw

history blame

2.97 kB

	import importlib
	import importlib.util
	import os
	import subprocess
	import sys


	def _ensure_dependencies(packages):
	    """Best-effort install of any *packages* that cannot be imported yet.

	    Streamlit re-executes this script on every user interaction, so an
	    unconditional ``pip install`` would be spawned on each rerun. Only
	    packages that are actually missing are installed, and pip is invoked
	    through the running interpreter (``sys.executable -m pip``) with an
	    argument list rather than a shell string, so the install targets the
	    same environment the app is running in.

	    Args:
	        packages: Iterable of names usable both as import names and as
	            pip requirement names.

	    Installation failures are not raised here (matching the previous
	    fire-and-forget behaviour); a missing package then surfaces as an
	    ImportError at the import statements that follow.
	    """
	    missing = [name for name in packages if importlib.util.find_spec(name) is None]
	    if not missing:
	        return
	    subprocess.run([sys.executable, "-m", "pip", "install", *missing], check=False)
	    # Let the import system notice packages installed after interpreter start.
	    importlib.invalidate_caches()


	# NOTE(review): declaring these in the deployment's requirements file is the
	# more reliable route; this call is only a runtime fallback.
	_ensure_dependencies(("streamlit", "pandas", "xlsxwriter", "openpyxl"))

	import streamlit as st
	import pandas as pd
	import xlsxwriter
	from io import BytesIO
	from collections import Counter

	# One-letter codes of the 20 standard amino acids; any other character in
	# an uploaded sequence is discarded before counting.
	AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")

	# Page-level configuration, followed by the visible heading.
	st.set_page_config(
	    page_title="Amino Acid Percentage Tool",
	    layout="wide",
	)
	st.title("🧬 Amino Acid Percentage Analyzer")

	# The uploader is restricted to .xlsx workbooks.
	uploaded_file = st.file_uploader(
	    "Upload Excel file (with Entry, Protein Name, Sequence)",
	    type=["xlsx"],
	)

	def _clean_sequence(raw):
	    """Return *raw* upper-cased and reduced to standard amino-acid letters.

	    Missing cells (``None``/NaN) yield an empty string. Without this check
	    ``str(nan)`` would be upper-cased to ``"NAN"`` and counted as the
	    residues N, A, N. Spaces, quotes and any other non-residue characters
	    are dropped by the membership filter.
	    """
	    if pd.isna(raw):
	        return ""
	    return "".join(ch for ch in str(raw).upper() if ch in AMINO_ACIDS)


	def _percentages(counts, total, residues):
	    """Return ``{residue: share of total in percent}``, rounded to 2 decimals.

	    *total* must be non-zero; callers only pass lengths of non-empty
	    sequences.
	    """
	    return {aa: round(counts[aa] / total * 100, 2) for aa in residues}


	def _composition_table(df):
	    """Build the per-protein and overall amino-acid percentage table.

	    The first three columns of *df* are read by position as entry, protein
	    name and sequence, so duplicate or unexpected header names do not
	    matter. Rows whose cleaned sequence is empty are skipped.

	    Returns:
	        A DataFrame with columns ``Entry``, ``Protein Name`` and one column
	        per residue in alphabetical order, whose last row is the
	        ``OVERALL`` composition across all kept sequences; or ``None`` when
	        no row contains a usable sequence.
	    """
	    # Sorting gives a stable column order; iterating the set directly would
	    # follow hash order, which can differ from one process to the next.
	    residues = sorted(AMINO_ACIDS)
	    total_counts = Counter()
	    total_length = 0
	    rows = []

	    first_three = df.iloc[:, :3]
	    for entry, name, raw_sequence in first_three.itertuples(index=False, name=None):
	        sequence = _clean_sequence(raw_sequence)
	        if not sequence:
	            continue

	        counts = Counter(sequence)
	        total_counts.update(counts)
	        total_length += len(sequence)
	        rows.append({
	            "Entry": str(entry),
	            "Protein Name": str(name),
	            **_percentages(counts, len(sequence), residues),
	        })

	    if not rows:
	        # Nothing to summarise; also avoids dividing by a zero total length.
	        return None

	    rows.append({
	        "Entry": "OVERALL",
	        "Protein Name": "ALL SEQUENCES",
	        **_percentages(total_counts, total_length, residues),
	    })
	    return pd.DataFrame(rows)


	def to_excel(df):
	    """Serialize *df* into an in-memory ``.xlsx`` workbook.

	    Writes the column names as a bold, shaded header row, then one
	    worksheet row per DataFrame row, into a sheet named "Amino Acid %".

	    Returns:
	        A ``BytesIO`` holding the workbook, rewound to offset 0.
	    """
	    output = BytesIO()
	    # The context manager closes (and thereby flushes) the workbook even if
	    # a write raises part-way through.
	    with xlsxwriter.Workbook(output, {'in_memory': True}) as workbook:
	        worksheet = workbook.add_worksheet("Amino Acid %")
	        header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})

	        for col_num, col_name in enumerate(df.columns):
	            worksheet.write(0, col_num, col_name, header_format)

	        for row_num, row in enumerate(df.itertuples(index=False), start=1):
	            for col_num, value in enumerate(row):
	                worksheet.write(row_num, col_num, value)

	    output.seek(0)
	    return output


	# Main flow: runs once a file has been uploaded and "Analyze File" is clicked.
	if uploaded_file and st.button("Analyze File"):
	    df = pd.read_excel(uploaded_file)

	    if len(df.columns) < 3:
	        st.error("The file must have at least three columns: Entry, Protein Name, Sequence")
	    else:
	        df_result = _composition_table(df)

	        if df_result is None:
	            # This case used to end in a ZeroDivisionError while computing
	            # the overall percentages.
	            st.warning("No valid amino acid sequences were found in the Sequence column.")
	        else:
	            st.dataframe(df_result)

	            # Export to Excel
	            excel_file = to_excel(df_result)

	            st.download_button(
	                label="Download Excel Report",
	                data=excel_file,
	                file_name="amino_acid_percentage.xlsx",
	                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	            )