"""Streamlit app: per-protein and overall amino acid percentage report.

Reads an Excel file whose first three columns are Entry, Protein Name and
Sequence, computes the percentage of each of the 20 standard amino acids for
every sequence and for all sequences combined, shows the table, and offers it
as an Excel download.
"""
import os

# NOTE(review): this shells out to pip every time the script executes, before
# the third-party imports below. It is kept so that deployments relying on it
# keep working; declaring the packages in requirements.txt would be preferable.
os.system("pip install streamlit pandas xlsxwriter openpyxl")

import streamlit as st
import pandas as pd
import xlsxwriter
from io import BytesIO
from collections import Counter

# One-letter codes of the 20 standard amino acids, in the fixed order used for
# the output columns. Iterating the set below instead would make the column
# order depend on string hashing, which can differ between interpreter runs.
AMINO_ACID_ORDER = "ACDEFGHIKLMNPQRSTVWY"

# Set of 20 standard amino acids (used for membership tests).
AMINO_ACIDS = set(AMINO_ACID_ORDER)


def _clean_sequence(raw):
    """Return *raw* upper-cased and reduced to standard amino acid letters.

    Missing cells (None / NaN / NaT) give an empty string. Without this check
    their text form ("nan", "NaT") would survive the filter, because N, A and
    T are all valid residue codes, and would be counted as a real sequence.
    """
    if pd.isna(raw):
        return ""
    # The membership filter also removes spaces, quotes and any other
    # non-residue characters, so no separate replace/strip step is needed.
    return "".join(ch for ch in str(raw).upper() if ch in AMINO_ACIDS)


def _percentages(counts, total):
    """Map each amino acid to its share of *total*, in percent (2 decimals).

    *total* must be non-zero; callers only pass lengths of non-empty sequences.
    """
    return {aa: round(counts[aa] / total * 100, 2) for aa in AMINO_ACID_ORDER}


def _analyze(df):
    """Build the result table from the first three columns of *df*.

    Returns a DataFrame with one row per non-empty sequence followed by an
    "OVERALL" row, or None when no row contains a usable sequence (which
    previously caused a division by zero in the overall percentage).
    """
    overall_counts = Counter()
    overall_length = 0
    rows = []

    first_three = df.iloc[:, :3]
    for entry, name, raw_seq in first_three.itertuples(index=False, name=None):
        sequence = _clean_sequence(raw_seq)
        if not sequence:
            continue

        counts = Counter(sequence)
        overall_counts.update(counts)
        overall_length += len(sequence)
        rows.append({
            "Entry": str(entry),
            "Protein Name": str(name),
            **_percentages(counts, len(sequence)),
        })

    if not rows:
        return None

    # Calculate overall percentage across every accepted sequence.
    rows.append({
        "Entry": "OVERALL",
        "Protein Name": "ALL SEQUENCES",
        **_percentages(overall_counts, overall_length),
    })
    return pd.DataFrame(rows)


def to_excel(df):
    """Write *df* to an in-memory .xlsx workbook and return it as a BytesIO.

    The header row is bold on a light-blue background; data rows follow
    unformatted. The returned buffer is rewound to position 0.
    """
    output = BytesIO()
    workbook = xlsxwriter.Workbook(output, {'in_memory': True})
    worksheet = workbook.add_worksheet("Amino Acid %")
    header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})

    for col_num, col_name in enumerate(df.columns):
        worksheet.write(0, col_num, col_name, header_format)

    # Data starts on worksheet row 1, directly below the header.
    for row_num, row in enumerate(df.itertuples(index=False), start=1):
        for col_num, value in enumerate(row):
            worksheet.write(row_num, col_num, value)

    workbook.close()
    output.seek(0)
    return output


st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
st.title("🧬 Amino Acid Percentage Analyzer")

uploaded_file = st.file_uploader(
    "Upload Excel file (with Entry, Protein Name, Sequence)",
    type=["xlsx"],
)

if uploaded_file and st.button("Analyze File"):
    df = pd.read_excel(uploaded_file)

    if len(df.columns) < 3:
        st.error("The file must have at least three columns: Entry, Protein Name, Sequence")
    else:
        df_result = _analyze(df)

        if df_result is None:
            st.warning("No valid amino acid sequences were found in the third column.")
        else:
            st.dataframe(df_result)

            # Export to Excel
            excel_file = to_excel(df_result)
            st.download_button(
                label="Download Excel Report",
                data=excel_file,
                file_name="amino_acid_percentage.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )