"""Streamlit app that reports amino-acid composition for protein sequences.

The user uploads an Excel workbook whose first three columns are, in order,
an entry identifier, a protein name and a sequence.  For every row with a
usable sequence the app computes the percentage of each of the 20 standard
amino acids, prepends an "OVERALL" row aggregated over all sequences, shows
the table and a pie chart, and offers the table as an .xlsx download.
"""

import importlib
import importlib.util
import os
import subprocess
import sys
from collections import Counter
from io import BytesIO

# Packages the app needs at runtime.  openpyxl is not imported directly; it
# is the engine pandas uses to read .xlsx files.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl", "matplotlib")


def _ensure_dependencies():
    """Install any required package that is not importable yet.

    Streamlit re-executes this script on every widget interaction, so the
    check must be cheap: pip is only invoked when something is actually
    missing, and it is run through the current interpreter so the packages
    land in the environment this script is running in.
    """
    missing = [
        pkg for pkg in _REQUIRED_PACKAGES if importlib.util.find_spec(pkg) is None
    ]
    if not missing:
        return
    # Best effort: if the install fails, the imports below raise ImportError
    # with a clear message naming the missing module.
    subprocess.run([sys.executable, "-m", "pip", "install", *missing], check=False)
    importlib.invalidate_caches()


_ensure_dependencies()

import matplotlib.pyplot as plt  # noqa: E402  (must follow the dependency check)
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
import xlsxwriter  # noqa: E402

# The 20 standard amino acids.  The string fixes the column order of the
# report (iterating a set of strings is not stable between runs); the set is
# used for fast membership tests.
AMINO_ACID_ORDER = "ACDEFGHIKLMNPQRSTVWY"
AMINO_ACIDS = set(AMINO_ACID_ORDER)


def _cell_text(value) -> str:
    """Return the cell as text, mapping a missing cell (NaN/None) to ""."""
    return "" if pd.isna(value) else str(value)


def _clean_sequence(raw) -> str:
    """Upper-case a sequence cell and keep only standard amino-acid letters.

    Spaces, quotes and any other characters are dropped by the filter.  A
    missing cell yields "" rather than the text "nan", which would otherwise
    be read as the residues N-A-N.
    """
    return "".join(ch for ch in _cell_text(raw).upper() if ch in AMINO_ACIDS)


def _percentages(counts, total):
    """Return {amino acid: percent of ``total``}, rounded to 2 decimals.

    ``total`` must be non-zero; callers skip empty sequences beforehand.
    """
    return {aa: round(counts[aa] / total * 100, 2) for aa in AMINO_ACID_ORDER}


def build_report(df):
    """Compute per-sequence and overall amino-acid percentages.

    Args:
        df: DataFrame whose first three columns are entry, protein name and
            sequence (accessed by position, so the header text is irrelevant).

    Returns:
        ``(report, overall)`` where ``report`` is a DataFrame with the
        "OVERALL" row first followed by one row per usable sequence, and
        ``overall`` is the overall percentage dict.  Returns ``(None, {})``
        when no row contains a valid sequence.
    """
    total_counts = Counter()
    total_length = 0
    rows = []

    first_three = df.iloc[:, :3]
    for entry, name, raw_sequence in first_three.itertuples(index=False, name=None):
        sequence = _clean_sequence(raw_sequence)
        if not sequence:
            continue  # nothing to measure for this row

        counts = Counter(sequence)
        total_counts.update(counts)
        total_length += len(sequence)
        rows.append(
            {
                "Entry": _cell_text(entry),
                "Protein Name": _cell_text(name),
                **_percentages(counts, len(sequence)),
            }
        )

    if total_length == 0:
        # Avoids dividing by zero when the sheet has no usable sequences.
        return None, {}

    overall = _percentages(total_counts, total_length)
    overall_row = {"Entry": "OVERALL", "Protein Name": "ALL SEQUENCES", **overall}
    # Overall row first, then all individual rows.
    return pd.DataFrame([overall_row, *rows]), overall


def render_pie_chart(overall):
    """Draw the overall composition as a pie chart in the Streamlit page."""
    # 🔵 Pie Chart for Overall Stats
    st.subheader("🧁 Overall Amino Acid Composition (Pie Chart)")

    # Leave out amino acids at 0% to avoid clutter.
    nonzero = {aa: pct for aa, pct in overall.items() if pct > 0}
    if not nonzero:
        st.info("No valid amino acids found to display in pie chart.")
        return

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(
        list(nonzero.values()),
        labels=list(nonzero),
        autopct='%1.1f%%',
        startangle=90,
        counterclock=False,
    )
    ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; the script reruns often,
    # so release it explicitly.
    plt.close(fig)


def to_excel(df):
    """Serialise ``df`` to an in-memory .xlsx workbook.

    The first worksheet row holds the column names in a bold, shaded format;
    the data rows follow unformatted.

    Returns:
        A ``BytesIO`` positioned at the start of the workbook bytes.
    """
    output = BytesIO()
    workbook = xlsxwriter.Workbook(output, {'in_memory': True})
    worksheet = workbook.add_worksheet("Amino Acid %")
    header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})

    for col_num, col_name in enumerate(df.columns):
        worksheet.write(0, col_num, col_name, header_format)

    for row_num, row in enumerate(df.itertuples(index=False), start=1):
        for col_num, value in enumerate(row):
            worksheet.write(row_num, col_num, value)

    workbook.close()
    output.seek(0)
    return output


def main():
    """Render the page and run the analysis when the user asks for it."""
    st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
    st.title("🧬 Amino Acid Percentage Analyzer")

    uploaded_file = st.file_uploader(
        "Upload Excel file (with Entry, Protein Name, Sequence)", type=["xlsx"]
    )
    # The button is only rendered once a file has been uploaded.
    if not (uploaded_file and st.button("Analyze File")):
        return

    df = pd.read_excel(uploaded_file)
    if len(df.columns) < 3:
        st.error(
            "The file must have at least three columns: Entry, Protein Name, Sequence"
        )
        return

    df_result, overall_percentage = build_report(df)
    if df_result is None:
        st.warning("No valid amino acid sequences were found in the uploaded file.")
        return

    st.dataframe(df_result)
    render_pie_chart(overall_percentage)

    # Export to Excel
    st.download_button(
        label="Download Excel Report",
        data=to_excel(df_result),
        file_name="amino_acid_percentage.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


# Streamlit executes the script top to bottom on every rerun, so the page is
# rendered unconditionally, exactly as the original flat script did.
main()