# Spaces: Build error  -- apparently a hosting-page status banner, not part of the program
import os
from collections import Counter
from io import BytesIO

# NOTE(review): this installs the packages every time the script runs;
# declaring them in requirements.txt would avoid that. The call has to stay
# ahead of the third-party imports below, which depend on it.
os.system("pip install streamlit pandas xlsxwriter openpyxl matplotlib")

import matplotlib.pyplot as plt  # For pie chart
import pandas as pd
import streamlit as st
import xlsxwriter
# One-letter codes of the 20 standard amino acids.
AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")

# Iterating a set of strings yields a different order on each interpreter run
# (string hashes are randomized), so one fixed alphabetical order is used for
# the table columns, the spreadsheet columns and the pie wedges.
AMINO_ACID_ORDER = tuple(sorted(AMINO_ACIDS))


def clean_sequence(raw):
    """Return *raw* reduced to upper-case standard amino-acid letters.

    Missing cells (``NaN``, ``None``, ``NaT``) yield ``""``.  Without that
    check ``str(nan)`` is ``"nan"``, which upper-cases to ``"NAN"`` and would
    be counted as a genuine three-residue sequence (N, A, N are all valid
    codes).
    """
    if pd.isna(raw):
        return ""
    # Keeping only AMINO_ACIDS members also drops spaces, quotes and newlines.
    return "".join(ch for ch in str(raw).upper() if ch in AMINO_ACIDS)


def composition_percentages(counts, length):
    """Return ``{amino acid: percent of length}``, rounded to two decimals.

    *counts* is a ``Counter`` of residues (absent residues count as 0) and
    *length* is the number of residues it was built from; it must be > 0.
    """
    return {aa: round(counts[aa] / length * 100, 2) for aa in AMINO_ACID_ORDER}


def analyze_sequences(df):
    """Compute per-protein and pooled amino-acid percentages.

    The first three columns of *df* are read by position as entry, protein
    name and sequence.  Rows whose sequence contains no standard residue are
    skipped.

    Returns ``(table, overall)``.  ``table`` is a DataFrame whose first row is
    the pooled ``OVERALL`` composition, followed by one row per usable
    protein; ``overall`` is the pooled ``{amino acid: percent}`` dict.  When
    no row contains a residue the result is ``(None, {})`` so the caller can
    report it instead of dividing by zero.
    """
    pooled_counts = Counter()
    pooled_length = 0
    result_rows = []

    rows = df.iloc[:, :3].itertuples(index=False, name=None)
    for entry, name, raw_sequence in rows:
        sequence = clean_sequence(raw_sequence)
        if not sequence:
            continue
        counts = Counter(sequence)
        pooled_counts.update(counts)
        pooled_length += len(sequence)
        result_rows.append({
            "Entry": str(entry),
            "Protein Name": str(name),
            **composition_percentages(counts, len(sequence)),
        })

    if not result_rows:
        return None, {}

    overall = composition_percentages(pooled_counts, pooled_length)
    overall_row = {"Entry": "OVERALL", "Protein Name": "ALL SEQUENCES", **overall}
    # Pooled row first, then the individual proteins.
    return pd.DataFrame([overall_row, *result_rows]), overall


def to_excel(df):
    """Write *df* to an in-memory .xlsx workbook.

    Returns the ``BytesIO`` holding the workbook, rewound to position 0.
    """
    output = BytesIO()
    # The context manager closes (finalizes) the workbook even if a write fails.
    with xlsxwriter.Workbook(output, {'in_memory': True}) as workbook:
        worksheet = workbook.add_worksheet("Amino Acid %")
        header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})
        for col_num, col_name in enumerate(df.columns):
            worksheet.write(0, col_num, col_name, header_format)
        for row_num, row in enumerate(df.itertuples(index=False), start=1):
            for col_num, value in enumerate(row):
                worksheet.write(row_num, col_num, value)
    output.seek(0)
    return output


def main():
    """Render the page: upload, results table, pie chart and Excel download."""
    st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
    # Escape sequence for the DNA emoji, so the title survives re-encoding.
    st.title("\U0001F9EC Amino Acid Percentage Analyzer")
    uploaded_file = st.file_uploader(
        "Upload Excel file (with Entry, Protein Name, Sequence)", type=["xlsx"]
    )
    # The button is only rendered once a file has been chosen.
    if not (uploaded_file and st.button("Analyze File")):
        return

    df = pd.read_excel(uploaded_file)
    if len(df.columns) < 3:
        st.error("The file must have at least three columns: Entry, Protein Name, Sequence")
        return

    df_result, overall_percentage = analyze_sequences(df)
    if df_result is None:
        st.warning("No valid amino acid sequences were found in the uploaded file.")
        return

    st.dataframe(df_result)

    # Pie chart of the pooled composition.
    st.subheader("Overall Amino Acid Composition (Pie Chart)")
    # Leave out amino acids at 0% to avoid clutter.
    wedges = [(aa, pct) for aa, pct in overall_percentage.items() if pct > 0]
    if wedges:
        labels, sizes = zip(*wedges)
        fig, ax = plt.subplots(figsize=(3, 3))
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, counterclock=False)
        ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
        st.pyplot(fig)
        # pyplot keeps every open figure alive; release it after rendering.
        plt.close(fig)
    else:
        st.info("No valid amino acids found to display in pie chart.")

    # NOTE(review): clicking the download button reruns the script, and on
    # that run st.button("Analyze File") is False again, so the results
    # vanish; store them in st.session_state if they should persist.
    st.download_button(
        label="Download Excel Report",
        data=to_excel(df_result),
        file_name="amino_acid_percentage.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


if __name__ == "__main__":
    main()