Spaces:
Build error
Build error
File size: 2,967 Bytes
3f486f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
# NOTE(review): this shells out to `pip install` every time the script is
# executed, and it has to stay ahead of the third-party imports below because
# those imports are what it is meant to satisfy. The exit status returned by
# os.system is discarded, so a failed install only surfaces later as an
# ImportError. Presumably the deployment could declare these packages in a
# requirements file instead — TODO confirm how dependencies are installed.
os.system("pip install streamlit pandas xlsxwriter openpyxl")
import streamlit as st
import pandas as pd
import xlsxwriter
from io import BytesIO
from collections import Counter
# Set of 20 standard amino acids (one-letter codes); used for membership tests.
AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")
# Alphabetical residue order for the report columns. Iterating the set directly
# would yield an order that can differ from one interpreter run to the next.
AMINO_ACID_COLUMNS = tuple(sorted(AMINO_ACIDS))
REPORT_COLUMNS = ("Entry", "Protein Name", *AMINO_ACID_COLUMNS)
XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
TOO_FEW_COLUMNS_MSG = "The file must have at least three columns: Entry, Protein Name, Sequence"


def clean_sequence(raw):
    """Return *raw* reduced to upper-case standard amino-acid letters.

    Every character that is not one of the 20 one-letter codes (spaces,
    quotes, digits, ambiguity codes, ...) is dropped.

    Missing cells (``None`` / NaN / ``pd.NA``) yield an empty string. They
    must not be stringified: ``str(nan)`` is ``"nan"``, whose letters N, A, N
    are all valid residue codes and would be counted as a real sequence.
    """
    if pd.api.types.is_scalar(raw) and pd.isna(raw):
        return ""
    return "".join(char for char in str(raw).upper() if char in AMINO_ACIDS)


def composition(counts, total):
    """Return ``{residue: percentage}`` for all 20 residues, rounded to 2 dp.

    Args:
        counts: mapping of residue letter to occurrence count; a ``Counter``
            is expected so that absent residues read as 0.
        total: number of residues the counts were taken from; must be > 0.
    """
    return {aa: round(counts[aa] / total * 100, 2) for aa in AMINO_ACID_COLUMNS}


def analyze_sequences(df):
    """Build the per-protein and overall amino-acid percentage table.

    Columns are read by position: first = entry, second = protein name,
    third = sequence. Rows whose cleaned sequence is empty are skipped.

    Returns:
        A DataFrame with ``REPORT_COLUMNS``: one row per usable input row
        followed by a final ``OVERALL`` row computed over all residues
        pooled together. If no row has a usable sequence the frame is empty
        (no ``OVERALL`` row), so callers can test ``result.empty``.

    Raises:
        ValueError: if *df* has fewer than three columns.
    """
    if len(df.columns) < 3:
        raise ValueError(TOO_FEW_COLUMNS_MSG)

    pooled_counts = Counter()
    pooled_length = 0
    rows = []
    # Positional access keeps this working even when header names repeat.
    for entry, name, raw in df.iloc[:, :3].itertuples(index=False, name=None):
        sequence = clean_sequence(raw)
        if not sequence:
            continue
        counts = Counter(sequence)
        pooled_counts.update(counts)
        pooled_length += len(sequence)
        rows.append(
            {
                "Entry": str(entry),
                "Protein Name": str(name),
                **composition(counts, len(sequence)),
            }
        )

    # Only add the summary when something was counted; otherwise the overall
    # percentage would divide by a zero total length.
    if rows:
        rows.append(
            {
                "Entry": "OVERALL",
                "Protein Name": "ALL SEQUENCES",
                **composition(pooled_counts, pooled_length),
            }
        )
    return pd.DataFrame(rows, columns=list(REPORT_COLUMNS))


def to_excel(df):
    """Serialize *df* into an in-memory .xlsx workbook.

    The single worksheet is named "Amino Acid %"; the header row is bold on a
    light-blue background and data rows follow from row 1.

    Returns:
        A ``BytesIO`` holding the workbook, rewound to position 0.
    """
    output = BytesIO()
    workbook = xlsxwriter.Workbook(output, {'in_memory': True})
    worksheet = workbook.add_worksheet("Amino Acid %")
    header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})
    worksheet.write_row(0, 0, df.columns, header_format)
    for row_num, row in enumerate(df.itertuples(index=False, name=None), start=1):
        worksheet.write_row(row_num, 0, row)
    # close() is what actually writes the workbook bytes into the buffer.
    workbook.close()
    output.seek(0)
    return output


def main():
    """Render the page: upload a workbook, analyze it, show and export the result."""
    st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
    st.title("🧬 Amino Acid Percentage Analyzer")
    uploaded_file = st.file_uploader(
        "Upload Excel file (with Entry, Protein Name, Sequence)", type=["xlsx"]
    )
    # Short-circuit: the button is only rendered once a file has been chosen.
    if not (uploaded_file and st.button("Analyze File")):
        return

    df = pd.read_excel(uploaded_file)
    if len(df.columns) < 3:
        st.error(TOO_FEW_COLUMNS_MSG)
        return

    df_result = analyze_sequences(df)
    if df_result.empty:
        st.warning("No valid amino acid sequences were found in the third column.")
        return

    st.dataframe(df_result)
    # Export to Excel
    st.download_button(
        label="Download Excel Report",
        data=to_excel(df_result),
        file_name="amino_acid_percentage.xlsx",
        mime=XLSX_MIME,
    )


# `streamlit run` executes the script under the module name "__main__", so the
# page is still rendered on every run while the helpers stay importable.
if __name__ == "__main__":
    main()