Jayesh13 committed on
Commit
3f486f6
·
verified ·
1 Parent(s): c1cef30

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import importlib
import importlib.util
import os
import subprocess
import sys
from collections import Counter
from io import BytesIO

# Best-effort dependency bootstrap.
#
# Streamlit re-executes this script on every widget interaction, so an
# unconditional "pip install" would run on each rerun. Only packages that
# cannot be found are installed. The call uses an argument list (no shell
# parsing) and "sys.executable -m pip" so packages land in the interpreter
# that is actually running the app.
_REQUIRED_PACKAGES = ("streamlit", "pandas", "xlsxwriter", "openpyxl")
_missing = [
    pkg for pkg in _REQUIRED_PACKAGES if importlib.util.find_spec(pkg) is None
]
if _missing:
    # check=False keeps the original best-effort behaviour: a failed install
    # surfaces as an ImportError on the imports below, not as a crash here.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *_missing],
        check=False,
    )
    # Make freshly installed packages visible to the import system.
    importlib.invalidate_caches()

import pandas as pd
import streamlit as st
import xlsxwriter

# One-letter codes of the 20 standard amino acids. Any character outside this
# set is discarded when sequences are cleaned.
AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")

st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
st.title("🧬 Amino Acid Percentage Analyzer")

# The first three columns of the uploaded sheet are read by position as
# Entry, Protein Name and Sequence.
uploaded_file = st.file_uploader("Upload Excel file (with Entry, Protein Name, Sequence)", type=["xlsx"])

def _clean_sequence(raw):
    """Return *raw* reduced to upper-case standard amino-acid letters.

    Missing cells (``None``/NaN) yield an empty string. Without this guard,
    ``str(nan)`` upper-cases to ``"NAN"`` and would be counted as the residues
    N, A and N.
    """
    if pd.isna(raw):
        return ""
    # Filtering against AMINO_ACIDS also drops spaces, quotes and every other
    # non-residue character, so no separate replace()/strip() pass is needed.
    return "".join(c for c in str(raw).upper() if c in AMINO_ACIDS)


def _percentages(counts, total):
    """Return each standard amino acid's share of *total*, in percent.

    *counts* maps residue letter to occurrence count (a ``Counter``, so absent
    residues read as 0); *total* must be non-zero. Values are rounded to two
    decimals. Keys are emitted alphabetically so the report columns are the
    same on every run; set iteration order is not guaranteed.
    """
    return {aa: round(counts[aa] / total * 100, 2) for aa in sorted(AMINO_ACIDS)}


def to_excel(df):
    """Write *df* to an in-memory .xlsx workbook and return the buffer.

    The header row is bold with a light-blue fill; data rows are written
    unformatted. The returned ``BytesIO`` is rewound to position 0 so it can be
    handed straight to a download widget.
    """
    output = BytesIO()
    workbook = xlsxwriter.Workbook(output, {'in_memory': True})
    worksheet = workbook.add_worksheet("Amino Acid %")

    header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})

    for col_num, col_name in enumerate(df.columns):
        worksheet.write(0, col_num, col_name, header_format)

    for row_num, row in enumerate(df.itertuples(index=False), start=1):
        for col_num, value in enumerate(row):
            worksheet.write(row_num, col_num, value)

    # close() flushes the workbook into ``output``.
    workbook.close()
    output.seek(0)
    return output


if uploaded_file and st.button("Analyze File"):
    df = pd.read_excel(uploaded_file)

    if len(df.columns) < 3:
        st.error("The file must have at least three columns: Entry, Protein Name, Sequence")
    else:
        all_counts = Counter()
        all_length = 0
        result_rows = []

        # The first three columns are taken by position (entry, protein name,
        # sequence), whatever their header text is.
        for entry, name, raw_sequence in df.iloc[:, :3].itertuples(index=False, name=None):
            sequence = _clean_sequence(raw_sequence)
            if not sequence:
                # Nothing countable in this row (empty cell or no valid residues).
                continue

            count = Counter(sequence)
            all_counts.update(count)
            all_length += len(sequence)

            result_rows.append({
                "Entry": str(entry),
                "Protein Name": str(name),
                **_percentages(count, len(sequence)),
            })

        if not result_rows:
            # Without this guard the overall percentage divides by zero.
            st.warning("No valid amino acid sequences were found in the third column.")
        else:
            # Overall composition across every accepted sequence, appended as
            # the last row of the report.
            overall_row = {
                "Entry": "OVERALL",
                "Protein Name": "ALL SEQUENCES",
                **_percentages(all_counts, all_length),
            }
            df_result = pd.DataFrame(result_rows + [overall_row])

            st.dataframe(df_result)

            # Export to Excel
            st.download_button(
                label="Download Excel Report",
                data=to_excel(df_result),
                file_name="amino_acid_percentage.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )