Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
import os
|
2 |
-
os.system("pip install streamlit pandas xlsxwriter openpyxl matplotlib seaborn")
|
3 |
-
|
4 |
-
import streamlit as st
|
5 |
-
import pandas as pd
|
6 |
-
import xlsxwriter
|
7 |
-
from io import BytesIO
|
8 |
-
from collections import Counter
|
9 |
-
import matplotlib.pyplot as plt
|
10 |
-
import seaborn as sns
|
11 |
-
# For pie chart
|
12 |
-
# π COMBINED STREAMLIT PROTEIN ANALYSIS TOOL WITH COLORED COMPARISON
|
13 |
-
|
14 |
import os
|
15 |
os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
|
16 |
|
@@ -20,6 +7,8 @@ import xlsxwriter
|
|
20 |
from io import BytesIO
|
21 |
from collections import defaultdict
|
22 |
import hashlib
|
|
|
|
|
23 |
|
24 |
# MongoDB Setup
|
25 |
try:
|
@@ -190,7 +179,6 @@ st.title("🧬 Protein Analysis Toolkit")
|
|
190 |
|
191 |
app_choice = st.radio("Choose an option", ["π Protein Repeat Finder", "π Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
|
192 |
|
193 |
-
|
194 |
if app_choice == "π Protein Repeat Finder":
|
195 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
196 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
@@ -224,10 +212,11 @@ if app_choice == "π Protein Repeat Finder":
|
|
224 |
st.download_button(
|
225 |
label="Download Excel file",
|
226 |
data=st.session_state.excel_file,
|
227 |
-
file_name="
|
228 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
229 |
)
|
230 |
|
|
|
231 |
if st.checkbox("Show Results Table"):
|
232 |
rows = []
|
233 |
for file_index, file_data in enumerate(st.session_state.all_sequences_data):
|
@@ -238,29 +227,29 @@ if app_choice == "π Protein Repeat Finder":
|
|
238 |
rows.append(row)
|
239 |
result_df = pd.DataFrame(rows)
|
240 |
st.dataframe(result_df)
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
|
265 |
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
|
3 |
|
|
|
7 |
from io import BytesIO
|
8 |
from collections import defaultdict
|
9 |
import hashlib
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
import seaborn as sns
|
12 |
|
13 |
# MongoDB Setup
|
14 |
try:
|
|
|
179 |
|
180 |
app_choice = st.radio("Choose an option", ["π Protein Repeat Finder", "π Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
|
181 |
|
|
|
182 |
if app_choice == "π Protein Repeat Finder":
|
183 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
184 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
|
|
212 |
st.download_button(
|
213 |
label="Download Excel file",
|
214 |
data=st.session_state.excel_file,
|
215 |
+
file_name="Protein_Repeats_Analysis.xlsx",
|
216 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
217 |
)
|
218 |
|
219 |
+
# Display results table and repeat cluster visualization
|
220 |
if st.checkbox("Show Results Table"):
|
221 |
rows = []
|
222 |
for file_index, file_data in enumerate(st.session_state.all_sequences_data):
|
|
|
227 |
rows.append(row)
|
228 |
result_df = pd.DataFrame(rows)
|
229 |
st.dataframe(result_df)
|
230 |
+
|
231 |
+
# Repeat Cluster Visualization
|
232 |
+
repeat_counts = defaultdict(int)
|
233 |
+
for seq_data in st.session_state.all_sequences_data:
|
234 |
+
for _, _, freq_dict in seq_data:
|
235 |
+
for repeat, count in freq_dict.items():
|
236 |
+
repeat_counts[repeat] += count
|
237 |
+
|
238 |
+
if repeat_counts:
|
239 |
+
sorted_repeats = sorted(repeat_counts.items(), key=lambda x: x[1], reverse=True)
|
240 |
+
top_n = st.slider("Select number of top repeats to visualize", min_value=5, max_value=50, value=20)
|
241 |
+
top_repeats = sorted_repeats[:top_n]
|
242 |
+
repeats, counts = zip(*top_repeats)
|
243 |
+
|
244 |
+
plt.figure(figsize=(12, 6))
|
245 |
+
sns.barplot(x=list(repeats), y=list(counts), palette="viridis")
|
246 |
+
plt.xticks(rotation=45, ha='right')
|
247 |
+
plt.xlabel("Repeats")
|
248 |
+
plt.ylabel("Total Frequency")
|
249 |
+
plt.title("Top Repeat Clusters Across All Sequences")
|
250 |
+
st.pyplot(plt.gcf())
|
251 |
+
else:
|
252 |
+
st.warning("No repeat data available to visualize. Please upload files first.")
|
253 |
|
254 |
|
255 |
|