Spaces:

Jayesh13
/

BTP_Phase2

Build error

App Files Community

Jayesh13 committed on Apr 14

Commit

781ebc0

verified ·

1 Parent(s): c3138dd

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -38

app.py CHANGED Viewed

@@ -1,16 +1,3 @@
-import os
-os.system("pip install streamlit pandas xlsxwriter openpyxl matplotlib seaborn")
-import streamlit as st
-import pandas as pd
-import xlsxwriter
-from io import BytesIO
-from collections import Counter
-import matplotlib.pyplot as plt
-import seaborn as sns
- # For pie chart
-# 🔄 COMBINED STREAMLIT PROTEIN ANALYSIS TOOL WITH COLORED COMPARISON
 import os
 os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
@@ -20,6 +7,8 @@ import xlsxwriter
 from io import BytesIO
 from collections import defaultdict
 import hashlib
 # MongoDB Setup
 try:
@@ -190,7 +179,6 @@ st.title("🧬 Protein Analysis Toolkit")
 app_choice = st.radio("Choose an option", ["🔁 Protein Repeat Finder", "📊 Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
 if app_choice == "🔁 Protein Repeat Finder":
     analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
     uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
@@ -224,10 +212,11 @@ if app_choice == "🔁 Protein Repeat Finder":
         st.download_button(
             label="Download Excel file",
             data=st.session_state.excel_file,
-            file_name="protein_repeat_results.xlsx",
             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
         )
     if st.checkbox("Show Results Table"):
         rows = []
         for file_index, file_data in enumerate(st.session_state.all_sequences_data):
@@ -238,29 +227,29 @@ if app_choice == "🔁 Protein Repeat Finder":
                 rows.append(row)
         result_df = pd.DataFrame(rows)
         st.dataframe(result_df)
-    # if st.checkbox("Repeat Cluster Visualization"):
-    #     repeat_counts = defaultdict(int)
-    #     for seq_data in st.session_state.all_sequences_data:
-    #         for _, _, freq_dict in seq_data:
-    #             for repeat, count in freq_dict.items():
-    #                 repeat_counts[repeat] += count
-    #     if repeat_counts:
-    #         sorted_repeats = sorted(repeat_counts.items(), key=lambda x: x[1], reverse=True)
-    #         top_n = st.slider("Select number of top repeats to visualize", min_value=5, max_value=50, value=20)
-    #         top_repeats = sorted_repeats[:top_n]
-    #         repeats, counts = zip(*top_repeats)
-    #         plt.figure(figsize=(12, 6))
-    #         sns.barplot(x=list(repeats), y=list(counts), palette="viridis")
-    #         plt.xticks(rotation=45, ha='right')
-    #         plt.xlabel("Repeats")
-    #         plt.ylabel("Total Frequency")
-    #         plt.title("Top Repeat Clusters Across All Sequences")
-    #         st.pyplot(plt.gcf())
-    #     else:
-    #         st.warning("No repeat data available to visualize. Please upload files first.")

 import os
 os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
 from io import BytesIO
 from collections import defaultdict
 import hashlib
+import matplotlib.pyplot as plt
+import seaborn as sns
 # MongoDB Setup
 try:
 app_choice = st.radio("Choose an option", ["🔁 Protein Repeat Finder", "📊 Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
 if app_choice == "🔁 Protein Repeat Finder":
     analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
     uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
         st.download_button(
             label="Download Excel file",
             data=st.session_state.excel_file,
+            file_name="Protein_Repeats_Analysis.xlsx",
             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
         )
+    # Display results table and repeat cluster visualization
     if st.checkbox("Show Results Table"):
         rows = []
         for file_index, file_data in enumerate(st.session_state.all_sequences_data):
                 rows.append(row)
         result_df = pd.DataFrame(rows)
         st.dataframe(result_df)
+        # Repeat Cluster Visualization
+        repeat_counts = defaultdict(int)
+        for seq_data in st.session_state.all_sequences_data:
+            for _, _, freq_dict in seq_data:
+                for repeat, count in freq_dict.items():
+                    repeat_counts[repeat] += count
+        if repeat_counts:
+            sorted_repeats = sorted(repeat_counts.items(), key=lambda x: x[1], reverse=True)
+            top_n = st.slider("Select number of top repeats to visualize", min_value=5, max_value=50, value=20)
+            top_repeats = sorted_repeats[:top_n]
+            repeats, counts = zip(*top_repeats)
+            plt.figure(figsize=(12, 6))
+            sns.barplot(x=list(repeats), y=list(counts), palette="viridis")
+            plt.xticks(rotation=45, ha='right')
+            plt.xlabel("Repeats")
+            plt.ylabel("Total Frequency")
+            plt.title("Top Repeat Clusters Across All Sequences")
+            st.pyplot(plt.gcf())
+        else:
+            st.warning("No repeat data available to visualize. Please upload files first.")