Spaces:

Jayesh13
/

Homo_hetero_caching

Sleeping

App Files Community

Jayesh13 committed on Apr 13

Commit

b33eb44

verified ·

1 Parent(s): 01b258e

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -3

app.py CHANGED Viewed

@@ -111,7 +111,6 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
         for k, v in hetero_repeats.items():
             final_repeats[k] += v
-    # Save to DB for caching
     results_collection.insert_one({
         "_id": sequence_hash,
         "analysis_type": analysis_type,
@@ -122,6 +121,7 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
 def process_excel(excel_data, analysis_type):
     repeats = set()
     sequence_data = []
     for sheet_name in excel_data.sheet_names:
         df = excel_data.parse(sheet_name)
         if len(df.columns) < 3:
@@ -130,10 +130,14 @@ def process_excel(excel_data, analysis_type):
         for _, row in df.iterrows():
             entry_id = str(row[0])
             protein_name = str(row[1])
-            sequence = str(row[2]).replace('"', '').replace(' ', '')
             freq = get_or_process_sequence(sequence, analysis_type)
             sequence_data.append((entry_id, protein_name, freq))
             repeats.update(freq.keys())
     return repeats, sequence_data
 def create_excel(sequences_data, repeats, filenames):
@@ -178,7 +182,7 @@ if uploaded_files:
             all_sequences_data.append(sequence_data)
             filenames.append(file.name)
     if all_sequences_data:
-        st.success(f"Processed {len(uploaded_files)} files successfully!")
         excel_file = create_excel(all_sequences_data, all_repeats, filenames)
         st.download_button(
             label="Download Excel file",

         for k, v in hetero_repeats.items():
             final_repeats[k] += v
     results_collection.insert_one({
         "_id": sequence_hash,
         "analysis_type": analysis_type,
 def process_excel(excel_data, analysis_type):
     repeats = set()
     sequence_data = []
+    count = 0
     for sheet_name in excel_data.sheet_names:
         df = excel_data.parse(sheet_name)
         if len(df.columns) < 3:
         for _, row in df.iterrows():
             entry_id = str(row[0])
             protein_name = str(row[1])
+            sequence = str(row[2]).replace('"', '').replace(' ', '').strip()
+            if not sequence:  # Skip empty sequence
+                continue
+            count += 1
             freq = get_or_process_sequence(sequence, analysis_type)
             sequence_data.append((entry_id, protein_name, freq))
             repeats.update(freq.keys())
+    st.toast(f"{count} sequences processed.")
     return repeats, sequence_data
 def create_excel(sequences_data, repeats, filenames):
             all_sequences_data.append(sequence_data)
             filenames.append(file.name)
     if all_sequences_data:
+        st.toast(f"Processed {len(uploaded_files)} file(s) successfully.")
         excel_file = create_excel(all_sequences_data, all_repeats, filenames)
         st.download_button(
             label="Download Excel file",