Spaces:

Jayesh13
/

Homo_hetero_caching

Sleeping

Jayesh13 committed on Apr 13

Commit

01b258e

verified ·

1 Parent(s): 03aaa04

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,9 +18,8 @@ results_collection = db['protein_results']
 def is_homo_repeat(s):
     return all(c == s[0] for c in s)
-def hash_sequence(sequence, analysis_type, overlap):
-    key_string = sequence + analysis_type + str(overlap)
-    return hashlib.md5(key_string.encode()).hexdigest()
 @st.cache_data(show_spinner=False)
 def fragment_protein_sequence(sequence, max_length=1000):
@@ -73,8 +72,8 @@ def find_new_boundary_repeats(fragments, final_repeats, overlap=50):
     return new_repeats
 def get_or_process_sequence(sequence, analysis_type, overlap=50):
-    sequence_hash = hash_sequence(sequence, analysis_type, overlap)
-    cached = results_collection.find_one({"_id": sequence_hash})
     if cached:
         return cached["repeats"]
@@ -115,6 +114,7 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
     # Save to DB for caching
     results_collection.insert_one({
         "_id": sequence_hash,
         "repeats": dict(final_repeats)
     })
     return final_repeats
@@ -195,4 +195,4 @@ if uploaded_files:
                     row.update({repeat: freq.get(repeat, 0) for repeat in sorted(all_repeats)})
                     rows.append(row)
             result_df = pd.DataFrame(rows)
-            st.dataframe(result_df)

 def is_homo_repeat(s):
     return all(c == s[0] for c in s)
+def hash_sequence(sequence):
+    return hashlib.md5(sequence.encode()).hexdigest()
 @st.cache_data(show_spinner=False)
 def fragment_protein_sequence(sequence, max_length=1000):
     return new_repeats
 def get_or_process_sequence(sequence, analysis_type, overlap=50):
+    sequence_hash = hash_sequence(sequence)
+    cached = results_collection.find_one({"_id": sequence_hash, "analysis_type": analysis_type})
     if cached:
         return cached["repeats"]
     # Save to DB for caching
     results_collection.insert_one({
         "_id": sequence_hash,
+        "analysis_type": analysis_type,
         "repeats": dict(final_repeats)
     })
     return final_repeats
                     row.update({repeat: freq.get(repeat, 0) for repeat in sorted(all_repeats)})
                     rows.append(row)
             result_df = pd.DataFrame(rows)
+            st.dataframe(result_df)