Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -111,7 +111,6 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
|
|
111 |
for k, v in hetero_repeats.items():
|
112 |
final_repeats[k] += v
|
113 |
|
114 |
-
# Save to DB for caching
|
115 |
results_collection.insert_one({
|
116 |
"_id": sequence_hash,
|
117 |
"analysis_type": analysis_type,
|
@@ -122,6 +121,7 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
|
|
122 |
def process_excel(excel_data, analysis_type):
|
123 |
repeats = set()
|
124 |
sequence_data = []
|
|
|
125 |
for sheet_name in excel_data.sheet_names:
|
126 |
df = excel_data.parse(sheet_name)
|
127 |
if len(df.columns) < 3:
|
@@ -130,10 +130,14 @@ def process_excel(excel_data, analysis_type):
|
|
130 |
for _, row in df.iterrows():
|
131 |
entry_id = str(row[0])
|
132 |
protein_name = str(row[1])
|
133 |
-
sequence = str(row[2]).replace('"', '').replace(' ', '')
|
|
|
|
|
|
|
134 |
freq = get_or_process_sequence(sequence, analysis_type)
|
135 |
sequence_data.append((entry_id, protein_name, freq))
|
136 |
repeats.update(freq.keys())
|
|
|
137 |
return repeats, sequence_data
|
138 |
|
139 |
def create_excel(sequences_data, repeats, filenames):
|
@@ -178,7 +182,7 @@ if uploaded_files:
|
|
178 |
all_sequences_data.append(sequence_data)
|
179 |
filenames.append(file.name)
|
180 |
if all_sequences_data:
|
181 |
-
st.
|
182 |
excel_file = create_excel(all_sequences_data, all_repeats, filenames)
|
183 |
st.download_button(
|
184 |
label="Download Excel file",
|
|
|
111 |
for k, v in hetero_repeats.items():
|
112 |
final_repeats[k] += v
|
113 |
|
|
|
114 |
results_collection.insert_one({
|
115 |
"_id": sequence_hash,
|
116 |
"analysis_type": analysis_type,
|
|
|
121 |
def process_excel(excel_data, analysis_type):
|
122 |
repeats = set()
|
123 |
sequence_data = []
|
124 |
+
count = 0
|
125 |
for sheet_name in excel_data.sheet_names:
|
126 |
df = excel_data.parse(sheet_name)
|
127 |
if len(df.columns) < 3:
|
|
|
130 |
for _, row in df.iterrows():
|
131 |
entry_id = str(row[0])
|
132 |
protein_name = str(row[1])
|
133 |
+
sequence = str(row[2]).replace('"', '').replace(' ', '').strip()
|
134 |
+
if not sequence: # Skip empty sequence
|
135 |
+
continue
|
136 |
+
count += 1
|
137 |
freq = get_or_process_sequence(sequence, analysis_type)
|
138 |
sequence_data.append((entry_id, protein_name, freq))
|
139 |
repeats.update(freq.keys())
|
140 |
+
st.toast(f"{count} sequences processed.")
|
141 |
return repeats, sequence_data
|
142 |
|
143 |
def create_excel(sequences_data, repeats, filenames):
|
|
|
182 |
all_sequences_data.append(sequence_data)
|
183 |
filenames.append(file.name)
|
184 |
if all_sequences_data:
|
185 |
+
st.toast(f"Processed {len(uploaded_files)} file(s) successfully.")
|
186 |
excel_file = create_excel(all_sequences_data, all_repeats, filenames)
|
187 |
st.download_button(
|
188 |
label="Download Excel file",
|