Jayesh13 committed on
Commit
b33eb44
·
verified ·
1 Parent(s): 01b258e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -111,7 +111,6 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
111
  for k, v in hetero_repeats.items():
112
  final_repeats[k] += v
113
 
114
- # Save to DB for caching
115
  results_collection.insert_one({
116
  "_id": sequence_hash,
117
  "analysis_type": analysis_type,
@@ -122,6 +121,7 @@ def get_or_process_sequence(sequence, analysis_type, overlap=50):
122
  def process_excel(excel_data, analysis_type):
123
  repeats = set()
124
  sequence_data = []
 
125
  for sheet_name in excel_data.sheet_names:
126
  df = excel_data.parse(sheet_name)
127
  if len(df.columns) < 3:
@@ -130,10 +130,14 @@ def process_excel(excel_data, analysis_type):
130
  for _, row in df.iterrows():
131
  entry_id = str(row[0])
132
  protein_name = str(row[1])
133
- sequence = str(row[2]).replace('"', '').replace(' ', '')
 
 
 
134
  freq = get_or_process_sequence(sequence, analysis_type)
135
  sequence_data.append((entry_id, protein_name, freq))
136
  repeats.update(freq.keys())
 
137
  return repeats, sequence_data
138
 
139
  def create_excel(sequences_data, repeats, filenames):
@@ -178,7 +182,7 @@ if uploaded_files:
178
  all_sequences_data.append(sequence_data)
179
  filenames.append(file.name)
180
  if all_sequences_data:
181
- st.success(f"Processed {len(uploaded_files)} files successfully!")
182
  excel_file = create_excel(all_sequences_data, all_repeats, filenames)
183
  st.download_button(
184
  label="Download Excel file",
 
111
  for k, v in hetero_repeats.items():
112
  final_repeats[k] += v
113
 
 
114
  results_collection.insert_one({
115
  "_id": sequence_hash,
116
  "analysis_type": analysis_type,
 
121
  def process_excel(excel_data, analysis_type):
122
  repeats = set()
123
  sequence_data = []
124
+ count = 0
125
  for sheet_name in excel_data.sheet_names:
126
  df = excel_data.parse(sheet_name)
127
  if len(df.columns) < 3:
 
130
  for _, row in df.iterrows():
131
  entry_id = str(row[0])
132
  protein_name = str(row[1])
133
+ sequence = str(row[2]).replace('"', '').replace(' ', '').strip()
134
+ if not sequence: # Skip empty sequence
135
+ continue
136
+ count += 1
137
  freq = get_or_process_sequence(sequence, analysis_type)
138
  sequence_data.append((entry_id, protein_name, freq))
139
  repeats.update(freq.keys())
140
+ st.toast(f"{count} sequences processed.")
141
  return repeats, sequence_data
142
 
143
  def create_excel(sequences_data, repeats, filenames):
 
182
  all_sequences_data.append(sequence_data)
183
  filenames.append(file.name)
184
  if all_sequences_data:
185
+ st.toast(f"Processed {len(uploaded_files)} file(s) successfully.")
186
  excel_file = create_excel(all_sequences_data, all_repeats, filenames)
187
  st.download_button(
188
  label="Download Excel file",