Spaces:

MoraxCheng
/

Transeption_iGEM_BASISCHINA_2025

Runtime error

App Files Files Community

MoraxCheng committed on Jun 25

Commit

7c1376e

1 Parent(s): 9fff9fd

Refactor scoring matrix visualization to simplify font and figure size adjustments, ensuring consistent label formatting and improved clarity.

Browse files

Files changed (1) hide show

app.py +13 -51

app.py CHANGED Viewed

@@ -145,21 +145,9 @@ def create_scoring_matrix_visual(scores,sequence,image_index=0,mutation_range_st
   filtered_scores=filtered_scores[filtered_scores.position.isin(range(mutation_range_start,mutation_range_end+1))]
   piv=filtered_scores.pivot(index='position',columns='target_AA',values='avg_score').round(4)
-  # Calculate dynamic font size based on matrix dimensions
   mutation_range_len = mutation_range_end - mutation_range_start + 1
-  # Adjust font size based on number of positions
-  if mutation_range_len > 30:
-    fontsize = 8
-  elif mutation_range_len > 20:
-    fontsize = 10
-  elif mutation_range_len > 15:
-    fontsize = 12
-  elif mutation_range_len > 10:
-    fontsize = 14
-  else:
-    fontsize = 16
   # Save CSV file
   csv_path = 'fitness_scoring_substitution_matrix_{}_{}.csv'.format(unique_id, image_index)
@@ -189,22 +177,8 @@ def create_scoring_matrix_visual(scores,sequence,image_index=0,mutation_range_st
   csv_df.to_csv(csv_path, index=False)
   # Continue with visualization
-  # Adjust figure size based on content
-  if mutation_range_len <= 10:
-    fig_width = max(12, len(AA_vocab) * 0.6)
-    fig_height = max(8, mutation_range_len * 0.8)
-  elif mutation_range_len <= 20:
-    fig_width = max(14, len(AA_vocab) * 0.5)
-    fig_height = max(10, mutation_range_len * 0.6)
-  else:
-    fig_width = max(16, len(AA_vocab) * 0.4)
-    fig_height = max(12, mutation_range_len * 0.5)
-  # Limit maximum size
-  fig_width = min(fig_width, 30)
-  fig_height = min(fig_height, 40)
-  _, ax = plt.subplots(figsize=(fig_width, fig_height))
   scores_dict = {}
   valid_mutant_set=set(filtered_scores.mutant)
   ax.tick_params(bottom=True, top=True, left=True, right=True)
@@ -221,42 +195,30 @@ def create_scoring_matrix_visual(scores,sequence,image_index=0,mutation_range_st
             scores_dict[mutant] = float(score_value)
         else:
           scores_dict[mutant]=0.0
-    # Format labels based on available space
-    if fontsize <= 10:
-      # For small fonts, show only score
-      labels = (np.asarray(["{:.2f}".format(value) for _, value in scores_dict.items() ])).reshape(mutation_range_len,len(AA_vocab))
-    else:
-      # For larger fonts, show mutation and score
-      labels = (np.asarray(["{} \n{:.3f}".format(symb,value) for symb, value in scores_dict.items() ])).reshape(mutation_range_len,len(AA_vocab))
     heat = sns.heatmap(piv,annot=labels,fmt="",cmap='RdYlGn',linewidths=0.30,ax=ax,vmin=np.percentile(scores.avg_score,2),vmax=np.percentile(scores.avg_score,98),\
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
   else:
     heat = sns.heatmap(piv,cmap='RdYlGn',linewidths=0.30,ax=ax,vmin=np.percentile(scores.avg_score,2),vmax=np.percentile(scores.avg_score,98),\
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
-  # Adjust label sizes proportionally
-  cbar_label_size = max(10, fontsize * 1.2)
-  title_size = max(14, fontsize * 1.5)
-  axis_label_size = max(12, fontsize * 1.3)
-  heat.figure.axes[-1].yaxis.label.set_size(fontsize=cbar_label_size)
-  heat.set_title("Higher predicted scores (green) imply higher protein fitness",fontsize=title_size, pad=20)
-  heat.set_ylabel("Sequence position", fontsize = axis_label_size)
-  heat.set_xlabel("Amino Acid mutation", fontsize = axis_label_size)
   # Set y-axis labels (positions)
-  tick_label_size = max(8, fontsize * 0.8)
   yticklabels = [str(pos)+' ('+sequence[pos-1]+')' for pos in range(mutation_range_start,mutation_range_end+1)]
-  heat.set_yticklabels(yticklabels, fontsize=tick_label_size, rotation=0)
   # Set x-axis labels (amino acids) - ensuring correct number
-  heat.set_xticklabels(list(AA_vocab), fontsize=tick_label_size)
   try:
     plt.tight_layout()
     image_path = 'fitness_scoring_substitution_matrix_{}_{}.png'.format(unique_id, image_index)
-    # Increase DPI for better quality when font is small
-    dpi = 150 if fontsize <= 10 else 100
-    plt.savefig(image_path, dpi=dpi, bbox_inches='tight')
     return image_path, csv_path
   finally:
     plt.close('all')  # Ensure all figures are closed

   filtered_scores=filtered_scores[filtered_scores.position.isin(range(mutation_range_start,mutation_range_end+1))]
   piv=filtered_scores.pivot(index='position',columns='target_AA',values='avg_score').round(4)
+  # Calculate mutation range length
   mutation_range_len = mutation_range_end - mutation_range_start + 1
   # Save CSV file
   csv_path = 'fitness_scoring_substitution_matrix_{}_{}.csv'.format(unique_id, image_index)
   csv_df.to_csv(csv_path, index=False)
   # Continue with visualization
+  # Use large fixed width for clarity, height scales with positions (as in reference)
+  fig, ax = plt.subplots(figsize=(50, mutation_range_len))
   scores_dict = {}
   valid_mutant_set=set(filtered_scores.mutant)
   ax.tick_params(bottom=True, top=True, left=True, right=True)
             scores_dict[mutant] = float(score_value)
         else:
           scores_dict[mutant]=0.0
+    # Format labels as in reference - always show mutation and score with 4 decimal places
+    labels = (np.asarray(["{} \n {:.4f}".format(symb,value) for symb, value in scores_dict.items() ])).reshape(mutation_range_len,len(AA_vocab))
     heat = sns.heatmap(piv,annot=labels,fmt="",cmap='RdYlGn',linewidths=0.30,ax=ax,vmin=np.percentile(scores.avg_score,2),vmax=np.percentile(scores.avg_score,98),\
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
   else:
     heat = sns.heatmap(piv,cmap='RdYlGn',linewidths=0.30,ax=ax,vmin=np.percentile(scores.avg_score,2),vmax=np.percentile(scores.avg_score,98),\
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
+  # Use label sizes from reference
+  heat.figure.axes[-1].yaxis.label.set_size(fontsize=int(fontsize*1.5))
+  heat.set_title("Higher predicted scores (green) imply higher protein fitness",fontsize=fontsize*2, pad=40)
+  heat.set_ylabel("Sequence position", fontsize = fontsize*2)
+  heat.set_xlabel("Amino Acid mutation", fontsize = fontsize*2)
   # Set y-axis labels (positions)
   yticklabels = [str(pos)+' ('+sequence[pos-1]+')' for pos in range(mutation_range_start,mutation_range_end+1)]
+  heat.set_yticklabels(yticklabels, fontsize=fontsize, rotation=0)
   # Set x-axis labels (amino acids) - ensuring correct number
+  heat.set_xticklabels(list(AA_vocab), fontsize=fontsize)
   try:
     plt.tight_layout()
     image_path = 'fitness_scoring_substitution_matrix_{}_{}.png'.format(unique_id, image_index)
+    plt.savefig(image_path, dpi=100)
     return image_path, csv_path
   finally:
     plt.close('all')  # Ensure all figures are closed