|
import gradio as gr |
|
import numpy as np |
|
import random |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from io import BytesIO |
|
import base64 |
|
|
|
|
|
# Per-species lookup tables. Every species starts with the same three empty
# mappings ("codon_table", "trna", "codon_usage"); each species gets its own
# independent dict objects so filling one never affects another.
species_data = {
    species_name: {"codon_table": {}, "trna": {}, "codon_usage": {}}
    for species_name in (
        "human",
        "mouse",
        "virus",
        "Escherichia coli",
        "酿酒酵母",
        "Pichia",
    )
}
|
|
|
|
|
def find_longest_cds(seq):
    """Locate the coding sequence opened by the first start codon in *seq*.

    The first ``ATG`` (``AUG`` in RNA notation) is taken as the start codon.
    The sequence is then read codon by codon from that position, so only a
    stop codon (``TAA``, ``TAG``, ``TGA``) in the same reading frame as the
    start codon terminates the CDS. Note that, despite the function name,
    only the reading frame opened by the first start codon is examined.

    Args:
        seq: Nucleotide sequence as a string. Upper/lower case and ``T``/``U``
            are treated as equivalent.

    Returns:
        A ``(start, end)`` tuple such that ``seq[start:end]`` is the CDS
        including its stop codon. ``start`` is ``-1`` when no start codon is
        present. ``end`` is ``len(seq)`` when there is no start codon or no
        in-frame stop codon after it.
    """
    # Character-for-character translation keeps the length unchanged, so the
    # indices found below are valid for the caller's original string.
    normalized = seq.translate(str.maketrans("acgtuU", "ACGTTT"))

    start = normalized.find("ATG")
    if start < 0:
        return start, len(seq)

    stop_codons = ("TAA", "TAG", "TGA")
    # Step in whole codons so out-of-frame matches (e.g. the "TGA" that
    # overlaps "ATGA") are not mistaken for a stop codon.
    for pos in range(start + 3, len(normalized) - 2, 3):
        if normalized[pos:pos + 3] in stop_codons:
            return start, pos + 3

    return start, len(seq)
|
|
|
# Number of codons that encode each amino acid in the standard genetic code
# (one-letter codes; "*" is the translation stop). The 20 amino acids account
# for the 61 sense codons and "*" for the 3 stop codons.
_CODON_DEGENERACY = {
    "A": 4, "R": 6, "N": 2, "D": 2, "C": 2,
    "Q": 2, "E": 2, "G": 4, "H": 2, "I": 3,
    "L": 6, "K": 2, "M": 1, "F": 2, "P": 4,
    "S": 6, "T": 4, "W": 1, "Y": 2, "V": 4,
    "*": 3,
}


def calculate_cds_variants(protein_seq):
    """Count the distinct coding sequences that translate to *protein_seq*.

    The count is the product, over all residues, of the number of synonymous
    codons for that residue in the standard genetic code.

    Args:
        protein_seq: Protein sequence in one-letter codes. Case-insensitive;
            whitespace is ignored and ``*`` denotes a stop codon.

    Returns:
        The number of synonymous coding sequences as an ``int``. An empty
        sequence yields ``1``.

    Raises:
        ValueError: If the sequence contains a character that is not a
            standard amino-acid code, ``*`` or whitespace.
    """
    total = 1
    for position, residue in enumerate(protein_seq, start=1):
        if residue.isspace():
            continue
        try:
            total *= _CODON_DEGENERACY[residue.upper()]
        except KeyError:
            raise ValueError(
                f"Unknown amino acid {residue!r} at position {position}"
            ) from None
    return total
|
|
|
def optimize_cds(protein_seq, species, method):
    """Generate 20 placeholder CDS candidates ranked by composite score.

    This is a mock implementation: each candidate is a random nucleotide
    string of length ``3 * len(protein_seq)`` and the tRNA, usage and MFE
    metrics are random numbers. Only the GC content is measured from the
    generated sequence. ``species`` and ``method`` are accepted for interface
    compatibility but are not used yet.

    Args:
        protein_seq: Protein sequence; only its length is used.
        species: Selected species name (currently unused).
        method: Selected optimization method (currently unused).

    Returns:
        A ``pandas.DataFrame`` with 20 rows and the columns ``Sequence``,
        ``GC%``, ``tRNA``, ``Usage``, ``MFE`` and ``Score`` (all metrics
        formatted as strings), sorted by score from best to worst.
    """
    columns = ["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"]
    seq_len = len(protein_seq) * 3

    ranked = []
    for _ in range(20):
        seq = "".join(random.choices("ACGT", k=seq_len))
        # Report the GC fraction of the sequence actually shown in the row;
        # an empty sequence has no GC content.
        gc = (seq.count("G") + seq.count("C")) / seq_len if seq_len else 0.0
        trna = random.uniform(0.5, 1.0)
        usage = random.uniform(0.6, 0.95)
        mfe = random.uniform(-30, -10)
        # Equal-weight composite; MFE is negative, so -mfe/40 maps it to a
        # positive contribution.
        score = gc * 0.25 + trna * 0.25 + usage * 0.25 + (-mfe / 40) * 0.25

        row = {
            "Sequence": seq,
            "GC%": f"{gc*100:.1f}%",
            "tRNA": f"{trna:.3f}",
            "Usage": f"{usage:.3f}",
            "MFE": f"{mfe:.1f}",
            "Score": f"{score:.3f}",
        }
        ranked.append((score, row))

    # Sort on the unformatted score so the table really is a ranking.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return pd.DataFrame([row for _, row in ranked], columns=columns)
|
|
|
def _load_utr_candidates(source, label):
    """Return the non-empty UTR sequences described by *source*.

    *source* may be a path to a text file, an object whose ``name`` attribute
    is such a path, raw text, or an iterable of sequences. Files and raw text
    contribute one candidate per non-blank line.
    """
    import os

    if source is None:
        raise ValueError(f"No {label} candidates were provided")

    path = source if isinstance(source, str) else getattr(source, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            lines = handle.read().splitlines()
    elif isinstance(source, str):
        lines = source.splitlines()
    else:
        lines = [str(item) for item in source]

    candidates = [line.strip() for line in lines if line.strip()]
    if not candidates:
        raise ValueError(f"No {label} candidates were provided")
    return candidates


def _abbreviate(sequence):
    """Shorten *sequence* to its first 10 characters plus "..." if longer than 13."""
    return sequence[:10] + "..." if len(sequence) > 13 else sequence


def design_mrna(utr5_candidates, utr3_candidates, cds_seq):
    """Assemble 20 full mRNA designs from random 5'UTR / 3'UTR pairings.

    Each design concatenates a randomly chosen 5'UTR, the CDS and a randomly
    chosen 3'UTR. The reported MFE is a random placeholder value.

    Args:
        utr5_candidates: 5'UTR candidates, given as a list of sequences, a
            path to a text file with one sequence per line, an uploaded-file
            object exposing that path as ``.name``, or raw text.
        utr3_candidates: 3'UTR candidates, in any of the same forms.
        cds_seq: Coding sequence placed between the two UTRs.

    Returns:
        A ``pandas.DataFrame`` with 20 rows and the columns ``Design``,
        ``5'UTR``, ``3'UTR`` (both abbreviated for display), ``MFE`` and
        ``Sequence`` (the full concatenated design).

    Raises:
        ValueError: If either candidate set is missing or empty.
    """
    # The UI passes file uploads here, so resolve them to sequence lists
    # before sampling; sampling the upload value itself would be meaningless.
    utr5_pool = _load_utr_candidates(utr5_candidates, "5'UTR")
    utr3_pool = _load_utr_candidates(utr3_candidates, "3'UTR")

    designs = []
    for number in range(1, 21):
        utr5 = random.choice(utr5_pool)
        utr3 = random.choice(utr3_pool)
        mfe = random.uniform(-50, -20)
        designs.append({
            "Design": f"Design_{number}",
            "5'UTR": _abbreviate(utr5),
            "3'UTR": _abbreviate(utr3),
            "MFE": f"{mfe:.1f}",
            "Sequence": utr5 + cds_seq + utr3,
        })
    return pd.DataFrame(designs)
|
|
|
|
|
def visualize_annotation(seq):
    """Render *seq* as HTML with its 5'UTR, CDS and 3'UTR colour-coded.

    Region boundaries come from ``find_longest_cds``. A region that is absent
    is shown as ``N/A``: the 5'UTR when the CDS starts at position 0 or no
    start was found, the CDS when no start was found, and the 3'UTR when the
    CDS runs to the end of the sequence.

    Args:
        seq: Sequence text entered by the user.

    Returns:
        An HTML fragment containing the three highlighted regions followed by
        a colour legend.
    """
    from html import escape

    start, end = find_longest_cds(seq)

    # The sequence is free-form user input that ends up in rendered HTML, so
    # escape it to keep markup characters from being interpreted.
    utr5_text = escape(seq[:start]) if start > 0 else 'N/A'
    cds_text = escape(seq[start:end]) if start >= 0 else 'N/A'
    utr3_text = escape(seq[end:]) if end < len(seq) else 'N/A'

    markup = f"""
    <div style="font-family: monospace; font-size: 14px; line-height: 1.8;">
        <div style="background-color: #ffcccc; display: inline-block; padding: 2px;">
            5'UTR: {utr5_text}
        </div>
        <div style="background-color: #ccffcc; display: inline-block; padding: 2px;">
            CDS: {cds_text}
        </div>
        <div style="background-color: #ccccff; display: inline-block; padding: 2px;">
            3'UTR: {utr3_text}
        </div>
    </div>
    <p>Annotation Legend:</p>
    <div style="display: flex; gap: 10px;">
        <div style="background-color: #ffcccc; padding: 5px;">5'UTR</div>
        <div style="background-color: #ccffcc; padding: 5px;">CDS</div>
        <div style="background-color: #ccccff; padding: 5px;">3'UTR</div>
    </div>
    """
    return markup
|
|
|
|
|
# Gradio user interface: one tab per tool, wired to the functions defined
# earlier in this file, plus a resources/download tab and a page footer.
with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")

    with gr.Tab("mRNA Annotation"):
        gr.Markdown("## mRNA Sequence Annotation")
        mrna_input = gr.Textbox(label="mRNA Sequence", placeholder="Enter mRNA sequence here...")
        annotate_btn = gr.Button("Annotate Regions")
        annotation_output = gr.HTML(label="Sequence Annotation")
        annotate_btn.click(visualize_annotation, inputs=mrna_input, outputs=annotation_output)

    with gr.Tab("CDS Variants"):
        gr.Markdown("## Calculate Potential CDS Variants")
        protein_input = gr.Textbox(label="Protein Sequence", placeholder="Enter protein sequence here...")
        calc_btn = gr.Button("Calculate Variants")
        variants_output = gr.Number(label="Potential CDS Variants")
        calc_btn.click(calculate_cds_variants, inputs=protein_input, outputs=variants_output)

    with gr.Tab("CDS Optimization"):
        gr.Markdown("## Optimize CDS Sequence")
        with gr.Row():
            protein_seq = gr.Textbox(label="Protein Sequence")
            species = gr.Dropdown(list(species_data.keys()), label="Species", value="human")

        method = gr.Radio(
            ["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
            label="Optimization Method",
            value="Max GC Content",
        )

        optimize_btn = gr.Button("Generate Optimized Sequences")
        results_table = gr.Dataframe(
            label="Top 20 Optimized Sequences",
            headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"],
        )
        optimize_btn.click(optimize_cds, inputs=[protein_seq, species, method], outputs=results_table)

        plot = gr.Plot(label="Optimization Scores")

        def update_plot(df):
            """Return a bar chart of the "Score" column, or None if there is nothing to plot."""
            if df is None or len(df) == 0 or "Score" not in df:
                return None

            # Cells may be empty or hand-edited; skip anything non-numeric
            # instead of failing the whole callback.
            scores = pd.to_numeric(df["Score"], errors="coerce").dropna().tolist()
            if not scores:
                return None

            # Build the figure without pyplot: pyplot keeps every figure it
            # creates registered until it is explicitly closed, which piles up
            # in a long-running server process.
            from matplotlib.figure import Figure

            fig = Figure()
            ax = fig.subplots()
            ax.bar(range(len(scores)), scores, color='skyblue')
            ax.set_xlabel("Sequence Rank")
            ax.set_ylabel("Composite Score")
            ax.set_title("Optimization Scores of Top Sequences")
            return fig

        results_table.change(update_plot, inputs=results_table, outputs=plot)

    with gr.Tab("Full mRNA Design"):
        gr.Markdown("## Design Full mRNA Sequence")
        with gr.Row():
            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
        cds_input = gr.Textbox(label="CDS Sequence")
        design_btn = gr.Button("Design mRNA Sequences")
        design_results = gr.Dataframe(
            label="Top 20 Designs",
            headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"],
        )
        design_btn.click(design_mrna, inputs=[utr5_upload, utr3_upload, cds_input], outputs=design_results)

    with gr.Tab("Resources & Links"):
        gr.Markdown("## Helpful Resources")
        gr.Markdown("""
- [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)
- [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)
- [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)
- [ViralZone](https://viralzone.expasy.org/)
- [Codon Usage Database](https://www.kazusa.or.jp/codon/)
""")

        gr.Markdown("## Download All Results")
        download_btn = gr.Button("Download Results Package")

        def build_results_package(optimized_df, designs_df):
            """Write the two result tables as CSV files into a zip and return its path."""
            import os
            import tempfile
            import zipfile

            # A fresh temporary directory per request keeps concurrent
            # downloads from overwriting each other's archive.
            archive_path = os.path.join(tempfile.mkdtemp(), "results.zip")
            tables = (
                ("optimized_cds.csv", optimized_df),
                ("mrna_designs.csv", designs_df),
            )
            with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
                for csv_name, table in tables:
                    if table is not None:
                        archive.writestr(csv_name, table.to_csv(index=False))
            return archive_path

        download_file = gr.File(label="Download Results")
        download_btn.click(
            build_results_package,
            inputs=[results_table, design_results],
            outputs=download_file,
        )

    gr.Markdown("---")
    # NOTE(review): the contact address below reads like an e-mail obfuscation
    # placeholder rather than a real address - confirm the intended value.
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
        <p>Academic Collaboration Platform for Vaccine Design | Contact: [email protected]</p>
    </div>
    """)
|
|
|
|
|
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # "0.0.0.0" makes the server listen on all network interfaces.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    app.launch(**launch_options)
|
|