import gradio as gr
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from io import BytesIO
import base64
# Placeholder data - replace with real data before production use.
# Every species maps to its own set of (currently empty) lookup tables.
species_data = {
    species_name: {"codon_table": {}, "trna": {}, "codon_usage": {}}
    for species_name in (
        "human",
        "mouse",
        "virus",
        "Escherichia coli",
        "酿酒酵母",
        "Pichia",
    )
}
# Simplified ORF finder - a dedicated bioinformatics library is still
# preferable for production-grade annotation.
def find_longest_cds(seq):
    """Locate the longest open reading frame (ORF) in a nucleotide sequence.

    All three forward reading frames are scanned codon by codon. An ORF
    starts at the first ``ATG`` seen after the previous stop in that frame
    and ends with the first in-frame stop codon (``TAA``, ``TAG``, ``TGA``).
    Input may be DNA or RNA (``U`` is read as ``T``) in either letter case.

    ORFs terminated by a stop codon are preferred. Only when none exists is
    the longest unterminated ORF (running to the end of the sequence)
    returned. Ties are broken in favour of the smallest start index.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A ``(start, end)`` tuple of indices into ``seq`` such that
        ``seq[start:end]`` is the CDS including its stop codon. When no
        start codon is present the result is ``(-1, len(seq))``.
    """
    stop_codons = ("TAA", "TAG", "TGA")
    # Character-for-character mapping, so indices stay valid for `seq`.
    normalized = seq.translate(str.maketrans("acgtuU", "ACGTTT"))
    length = len(normalized)

    terminated = []
    unterminated = []
    for frame in range(3):
        orf_start = None
        for pos in range(frame, length - 2, 3):
            codon = normalized[pos:pos + 3]
            if orf_start is None:
                if codon == "ATG":
                    orf_start = pos
            elif codon in stop_codons:
                terminated.append((orf_start, pos + 3))
                orf_start = None
        if orf_start is not None:
            unterminated.append((orf_start, length))

    candidates = terminated or unterminated
    if not candidates:
        return -1, length
    # Longest first; for equal lengths the earliest start wins.
    return max(candidates, key=lambda orf: (orf[1] - orf[0], -orf[0]))
def calculate_cds_variants(protein_seq):
    """Count the distinct coding sequences that encode a protein.

    The count is the product, over all residues, of the number of
    synonymous codons each amino acid has in the standard genetic code.
    Whitespace is ignored and letters are case-insensitive. ``*`` denotes a
    stop and contributes its three stop codons.

    Args:
        protein_seq: Protein sequence in one-letter amino-acid code.

    Returns:
        The number of possible coding sequences as an ``int`` (``1`` for an
        empty sequence).

    Raises:
        ValueError: If the sequence contains a symbol that is not one of
            the 20 standard amino acids or ``*``.
    """
    # Synonymous codon counts in the standard genetic code (61 sense + 3 stop).
    codons_per_residue = {
        "A": 4, "R": 6, "N": 2, "D": 2, "C": 2,
        "Q": 2, "E": 2, "G": 4, "H": 2, "I": 3,
        "L": 6, "K": 2, "M": 1, "F": 2, "P": 4,
        "S": 6, "T": 4, "W": 1, "Y": 2, "V": 4,
        "*": 3,
    }
    residues = "".join(protein_seq.split()).upper()

    variant_count = 1
    for residue in residues:
        try:
            variant_count *= codons_per_residue[residue]
        except KeyError:
            raise ValueError(
                f"Unrecognized amino acid symbol: {residue!r}"
            ) from None
    return variant_count
def optimize_cds(protein_seq, species, method):
    """Build a table of 20 randomly generated placeholder candidates.

    This is a stand-in for a real optimizer: ``species`` and ``method`` are
    accepted but not consulted, and every metric is drawn from the
    module-level ``random`` generator.

    Args:
        protein_seq: Protein sequence; only its length is used (three
            random nucleotides are emitted per residue).
        species: Selected species name (currently unused).
        method: Selected optimization method (currently unused).

    Returns:
        A ``pandas.DataFrame`` with the string-valued columns ``Sequence``,
        ``GC%``, ``tRNA``, ``Usage``, ``MFE`` and ``Score``.
    """
    nucleotide_count = len(protein_seq) * 3

    def _random_candidate():
        # The order of the draws below determines the output under a seeded
        # generator, so it must not be rearranged.
        nucleotides = ''.join(random.choices("ACGT", k=nucleotide_count))
        gc_fraction = random.uniform(0.3, 0.7)
        trna_value = random.uniform(0.5, 1.0)
        usage_value = random.uniform(0.6, 0.95)
        folding_energy = random.uniform(-30, -10)
        # Equal-weight blend of the four metrics; the energy term is
        # negated and divided by 40 before weighting.
        composite = (
            gc_fraction*0.25
            + trna_value*0.25
            + usage_value*0.25
            + (-folding_energy/40)*0.25
        )
        return {
            "Sequence": nucleotides,
            "GC%": f"{gc_fraction*100:.1f}%",
            "tRNA": f"{trna_value:.3f}",
            "Usage": f"{usage_value:.3f}",
            "MFE": f"{folding_energy:.1f}",
            "Score": f"{composite:.3f}",
        }

    return pd.DataFrame([_random_candidate() for _ in range(20)])
def _load_utr_candidates(source):
    """Return UTR candidate sequences from an upload or a collection.

    ``source`` may be ``None`` (no upload), a filesystem path (``str`` or
    path-like), an uploaded-file wrapper exposing its path as ``.name``,
    or an iterable of sequences that is used as-is. Files are read as
    UTF-8 text with one candidate per line; blank lines are skipped.
    """
    if source is None:
        return []
    if isinstance(source, str) or hasattr(source, "__fspath__"):
        path = source
    elif hasattr(source, "name"):
        path = source.name
    else:
        return list(source)
    with open(path, encoding="utf-8") as handle:
        stripped_lines = (line.strip() for line in handle)
        return [line for line in stripped_lines if line]


def design_mrna(utr5_candidates, utr3_candidates, cds_seq):
    """Assemble 20 placeholder mRNA designs from random UTR pairings.

    Each design joins a randomly chosen 5'UTR, the CDS and a randomly
    chosen 3'UTR. The reported MFE is a random placeholder value, not a
    computed folding energy.

    Args:
        utr5_candidates: 5'UTR candidates, given as a file path, an
            uploaded-file object with a ``.name`` path, or a collection of
            sequence strings.
        utr3_candidates: 3'UTR candidates, in the same forms.
        cds_seq: Coding sequence placed between the two UTRs.

    Returns:
        A ``pandas.DataFrame`` with columns ``Design``, ``5'UTR``,
        ``3'UTR``, ``MFE`` and ``Sequence``. UTR columns hold a preview
        truncated to 10 characters plus ``...`` when the UTR is longer
        than 13 characters.

    Raises:
        ValueError: If either candidate pool is missing or empty.
    """
    utr5_pool = _load_utr_candidates(utr5_candidates)
    utr3_pool = _load_utr_candidates(utr3_candidates)
    if not utr5_pool or not utr3_pool:
        raise ValueError(
            "At least one 5'UTR and one 3'UTR candidate sequence is required."
        )

    def _preview(utr):
        return utr[:10] + "..." if len(utr) > 13 else utr

    designs = []
    for index in range(1, 21):
        utr5 = random.choice(utr5_pool)
        utr3 = random.choice(utr3_pool)
        mfe = random.uniform(-50, -20)
        designs.append({
            "Design": f"Design_{index}",
            "5'UTR": _preview(utr5),
            "3'UTR": _preview(utr3),
            "MFE": f"{mfe:.1f}",
            "Sequence": utr5 + cds_seq + utr3,
        })
    return pd.DataFrame(designs)
# Annotation visualization
def visualize_annotation(seq):
    """Render the 5'UTR / CDS / 3'UTR split of a sequence as markup text.

    The region boundaries come from ``find_longest_cds``. Because the
    result is displayed in an HTML component and ``seq`` is typed in by
    the user, every sequence fragment is HTML-escaped before being
    embedded.

    Args:
        seq: Nucleotide sequence entered by the user.

    Returns:
        A string with one line per region followed by the legend heading.
        A region is reported as ``N/A`` when it is absent.
    """
    # Local import: the local result used to be called `html`, and this
    # keeps the dependency scoped to the only function that needs it.
    from html import escape

    start, end = find_longest_cds(seq)
    utr5_text = escape(seq[:start]) if start > 0 else 'N/A'
    cds_text = escape(seq[start:end]) if start >= 0 else 'N/A'
    utr3_text = escape(seq[end:]) if end < len(seq) else 'N/A'

    # Leading and trailing empty items reproduce the newline at both ends
    # of the original template.
    return "\n".join([
        "",
        f"5'UTR: {utr5_text}",
        f"CDS: {cds_text}",
        f"3'UTR: {utr3_text}",
        "Annotation Legend:",
        "",
    ])
# Build the Gradio interface.
# NOTE(review): the indentation of this section was lost in the source, so
# the Row/Tab nesting below is a reconstruction - confirm the intended layout.
with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")

    with gr.Tab("mRNA Annotation"):
        gr.Markdown("## mRNA Sequence Annotation")
        mrna_input = gr.Textbox(label="mRNA Sequence", placeholder="Enter mRNA sequence here...")
        annotate_btn = gr.Button("Annotate Regions")
        annotation_output = gr.HTML(label="Sequence Annotation")
        annotate_btn.click(visualize_annotation, inputs=mrna_input, outputs=annotation_output)

    with gr.Tab("CDS Variants"):
        gr.Markdown("## Calculate Potential CDS Variants")
        protein_input = gr.Textbox(label="Protein Sequence", placeholder="Enter protein sequence here...")
        calc_btn = gr.Button("Calculate Variants")
        variants_output = gr.Number(label="Potential CDS Variants")
        calc_btn.click(calculate_cds_variants, inputs=protein_input, outputs=variants_output)

    with gr.Tab("CDS Optimization"):
        gr.Markdown("## Optimize CDS Sequence")
        with gr.Row():
            protein_seq = gr.Textbox(label="Protein Sequence")
            species = gr.Dropdown(list(species_data.keys()), label="Species", value="human")
            method = gr.Radio(
                ["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
                label="Optimization Method",
                value="Max GC Content",
            )
        optimize_btn = gr.Button("Generate Optimized Sequences")
        results_table = gr.Dataframe(
            label="Top 20 Optimized Sequences",
            headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"],
        )
        optimize_btn.click(optimize_cds, inputs=[protein_seq, species, method], outputs=results_table)

        # Score visualization
        plot = gr.Plot(label="Optimization Scores")

        def update_plot(df):
            """Draw a bar chart of the ``Score`` column of the results table.

            Returns ``None`` (no plot) when the table is missing, empty, has
            no ``Score`` column, or holds no numeric scores.
            """
            if df is None or len(df) == 0 or "Score" not in df:
                return None
            # Blank or non-numeric cells become NaN and are dropped
            # instead of raising inside the event handler.
            scores = pd.to_numeric(df["Score"], errors="coerce").dropna().tolist()
            if not scores:
                return None
            # A standalone Figure is not registered with pyplot's global
            # figure manager, so repeated updates do not accumulate open
            # figures in a long-running server process.
            fig = plt.Figure()
            ax = fig.subplots()
            ax.bar(range(len(scores)), scores, color='skyblue')
            ax.set_xlabel("Sequence Rank")
            ax.set_ylabel("Composite Score")
            ax.set_title("Optimization Scores of Top Sequences")
            return fig

        results_table.change(update_plot, inputs=results_table, outputs=plot)

    with gr.Tab("Full mRNA Design"):
        gr.Markdown("## Design Full mRNA Sequence")
        with gr.Row():
            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
        cds_input = gr.Textbox(label="CDS Sequence")
        design_btn = gr.Button("Design mRNA Sequences")
        design_results = gr.Dataframe(
            label="Top 20 Designs",
            headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"],
        )
        design_btn.click(design_mrna, inputs=[utr5_upload, utr3_upload, cds_input], outputs=design_results)

    with gr.Tab("Resources & Links"):
        gr.Markdown("## Helpful Resources")
        gr.Markdown(
            "\n"
            "- [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)\n"
            "- [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)\n"
            "- [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)\n"
            "- [ViralZone](https://viralzone.expasy.org/)\n"
            "- [Codon Usage Database](https://www.kazusa.or.jp/codon/)\n"
        )
        gr.Markdown("## Download All Results")
        download_btn = gr.Button("Download Results Package")
        download_file = gr.File(label="Download Results")

        def build_results_package(optimized, designs):
            """Write the current result tables to a zip archive.

            Each non-empty table is stored as a CSV member. Returns the
            path of the archive, which the File component then serves.
            """
            import tempfile
            import zipfile

            tables = {
                "optimized_cds.csv": optimized,
                "mrna_designs.csv": designs,
            }
            # NOTE(review): an untouched Dataframe component may still
            # deliver a blank placeholder row - confirm for the Gradio
            # version in use.
            available = {
                member: table
                for member, table in tables.items()
                if table is not None and len(table) > 0
            }
            if not available:
                raise gr.Error("No results to download yet. Generate sequences or designs first.")

            # delete=False: the archive must outlive this handler so the
            # File component can read it.
            with tempfile.NamedTemporaryFile(prefix="results_", suffix=".zip", delete=False) as handle:
                archive_path = handle.name
            with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as bundle:
                for member, table in available.items():
                    bundle.writestr(member, table.to_csv(index=False))
            return archive_path

        download_btn.click(
            build_results_package,
            inputs=[results_table, design_results],
            outputs=download_file,
        )

    gr.Markdown("---")
    gr.HTML(
        "\n"
        "Academic Collaboration Platform for Vaccine Design | Contact: bioinfo@university.edu\n"
    )
# Start the application when this file is executed as a script.
if __name__ == "__main__":
    # Host and port the Gradio server is asked to listen on.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    app.launch(**launch_options)