"""Gradio front end for a vaccine / mRNA design demo.

The biological scoring in this module is still largely simulated (tRNA
adaptation, codon usage and MFE values are random placeholders because
``species_data`` is empty). What IS computed for real: in-frame ORF detection,
the number of synonymous CDS variants under the standard genetic code, and
back-translation of a protein into CDS candidates that actually encode it.
"""

import base64
import functools
import html
import math
import os
import random
import tempfile
import zipfile
from decimal import Decimal
from io import BytesIO

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.figure import Figure

# Mock data - replace with real per-species tables before production use.
species_data = {
    "human": {"codon_table": {}, "trna": {}, "codon_usage": {}},
    "mouse": {"codon_table": {}, "trna": {}, "codon_usage": {}},
    "virus": {"codon_table": {}, "trna": {}, "codon_usage": {}},
    "Escherichia coli": {"codon_table": {}, "trna": {}, "codon_usage": {}},
    "酿酒酵母": {"codon_table": {}, "trna": {}, "codon_usage": {}},
    "Pichia": {"codon_table": {}, "trna": {}, "codon_usage": {}},
}

# Standard genetic code (NCBI translation table 1) in the conventional
# T, C, A, G enumeration order; "*" marks a stop codon.
_BASES = "TCAG"
_STANDARD_AMINO_ACIDS = (
    "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
)
_START_CODON = "ATG"
_STOP_CODONS = frozenset({"TAA", "TAG", "TGA"})

OPTIMIZATION_COLUMNS = ["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"]
DESIGN_COLUMNS = ["Design", "5'UTR", "3'UTR", "MFE", "Sequence"]


def _build_synonymous_codons():
    """Return a mapping from one-letter residue (or ``*``) to its DNA codons."""
    codons = [a + b + c for a in _BASES for b in _BASES for c in _BASES]
    table = {}
    for codon, residue in zip(codons, _STANDARD_AMINO_ACIDS):
        table.setdefault(residue, []).append(codon)
    return table


SYNONYMOUS_CODONS = _build_synonymous_codons()


def _clean_protein(protein_seq):
    """Strip whitespace, upper-case and validate a one-letter protein string.

    Returns the cleaned sequence (possibly empty).

    Raises:
        ValueError: if the sequence contains a character that is not one of
            the 20 standard residues or ``*`` (stop).
    """
    cleaned = "".join((protein_seq or "").split()).upper()
    unknown = sorted(set(cleaned) - set(SYNONYMOUS_CODONS))
    if unknown:
        raise ValueError(
            "Unsupported residue(s) in protein sequence: " + ", ".join(unknown)
        )
    return cleaned


def find_longest_cds(seq):
    """Locate the longest open reading frame in a nucleotide sequence.

    All three forward frames are scanned. An ORF starts at ``ATG`` and ends
    with the first IN-FRAME stop codon (``TAA``/``TAG``/``TGA``). Matching is
    case-insensitive and treats ``U`` as ``T``, so RNA input works; because
    that normalisation preserves length, the returned indices apply to the
    original string.

    ORFs terminated by a stop codon are preferred; if none exists, the longest
    start codon that runs off the end of the sequence is used.

    Returns:
        ``(start, end)`` such that ``seq[start:end]`` is the CDS including its
        stop codon. If no start codon is present, ``(-1, len(seq))``.
    """
    normalized = (seq or "").upper().replace("U", "T")
    total = len(normalized)

    # Each candidate is (has_stop, length, -start): max() then prefers a
    # terminated ORF, then the longest, then the earliest start.
    candidates = []
    for frame in range(3):
        orf_start = None
        for pos in range(frame, total - 2, 3):
            codon = normalized[pos:pos + 3]
            if orf_start is None:
                if codon == _START_CODON:
                    orf_start = pos
            elif codon in _STOP_CODONS:
                candidates.append((True, pos + 3 - orf_start, -orf_start))
                orf_start = None
        if orf_start is not None:
            # No in-frame stop: the ORF runs to the end of the sequence.
            candidates.append((False, total - orf_start, -orf_start))

    if not candidates:
        return -1, total
    _, length, negated_start = max(candidates)
    start = -negated_start
    return start, start + length


def calculate_cds_variants(protein_seq):
    """Count the distinct coding sequences that encode ``protein_seq``.

    The count is the product, over all residues, of the number of synonymous
    codons in the standard genetic code (``*`` counts as the 3 stop codons).
    An empty sequence yields 1.

    Returns:
        An exact Python ``int`` (it can be astronomically large).

    Raises:
        ValueError: on unsupported residue characters.
    """
    protein = _clean_protein(protein_seq)
    return math.prod(len(SYNONYMOUS_CODONS[residue]) for residue in protein)


def _format_variant_count(count):
    """Render a variant count as text: exact when small, scientific otherwise."""
    if count < 10 ** 15:
        return f"{count:,}"
    # Decimal formats arbitrarily large ints without overflowing a float.
    return f"{Decimal(count):.3e}"


def describe_cds_variants(protein_seq):
    """UI handler: return the CDS variant count of a protein as display text.

    Raises:
        ValueError: if the protein is empty or contains unsupported residues.
    """
    if not _clean_protein(protein_seq):
        raise ValueError("Protein sequence is empty.")
    return _format_variant_count(calculate_cds_variants(protein_seq))


def optimize_cds(protein_seq, species, method, n_candidates=20):
    """Generate candidate CDS sequences for a protein, best score first.

    Every candidate is a real back-translation of the protein (a random
    synonymous codon per residue), and its GC% is computed from the sequence.
    The tRNA, codon-usage and MFE values are still SIMULATED with random
    numbers; ``species`` and ``method`` are accepted for interface stability
    but do not yet influence the result.

    Returns:
        A ``pandas.DataFrame`` with the columns in ``OPTIMIZATION_COLUMNS``
        (all values pre-formatted as strings), sorted by descending score.

    Raises:
        ValueError: if the protein is empty or contains unsupported residues.
    """
    protein = _clean_protein(protein_seq)
    if not protein:
        raise ValueError("Protein sequence is empty.")

    scored_rows = []
    for _ in range(n_candidates):
        seq = "".join(random.choice(SYNONYMOUS_CODONS[res]) for res in protein)
        gc = (seq.count("G") + seq.count("C")) / len(seq)
        trna = random.uniform(0.5, 1.0)    # placeholder value
        usage = random.uniform(0.6, 0.95)  # placeholder value
        mfe = random.uniform(-30, -10)     # placeholder value
        score = gc * 0.25 + trna * 0.25 + usage * 0.25 + (-mfe / 40) * 0.25
        scored_rows.append((score, {
            "Sequence": seq,
            "GC%": f"{gc * 100:.1f}%",
            "tRNA": f"{trna:.3f}",
            "Usage": f"{usage:.3f}",
            "MFE": f"{mfe:.1f}",
            "Score": f"{score:.3f}",
        }))

    # Sort on the unrounded score so the table really is a ranking.
    scored_rows.sort(key=lambda pair: pair[0], reverse=True)
    return pd.DataFrame(
        [row for _, row in scored_rows], columns=OPTIMIZATION_COLUMNS
    )


def _load_candidates(source, label):
    """Turn an upload (or an in-memory collection) into a list of sequences.

    ``source`` may be a list/tuple of sequences (returned as-is), a filesystem
    path, an uploaded-file wrapper exposing its path as ``.name``, or raw text
    with one sequence per line. In files/text, blank lines and FASTA-style
    header lines (starting with ``>``) are skipped.

    Raises:
        ValueError: if nothing was provided or no sequence could be read.
    """
    if source is None:
        raise ValueError(f"Please provide {label} candidates.")
    if isinstance(source, (list, tuple)):
        candidates = list(source)
    else:
        if isinstance(source, (str, os.PathLike)):
            path = source
        else:
            path = getattr(source, "name", None)
        if path is not None and os.path.isfile(path):
            with open(path, encoding="utf-8", errors="replace") as handle:
                lines = handle.read().splitlines()
        elif isinstance(source, str):
            lines = source.splitlines()
        else:
            raise ValueError(f"Could not read {label} candidates.")
        candidates = [
            line.strip()
            for line in lines
            if line.strip() and not line.lstrip().startswith(">")
        ]
    if not candidates:
        raise ValueError(f"No {label} candidate sequences found.")
    return candidates


def _abbreviate(sequence):
    """Shorten a sequence for table display (first 10 chars plus an ellipsis)."""
    return sequence[:10] + "..." if len(sequence) > 13 else sequence


def design_mrna(utr5_candidates, utr3_candidates, cds_seq, n_designs=20):
    """Assemble full mRNA designs from random 5'UTR / 3'UTR combinations.

    ``utr5_candidates`` and ``utr3_candidates`` may be lists of sequences or
    uploaded text files (see ``_load_candidates``). The MFE column is still a
    SIMULATED random value.

    Returns:
        A ``pandas.DataFrame`` with the columns in ``DESIGN_COLUMNS``.

    Raises:
        ValueError: if either candidate set is missing or empty.
    """
    utr5_pool = _load_candidates(utr5_candidates, "5'UTR")
    utr3_pool = _load_candidates(utr3_candidates, "3'UTR")
    cds = "".join((cds_seq or "").split())

    designs = []
    for index in range(n_designs):
        utr5 = random.choice(utr5_pool)
        utr3 = random.choice(utr3_pool)
        mfe = random.uniform(-50, -20)  # placeholder value
        designs.append({
            "Design": f"Design_{index + 1}",
            "5'UTR": _abbreviate(utr5),
            "3'UTR": _abbreviate(utr3),
            "MFE": f"{mfe:.1f}",
            "Sequence": utr5 + cds + utr3,
        })
    return pd.DataFrame(designs, columns=DESIGN_COLUMNS)


def visualize_annotation(seq):
    """Return an HTML snippet splitting ``seq`` into 5'UTR, CDS and 3'UTR.

    Whitespace (e.g. line breaks from a pasted record) is removed first. The
    sequence is user input, so every interpolated fragment is HTML-escaped,
    and the text is wrapped in ``<pre>`` so its line breaks survive rendering.
    """
    seq = "".join((seq or "").split())
    start, end = find_longest_cds(seq)
    utr5 = html.escape(seq[:start]) if start > 0 else "N/A"
    cds = html.escape(seq[start:end]) if start >= 0 else "N/A"
    utr3 = html.escape(seq[end:]) if end < len(seq) else "N/A"
    return (
        '<pre style="white-space: pre-wrap; word-break: break-all;">'
        "\n"
        f"5'UTR: {utr5}\n"
        f"CDS: {cds}\n"
        f"3'UTR: {utr3}\n"
        "\n"
        "Annotation Legend:\n"
        "\n"
        "5'UTR\n"
        "CDS\n"
        "3'UTR\n"
        "</pre>"
    )


def update_plot(df):
    """Build a bar chart of the ``Score`` column, or ``None`` if unavailable.

    A standalone ``Figure`` is used instead of ``plt.subplots()``: pyplot keeps
    a global reference to every figure it creates, which leaks memory in a
    long-running server that redraws on each table change.
    """
    if df is None or len(df) == 0 or "Score" not in df:
        return None
    scores = pd.to_numeric(df["Score"], errors="coerce").dropna().tolist()
    if not scores:
        return None
    fig = Figure()
    ax = fig.subplots()
    ax.bar(range(1, len(scores) + 1), scores, color='skyblue')
    ax.set_xlabel("Sequence Rank")
    ax.set_ylabel("Composite Score")
    ax.set_title("Optimization Scores of Top Sequences")
    return fig


def _drop_blank_rows(table):
    """Return ``table`` as a DataFrame without rows whose cells are all blank."""
    if table is None:
        return None
    frame = pd.DataFrame(table)
    blanked = frame.replace(r"^\s*$", pd.NA, regex=True)
    return frame.loc[blanked.notna().any(axis=1)]


def package_results(optimized, designs):
    """Write the current result tables to a zip archive and return its path.

    Each non-empty table becomes one CSV member of ``results.zip``, created
    in a fresh temporary directory.

    Raises:
        ValueError: if neither table holds any data yet.
    """
    members = {
        "optimized_cds.csv": _drop_blank_rows(optimized),
        "mrna_designs.csv": _drop_blank_rows(designs),
    }
    members = {
        name: table
        for name, table in members.items()
        if table is not None and not table.empty
    }
    if not members:
        raise ValueError(
            "No results to download yet. Run an optimization or design first."
        )
    out_dir = tempfile.mkdtemp(prefix="vaccine_results_")
    archive_path = os.path.join(out_dir, "results.zip")
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for name, table in members.items():
            archive.writestr(name, table.to_csv(index=False))
    return archive_path


def _report_value_errors(handler):
    """Wrap a UI handler so a ``ValueError`` is shown as a readable message."""
    @functools.wraps(handler)
    def wrapper(*args, **kwargs):
        try:
            return handler(*args, **kwargs)
        except ValueError as err:
            raise gr.Error(str(err)) from err
    return wrapper


# Build the Gradio interface.
with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")

    with gr.Tab("mRNA Annotation"):
        gr.Markdown("## mRNA Sequence Annotation")
        mrna_input = gr.Textbox(
            label="mRNA Sequence", placeholder="Enter mRNA sequence here..."
        )
        annotate_btn = gr.Button("Annotate Regions")
        annotation_output = gr.HTML(label="Sequence Annotation")
        annotate_btn.click(
            visualize_annotation, inputs=mrna_input, outputs=annotation_output
        )

    with gr.Tab("CDS Variants"):
        gr.Markdown("## Calculate Potential CDS Variants")
        protein_input = gr.Textbox(
            label="Protein Sequence",
            placeholder="Enter protein sequence here...",
        )
        calc_btn = gr.Button("Calculate Variants")
        # Shown as text: the count routinely exceeds what a numeric widget
        # (or a JSON number) can represent.
        variants_output = gr.Textbox(
            label="Potential CDS Variants", interactive=False
        )
        calc_btn.click(
            _report_value_errors(describe_cds_variants),
            inputs=protein_input,
            outputs=variants_output,
        )

    with gr.Tab("CDS Optimization"):
        gr.Markdown("## Optimize CDS Sequence")
        with gr.Row():
            protein_seq = gr.Textbox(label="Protein Sequence")
            species = gr.Dropdown(
                list(species_data.keys()), label="Species", value="human"
            )
            method = gr.Radio(
                ["Max GC Content", "tRNA Abundance", "Codon Usage",
                 "MFE Optimization"],
                label="Optimization Method",
                value="Max GC Content",
            )
        optimize_btn = gr.Button("Generate Optimized Sequences")
        results_table = gr.Dataframe(
            label="Top 20 Optimized Sequences",
            headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"],
        )
        optimize_btn.click(
            _report_value_errors(optimize_cds),
            inputs=[protein_seq, species, method],
            outputs=results_table,
        )

        # Score visualisation, refreshed whenever the table changes.
        plot = gr.Plot(label="Optimization Scores")
        results_table.change(update_plot, inputs=results_table, outputs=plot)

    with gr.Tab("Full mRNA Design"):
        gr.Markdown("## Design Full mRNA Sequence")
        with gr.Row():
            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
        cds_input = gr.Textbox(label="CDS Sequence")
        design_btn = gr.Button("Design mRNA Sequences")
        design_results = gr.Dataframe(
            label="Top 20 Designs",
            headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"],
        )
        design_btn.click(
            _report_value_errors(design_mrna),
            inputs=[utr5_upload, utr3_upload, cds_input],
            outputs=design_results,
        )

    with gr.Tab("Resources & Links"):
        gr.Markdown("## Helpful Resources")
        gr.Markdown("""
        - [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)
        - [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)
        - [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)
        - [ViralZone](https://viralzone.expasy.org/)
        - [Codon Usage Database](https://www.kazusa.or.jp/codon/)
        """)
        gr.Markdown("## Download All Results")
        download_btn = gr.Button("Download Results Package")
        download_file = gr.File(label="Download Results")
        download_btn.click(
            _report_value_errors(package_results),
            inputs=[results_table, design_results],
            outputs=download_file,
        )

    gr.Markdown("---")
    gr.HTML("""

Academic Collaboration Platform for Vaccine Design | Contact: bioinfo@university.edu

""")

# Run the application.
if __name__ == "__main__":
    # NOTE: 0.0.0.0 exposes the app on every network interface.
    app.launch(server_name="0.0.0.0", server_port=7860)