julse committed on
Commit
e3e8dae
·
verified ·
1 Parent(s): 95b5182

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from io import BytesIO
7
+ import base64
8
+
9
# Mock data - must be replaced with real data before actual use.
# Every species maps to its own three empty dicts, keyed "codon_table",
# "trna" and "codon_usage".
species_data = {
    species_name: {"codon_table": {}, "trna": {}, "codon_usage": {}}
    for species_name in (
        "human",
        "mouse",
        "virus",
        "Escherichia coli",
        "酿酒酵母",
        "Pichia",
    )
}
18
+
19
# Mock helpers - real implementations need proper bioinformatics algorithms.
_STOP_CODONS = frozenset(("TAA", "TAG", "TGA"))
# Upper-cases a/c/g/t and maps RNA "U"/"u" to "T" one character at a time, so
# the normalised copy keeps the same length and indexing as the input.
_TO_DNA_UPPER = str.maketrans("acgtuU", "ACGTTT")


def find_longest_cds(seq):
    """Locate the longest open reading frame (ORF) in a nucleotide sequence.

    Every ``ATG`` is treated as a candidate start. From each start the
    sequence is read codon by codon (in frame) until the first ``TAA``,
    ``TAG`` or ``TGA``; a start with no in-frame stop yields an ORF that
    runs to the end of the sequence. The longest ORF wins, and the earliest
    start wins a tie. Matching is case-insensitive and accepts RNA (``U``)
    as well as DNA (``T``) input.

    This is a simplified search; it only scans the given strand.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A ``(start, end)`` tuple of indices into ``seq`` such that
        ``seq[start:end]`` is the ORF including its stop codon (when one
        exists). If ``seq`` contains no ``ATG``, returns ``(-1, len(seq))``.
    """
    normalized = seq.translate(_TO_DNA_UPPER)
    seq_len = len(normalized)
    best_start, best_end = -1, seq_len

    start = normalized.find("ATG")
    while start != -1:
        # Default: no in-frame stop, the ORF extends to the end of the input.
        end = seq_len
        # Step in whole codons so that only in-frame stops terminate the ORF.
        for pos in range(start + 3, seq_len - 2, 3):
            if normalized[pos:pos + 3] in _STOP_CODONS:
                end = pos + 3
                break
        # Strict ">" keeps the earliest start when two ORFs have equal length.
        if best_start == -1 or end - start > best_end - best_start:
            best_start, best_end = start, end
        start = normalized.find("ATG", start + 1)

    return best_start, best_end
27
+
28
# Number of synonymous codons per residue in the standard genetic code
# (one-letter amino acid codes; "*" denotes a stop codon). The counts sum
# to the 64 possible codons.
_SYNONYMOUS_CODON_COUNTS = {
    "A": 4, "R": 6, "N": 2, "D": 2, "C": 2, "Q": 2, "E": 2, "G": 4,
    "H": 2, "I": 3, "L": 6, "K": 2, "M": 1, "F": 2, "P": 4, "S": 6,
    "T": 4, "W": 1, "Y": 2, "V": 4, "*": 3,
}


def calculate_cds_variants(protein_seq):
    """Count the distinct coding sequences that encode a protein.

    The count is the product, over all residues, of the number of
    synonymous codons each residue has in the standard genetic code.
    Whitespace is ignored and lower-case letters are accepted.

    Args:
        protein_seq: Protein sequence in one-letter amino acid codes; ``*``
            may be used for a stop codon.

    Returns:
        The exact number of encoding CDS variants as an ``int`` (``1`` for
        an empty sequence). The value grows exponentially with length.

    Raises:
        ValueError: If the sequence contains a symbol that is not one of the
            20 standard amino acids or ``*``.
    """
    variants = 1
    # split()/join drops spaces and line breaks pasted along with the sequence.
    for residue in "".join(protein_seq.split()).upper():
        try:
            variants *= _SYNONYMOUS_CODON_COUNTS[residue]
        except KeyError:
            raise ValueError(
                f"Unsupported amino acid symbol: {residue!r}"
            ) from None
    return variants
32
+
33
def optimize_cds(protein_seq, species, method):
    """Generate 20 mock "optimized" CDS candidates, ranked by composite score.

    The sequences are random nucleotides of length ``3 * len(protein_seq)``
    and the tRNA, usage and MFE metrics are random placeholders; a real
    implementation should derive them from the chosen optimization method.
    GC content is measured from each generated sequence so that the reported
    value matches the sequence shown next to it.

    Args:
        protein_seq: Protein sequence; only its length is used.
        species: Selected species name (currently unused by the mock).
        method: Selected optimization method (currently unused by the mock).

    Returns:
        A ``pandas.DataFrame`` with columns ``Sequence``, ``GC%``, ``tRNA``,
        ``Usage``, ``MFE`` and ``Score`` (all metrics formatted as strings),
        sorted by descending score. It has 20 rows, or none when
        ``protein_seq`` is empty.
    """
    columns = ["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"]
    if not protein_seq:
        # Nothing to encode: return an empty table instead of 20 empty rows.
        return pd.DataFrame(columns=columns)

    cds_length = len(protein_seq) * 3
    candidates = []
    for _ in range(20):
        seq = "".join(random.choices("ACGT", k=cds_length))
        gc = (seq.count("G") + seq.count("C")) / cds_length
        trna = random.uniform(0.5, 1.0)
        usage = random.uniform(0.6, 0.95)
        mfe = random.uniform(-30, -10)
        # Equal-weight composite; MFE is negated and scaled by 40.
        score = gc * 0.25 + trna * 0.25 + usage * 0.25 + (-mfe / 40) * 0.25
        candidates.append((score, seq, gc, trna, usage, mfe))

    # Rank on the unrounded score before formatting so row order is the rank.
    candidates.sort(key=lambda candidate: candidate[0], reverse=True)

    rows = [
        {
            "Sequence": seq,
            "GC%": f"{gc*100:.1f}%",
            "tRNA": f"{trna:.3f}",
            "Usage": f"{usage:.3f}",
            "MFE": f"{mfe:.1f}",
            "Score": f"{score:.3f}",
        }
        for score, seq, gc, trna, usage, mfe in candidates
    ]
    return pd.DataFrame(rows, columns=columns)
54
+
55
def _load_utr_candidates(source):
    """Return the non-empty, stripped UTR sequences described by ``source``.

    ``source`` may be ``None`` (no candidates), a list/tuple of sequences,
    a file path (``str`` or path-like), or a file wrapper exposing the path
    as ``.name``. Files are read as UTF-8 text, one candidate per line.
    """
    if source is None:
        return []
    if isinstance(source, (list, tuple)):
        lines = source
    else:
        # str and path-like objects are paths themselves; any other object is
        # expected to be an upload wrapper that carries its path in ``.name``.
        if isinstance(source, str) or hasattr(source, "__fspath__"):
            path = source
        else:
            path = source.name
        with open(path, encoding="utf-8") as handle:
            lines = handle.read().splitlines()
    stripped = (line.strip() for line in lines)
    return [candidate for candidate in stripped if candidate]


def _abbreviate(sequence):
    """Shorten sequences longer than 13 characters to 10 characters + '...'."""
    return sequence[:10] + "..." if len(sequence) > 13 else sequence


def design_mrna(utr5_candidates, utr3_candidates, cds_seq):
    """Generate 20 mock full-length mRNA designs (5'UTR + CDS + 3'UTR).

    Each design pairs a randomly chosen 5'UTR and 3'UTR candidate with the
    given CDS. The MFE value is a random placeholder.

    Args:
        utr5_candidates: 5'UTR candidates, given either as a list/tuple of
            sequences or as an uploaded text file (path or file wrapper)
            containing one sequence per line.
        utr3_candidates: 3'UTR candidates, in the same forms.
        cds_seq: Coding sequence placed between the two UTRs.

    Returns:
        A ``pandas.DataFrame`` with 20 rows and the columns ``Design``,
        ``5'UTR``, ``3'UTR`` (both abbreviated for display), ``MFE`` and
        ``Sequence`` (the full concatenated sequence).

    Raises:
        ValueError: If either candidate source is missing or contains no
            sequences.
    """
    utr5_pool = _load_utr_candidates(utr5_candidates)
    utr3_pool = _load_utr_candidates(utr3_candidates)
    if not utr5_pool:
        raise ValueError("No 5'UTR candidates provided")
    if not utr3_pool:
        raise ValueError("No 3'UTR candidates provided")

    designs = []
    for design_number in range(1, 21):
        utr5 = random.choice(utr5_pool)
        utr3 = random.choice(utr3_pool)
        mfe = random.uniform(-50, -20)
        designs.append({
            "Design": f"Design_{design_number}",
            "5'UTR": _abbreviate(utr5),
            "3'UTR": _abbreviate(utr3),
            "MFE": f"{mfe:.1f}",
            "Sequence": utr5 + cds_seq + utr3,
        })
    return pd.DataFrame(designs)
71
+
72
# Annotation visualization
def visualize_annotation(seq):
    """Render the 5'UTR / CDS / 3'UTR split of a sequence as an HTML snippet.

    The region boundaries come from ``find_longest_cds(seq)``. A region is
    shown as ``N/A`` when it is absent: the 5'UTR when the CDS starts at
    index 0 or no start was found, the CDS when the start index is negative,
    and the 3'UTR when the CDS reaches the end of the sequence.

    Args:
        seq: Sequence text entered by the user.

    Returns:
        An HTML string with the three colour-coded regions and a legend.
    """
    # Local import keeps this block self-contained; ``seq`` is user input and
    # must be escaped before being interpolated into markup.
    from html import escape

    start, end = find_longest_cds(seq)
    utr5_text = escape(seq[:start]) if start > 0 else 'N/A'
    cds_text = escape(seq[start:end]) if start >= 0 else 'N/A'
    utr3_text = escape(seq[end:]) if end < len(seq) else 'N/A'

    return f"""
    <div style="font-family: monospace; font-size: 14px; line-height: 1.8;">
    <div style="background-color: #ffcccc; display: inline-block; padding: 2px;">
    5'UTR: {utr5_text}
    </div>
    <div style="background-color: #ccffcc; display: inline-block; padding: 2px;">
    CDS: {cds_text}
    </div>
    <div style="background-color: #ccccff; display: inline-block; padding: 2px;">
    3'UTR: {utr3_text}
    </div>
    </div>
    <p>Annotation Legend:</p>
    <div style="display: flex; gap: 10px;">
    <div style="background-color: #ffcccc; padding: 5px;">5'UTR</div>
    <div style="background-color: #ccffcc; padding: 5px;">CDS</div>
    <div style="background-color: #ccccff; padding: 5px;">3'UTR</div>
    </div>
    """
95
+
96
def update_plot(df):
    """Draw a bar chart of the ``Score`` column of the results table.

    Args:
        df: Results table as a ``pandas.DataFrame`` (or ``None``).

    Returns:
        A Matplotlib ``Figure``, or ``None`` when there is no table, no
        ``Score`` column, or no value that parses as a number.
    """
    if df is None or len(df) == 0 or "Score" not in df:
        return None
    # Cells that do not parse as numbers (e.g. blank rows) are ignored.
    scores = pd.to_numeric(df["Score"], errors="coerce").dropna().tolist()
    if not scores:
        return None

    # Build the figure without pyplot so it is not kept alive in pyplot's
    # global figure registry after every table change.
    from matplotlib.figure import Figure

    fig = Figure()
    ax = fig.subplots()
    ax.bar(range(1, len(scores) + 1), scores, color='skyblue')
    ax.set_xlabel("Sequence Rank")
    ax.set_ylabel("Composite Score")
    ax.set_title("Optimization Scores of Top Sequences")
    return fig


def _package_results(optimized_df, designs_df):
    """Write the current result tables to a zip archive and return its path.

    Each table that is not ``None`` is stored as a CSV file inside
    ``results.zip``, which is created in a fresh temporary directory.
    """
    import os
    import tempfile
    import zipfile

    archive_path = os.path.join(
        tempfile.mkdtemp(prefix="vaccine_results_"), "results.zip"
    )
    tables = (
        ("optimized_cds.csv", optimized_df),
        ("mrna_designs.csv", designs_df),
    )
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for filename, table in tables:
            if table is not None:
                archive.writestr(filename, table.to_csv(index=False))
    return archive_path


# Build the Gradio interface.
# NOTE(review): the tab nesting below is reconstructed from statement order;
# confirm that the download section belongs to the "Resources & Links" tab.
with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")

    with gr.Tab("mRNA Annotation"):
        gr.Markdown("## mRNA Sequence Annotation")
        mrna_input = gr.Textbox(label="mRNA Sequence", placeholder="Enter mRNA sequence here...")
        annotate_btn = gr.Button("Annotate Regions")
        annotation_output = gr.HTML(label="Sequence Annotation")
        annotate_btn.click(visualize_annotation, inputs=mrna_input, outputs=annotation_output)

    with gr.Tab("CDS Variants"):
        gr.Markdown("## Calculate Potential CDS Variants")
        protein_input = gr.Textbox(label="Protein Sequence", placeholder="Enter protein sequence here...")
        calc_btn = gr.Button("Calculate Variants")
        variants_output = gr.Number(label="Potential CDS Variants")
        calc_btn.click(calculate_cds_variants, inputs=protein_input, outputs=variants_output)

    with gr.Tab("CDS Optimization"):
        gr.Markdown("## Optimize CDS Sequence")
        with gr.Row():
            protein_seq = gr.Textbox(label="Protein Sequence")
            species = gr.Dropdown(list(species_data.keys()), label="Species", value="human")

        method = gr.Radio(
            ["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
            label="Optimization Method",
            value="Max GC Content",
        )

        optimize_btn = gr.Button("Generate Optimized Sequences")
        results_table = gr.Dataframe(
            label="Top 20 Optimized Sequences",
            headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"],
        )
        optimize_btn.click(optimize_cds, inputs=[protein_seq, species, method], outputs=results_table)

        # Score visualization: redraw whenever the results table changes.
        plot = gr.Plot(label="Optimization Scores")
        results_table.change(update_plot, inputs=results_table, outputs=plot)

    with gr.Tab("Full mRNA Design"):
        gr.Markdown("## Design Full mRNA Sequence")
        with gr.Row():
            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
        cds_input = gr.Textbox(label="CDS Sequence")
        design_btn = gr.Button("Design mRNA Sequences")
        design_results = gr.Dataframe(
            label="Top 20 Designs",
            headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"],
        )
        design_btn.click(design_mrna, inputs=[utr5_upload, utr3_upload, cds_input], outputs=design_results)

    with gr.Tab("Resources & Links"):
        gr.Markdown("## Helpful Resources")
        # Joined from separate lines so the Markdown list does not depend on
        # how the source file happens to be indented.
        gr.Markdown(
            "\n".join(
                [
                    "- [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)",
                    "- [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)",
                    "- [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)",
                    "- [ViralZone](https://viralzone.expasy.org/)",
                    "- [Codon Usage Database](https://www.kazusa.or.jp/codon/)",
                ]
            )
        )

        gr.Markdown("## Download All Results")
        download_btn = gr.Button("Download Results Package")
        download_file = gr.File(label="Download Results")
        # Build a real archive from the current tables; the previous handler
        # returned the name of a file that was never created.
        download_btn.click(
            _package_results,
            inputs=[results_table, design_results],
            outputs=download_file,
        )

    gr.Markdown("---")
    # NOTE(review): the contact address below looks like an e-mail obfuscation
    # placeholder - restore the real address.
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
    <p>Academic Collaboration Platform for Vaccine Design | Contact: [email protected]</p>
    </div>
    """)

# Run the application; 0.0.0.0 makes the server listen on all interfaces.
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)