Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import random
|
4 |
+
import pandas as pd
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
from io import BytesIO
|
7 |
+
import base64
|
8 |
+
|
9 |
+
# Mock data - replace with real per-species tables before real use.
# Every species starts with its own, independent set of empty lookup tables.
# ("酿酒酵母" is Saccharomyces cerevisiae; the key is kept as-is because it is
# shown verbatim in the species dropdown.)
species_data = {
    species_name: {"codon_table": {}, "trna": {}, "codon_usage": {}}
    for species_name in (
        "human",
        "mouse",
        "virus",
        "Escherichia coli",
        "酿酒酵母",
        "Pichia",
    )
}
|
18 |
+
|
19 |
+
# Placeholder analysis functions - real use needs proper bioinformatics algorithms.
def find_longest_cds(seq):
    """Locate the longest open reading frame (ORF) on the forward strand.

    An ORF starts at an ``ATG`` and ends at the first stop codon
    (``TAA``/``TAG``/``TGA``) that is *in frame* with that start. Matching is
    case-insensitive and treats ``U`` as ``T`` so RNA input works as well.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        A ``(start, end)`` tuple of indices into ``seq`` such that
        ``seq[start:end]`` is the CDS including its stop codon.

        * No start codon at all: ``(-1, len(seq))``.
        * Start codon(s) but no in-frame stop: the first start codon is used
          and the CDS runs to the end of the sequence.
        * Several complete ORFs of equal length: the earliest one wins.
    """
    stop_codons = ("TAA", "TAG", "TGA")
    # Character-for-character mapping keeps the length unchanged, so indices
    # found in the normalised copy are valid for the caller's original string.
    normalized = seq.translate(str.maketrans("acgtuU", "ACGTTT"))
    seq_len = len(normalized)

    first_start = normalized.find("ATG")
    if first_start == -1:
        return -1, seq_len

    # Fallback when no ORF is closed by an in-frame stop codon.
    best_start, best_end = first_start, seq_len
    best_len = 0

    start = first_start
    while start != -1:
        # Walk codon by codon so only stop codons in this start's frame count.
        for pos in range(start + 3, seq_len - 2, 3):
            if normalized[pos:pos + 3] in stop_codons:
                orf_len = pos + 3 - start
                if orf_len > best_len:
                    best_start, best_end, best_len = start, pos + 3, orf_len
                break
        start = normalized.find("ATG", start + 1)

    return best_start, best_end
|
27 |
+
|
28 |
+
# Number of synonymous codons per residue in the standard genetic code.
# "*" denotes translation stop. The 20 amino acids sum to the 61 sense codons.
_CODON_DEGENERACY = {
    "A": 4, "R": 6, "N": 2, "D": 2, "C": 2,
    "Q": 2, "E": 2, "G": 4, "H": 2, "I": 3,
    "L": 6, "K": 2, "M": 1, "F": 2, "P": 4,
    "S": 6, "T": 4, "W": 1, "Y": 2, "V": 4,
    "*": 3,
}


def calculate_cds_variants(protein_seq):
    """Count the distinct coding sequences that encode a protein.

    The count is the product, over all residues, of the number of synonymous
    codons each residue has in the standard genetic code. Whitespace is
    ignored and letters are matched case-insensitively.

    Args:
        protein_seq: Protein sequence in one-letter code; ``*`` marks a stop.

    Returns:
        The number of possible CDS variants as an exact ``int``
        (``1`` for an empty sequence).

    Raises:
        ValueError: If the sequence contains a character that is not one of
            the 20 standard amino-acid codes or ``*``.
    """
    residues = "".join(protein_seq.split()).upper()

    unknown = sorted(set(residues) - _CODON_DEGENERACY.keys())
    if unknown:
        raise ValueError(
            f"Unsupported residue code(s) in protein sequence: {', '.join(unknown)}"
        )

    variants = 1
    for residue in residues:
        variants *= _CODON_DEGENERACY[residue]
    return variants
|
32 |
+
|
33 |
+
def optimize_cds(protein_seq, species, method):
    """Generate 20 mock "optimized" CDS candidates, best score first.

    This is still a placeholder: each candidate is a random nucleotide string
    of length ``3 * len(protein_seq)`` and the tRNA / codon-usage / MFE metrics
    are random draws. The GC content, however, is measured on the generated
    sequence so the reported value always matches the sequence shown.

    Args:
        protein_seq: Protein sequence; only its length is used.
        species: Selected species name (currently unused by the mock).
        method: Selected optimization method (currently unused by the mock).

    Returns:
        A ``pandas.DataFrame`` with 20 rows and the columns ``Sequence``,
        ``GC%``, ``tRNA``, ``Usage``, ``MFE`` and ``Score`` (all metric cells
        are formatted strings), sorted by composite score in descending order.
    """
    columns = ["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"]
    cds_length = len(protein_seq) * 3

    scored_rows = []
    for _ in range(20):
        # A real implementation would derive the sequence from `method`.
        seq = ''.join(random.choices("ACGT", k=cds_length))
        # Guard the division so an empty protein yields 0% instead of crashing.
        gc = (seq.count("G") + seq.count("C")) / cds_length if cds_length else 0.0
        trna = random.uniform(0.5, 1.0)
        usage = random.uniform(0.6, 0.95)
        mfe = random.uniform(-30, -10)
        # Equal-weight composite; MFE is negated and scaled by 40.
        score = gc*0.25 + trna*0.25 + usage*0.25 + (-mfe/40)*0.25

        scored_rows.append((score, {
            "Sequence": seq,
            "GC%": f"{gc*100:.1f}%",
            "tRNA": f"{trna:.3f}",
            "Usage": f"{usage:.3f}",
            "MFE": f"{mfe:.1f}",
            "Score": f"{score:.3f}",
        }))

    # Rank on the unrounded score so the table really is a "top 20" list.
    scored_rows.sort(key=lambda item: item[0], reverse=True)
    return pd.DataFrame([row for _, row in scored_rows], columns=columns)
|
54 |
+
|
55 |
+
def _read_utr_candidates(source):
    """Turn a UTR candidate source into a list of non-empty sequence strings."""
    import os

    if source is None:
        return []
    if isinstance(source, (list, tuple)):
        cleaned = (str(item).strip() for item in source)
        return [item for item in cleaned if item]

    # Upload widgets may hand over a path or a file-like wrapper exposing
    # the path as `.name`; accept both.
    if isinstance(source, (str, os.PathLike)):
        path = source
    else:
        path = getattr(source, "name", None)
        if path is None:
            raise TypeError(
                f"Unsupported UTR candidate source: {type(source).__name__}"
            )

    if os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            lines = [line.strip() for line in handle]
        # One candidate per line; blank lines and FASTA-style headers are skipped.
        return [line for line in lines if line and not line.startswith(">")]

    if isinstance(source, str):
        # A plain string that is not a file is taken as a single sequence.
        stripped = source.strip()
        return [stripped] if stripped else []
    raise FileNotFoundError(f"UTR candidate file not found: {path}")


def _abbreviate(sequence):
    """Shorten a sequence for table display (first 10 chars plus an ellipsis)."""
    return sequence[:10] + "..." if len(sequence) > 13 else sequence


def design_mrna(utr5_candidates, utr3_candidates, cds_seq):
    """Assemble 20 mock full-length mRNA designs from UTR candidates and a CDS.

    Each design picks one 5'UTR and one 3'UTR at random and concatenates
    ``5'UTR + CDS + 3'UTR``. The MFE value is a random placeholder.

    Args:
        utr5_candidates: 5'UTR candidates. May be a list/tuple of sequences,
            a path to a text file with one sequence per line, a file-like
            object exposing that path as ``.name``, a single sequence string,
            or ``None``.
        utr3_candidates: 3'UTR candidates, same accepted forms.
        cds_seq: Coding sequence placed between the UTRs.

    Returns:
        A ``pandas.DataFrame`` with 20 rows and the columns ``Design``,
        ``5'UTR``, ``3'UTR`` (both abbreviated for display), ``MFE`` and
        ``Sequence`` (the full construct).

    Raises:
        TypeError: If a candidate source is of an unsupported type.
        FileNotFoundError: If a file-like source points to a missing file.

    When a candidate source is missing or empty, the corresponding UTR is
    left empty rather than failing, so a CDS-only design is still produced.
    """
    utr5_pool = _read_utr_candidates(utr5_candidates) or [""]
    utr3_pool = _read_utr_candidates(utr3_candidates) or [""]
    cds_seq = cds_seq or ""

    designs = []
    for i in range(20):
        utr5 = random.choice(utr5_pool)
        utr3 = random.choice(utr3_pool)
        mfe = random.uniform(-50, -20)
        designs.append({
            "Design": f"Design_{i+1}",
            "5'UTR": _abbreviate(utr5),
            "3'UTR": _abbreviate(utr3),
            "MFE": f"{mfe:.1f}",
            "Sequence": utr5 + cds_seq + utr3,
        })
    return pd.DataFrame(
        designs, columns=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"]
    )
|
71 |
+
|
72 |
+
# Annotation visualisation
def visualize_annotation(seq):
    """Render a sequence as HTML with 5'UTR, CDS and 3'UTR highlighted.

    The region boundaries come from ``find_longest_cds``. A region that is
    absent is shown as ``N/A``.

    Args:
        seq: Sequence text entered by the user (``None`` is treated as empty).

    Returns:
        An HTML fragment containing the three coloured regions followed by a
        colour legend.
    """
    # Imported locally under its own name so it cannot be confused with a
    # variable called `html`.
    from html import escape

    seq = seq or ""
    start, end = find_longest_cds(seq)

    # The text comes straight from a user-facing textbox: escape it before
    # embedding so markup in the input is displayed, not interpreted.
    utr5_text = escape(seq[:start]) if start > 0 else 'N/A'
    cds_text = escape(seq[start:end]) if start >= 0 else 'N/A'
    utr3_text = escape(seq[end:]) if end < len(seq) else 'N/A'

    markup = f"""
    <div style="font-family: monospace; font-size: 14px; line-height: 1.8;">
    <div style="background-color: #ffcccc; display: inline-block; padding: 2px;">
    5'UTR: {utr5_text}
    </div>
    <div style="background-color: #ccffcc; display: inline-block; padding: 2px;">
    CDS: {cds_text}
    </div>
    <div style="background-color: #ccccff; display: inline-block; padding: 2px;">
    3'UTR: {utr3_text}
    </div>
    </div>
    <p>Annotation Legend:</p>
    <div style="display: flex; gap: 10px;">
    <div style="background-color: #ffcccc; padding: 5px;">5'UTR</div>
    <div style="background-color: #ccffcc; padding: 5px;">CDS</div>
    <div style="background-color: #ccccff; padding: 5px;">3'UTR</div>
    </div>
    """
    return markup
|
95 |
+
|
96 |
+
# Build the Gradio interface: one tab per tool plus a shared footer.
with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")

    with gr.Tab("mRNA Annotation"):
        gr.Markdown("## mRNA Sequence Annotation")
        mrna_input = gr.Textbox(label="mRNA Sequence", placeholder="Enter mRNA sequence here...")
        annotate_btn = gr.Button("Annotate Regions")
        annotation_output = gr.HTML(label="Sequence Annotation")
        annotate_btn.click(visualize_annotation, inputs=mrna_input, outputs=annotation_output)

    with gr.Tab("CDS Variants"):
        gr.Markdown("## Calculate Potential CDS Variants")
        protein_input = gr.Textbox(label="Protein Sequence", placeholder="Enter protein sequence here...")
        calc_btn = gr.Button("Calculate Variants")
        # The count grows exponentially with protein length and quickly
        # exceeds what a floating-point number can hold, so it is shown as
        # text to keep every digit.
        variants_output = gr.Textbox(label="Potential CDS Variants")

        def format_variant_count(protein):
            """Return the CDS variant count for ``protein`` as a decimal string."""
            return str(calculate_cds_variants(protein))

        calc_btn.click(format_variant_count, inputs=protein_input, outputs=variants_output)

    with gr.Tab("CDS Optimization"):
        gr.Markdown("## Optimize CDS Sequence")
        with gr.Row():
            protein_seq = gr.Textbox(label="Protein Sequence")
            species = gr.Dropdown(list(species_data.keys()), label="Species", value="human")

        method = gr.Radio(["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
                          label="Optimization Method", value="Max GC Content")

        optimize_btn = gr.Button("Generate Optimized Sequences")
        results_table = gr.Dataframe(label="Top 20 Optimized Sequences", headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"])
        optimize_btn.click(optimize_cds, inputs=[protein_seq, species, method], outputs=results_table)

        # Score visualisation
        plot = gr.Plot(label="Optimization Scores")

        def update_plot(df):
            """Draw a bar chart of the ``Score`` column of the results table.

            Args:
                df: Current contents of the results table, or ``None``.

            Returns:
                A matplotlib ``Figure``, or ``None`` when there is no numeric
                score to plot.
            """
            # Build the figure directly instead of through pyplot: pyplot
            # keeps every figure it creates alive until it is explicitly
            # closed, which would leak one figure per table update.
            from matplotlib.figure import Figure

            if df is None or len(df) == 0 or "Score" not in df:
                return None
            # Blank or otherwise non-numeric cells are ignored rather than
            # aborting the update.
            scores = pd.to_numeric(df["Score"], errors="coerce").dropna()
            if scores.empty:
                return None

            fig = Figure()
            ax = fig.subplots()
            # Ranks are 1-based to match the axis label.
            ax.bar(range(1, len(scores) + 1), scores, color='skyblue')
            ax.set_xlabel("Sequence Rank")
            ax.set_ylabel("Composite Score")
            ax.set_title("Optimization Scores of Top Sequences")
            return fig

        results_table.change(update_plot, inputs=results_table, outputs=plot)

    with gr.Tab("Full mRNA Design"):
        gr.Markdown("## Design Full mRNA Sequence")
        with gr.Row():
            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
        cds_input = gr.Textbox(label="CDS Sequence")
        design_btn = gr.Button("Design mRNA Sequences")
        design_results = gr.Dataframe(label="Top 20 Designs", headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"])
        design_btn.click(design_mrna, inputs=[utr5_upload, utr3_upload, cds_input], outputs=design_results)

    with gr.Tab("Resources & Links"):
        gr.Markdown("## Helpful Resources")
        gr.Markdown("""
        - [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)
        - [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)
        - [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)
        - [ViralZone](https://viralzone.expasy.org/)
        - [Codon Usage Database](https://www.kazusa.or.jp/codon/)
        """)

        gr.Markdown("## Download All Results")
        download_btn = gr.Button("Download Results Package")
        # NOTE(review): nothing in the visible code creates "results.zip";
        # confirm the archive is produced elsewhere before relying on this.
        download_btn.click(lambda: "results.zip", outputs=gr.File(label="Download Results"))

    gr.Markdown("---")
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
    <p>Academic Collaboration Platform for Vaccine Design | Contact: [email protected]</p>
    </div>
    """)
|
171 |
+
|
172 |
+
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Listen on every network interface, on port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    app.launch(**launch_options)
|