Spaces:

julse
/

maotao

Sleeping

App Files Files Community

julse committed on Jul 4

Commit

cb2619e

verified ·

1 Parent(s): e3e8dae

Update app.py

Browse files

Files changed (1) hide show

app.py +577 -126

app.py CHANGED Viewed

@@ -1,10 +1,57 @@
 import gradio as gr
 import numpy as np
 import random
 import pandas as pd
 import matplotlib.pyplot as plt
-from io import BytesIO
 import base64
 # 模拟数据 - 实际使用时需要替换为真实数据
 species_data = {
@@ -12,30 +59,93 @@ species_data = {
     "mouse": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "virus": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "Escherichia coli": {"codon_table": {}, "trna": {}, "codon_usage": {}},
-    "酿酒酵母": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "Pichia": {"codon_table": {}, "trna": {}, "codon_usage": {}},
 }
-# 模拟函数 - 实际需要生物信息学算法实现
-def find_longest_cds(seq):
-    # 简化的ORF查找 - 实际应使用生物信息学库
-    start = seq.find("ATG")
-    stops = [seq.find("TAA", start), seq.find("TAG", start), seq.find("TGA", start)]
-    stops = [s for s in stops if s > start]
-    end = min(stops) + 3 if stops else len(seq)
-    return start, end
 def calculate_cds_variants(protein_seq):
-    # 简化的计算 - 实际应根据密码子表计算
     aa_count = len(protein_seq)
-    return 2 ** aa_count  # 示例值
-def optimize_cds(protein_seq, species, method):
     # 生成20个优化序列示例
     results = []
     for i in range(20):
-        # 实际应根据优化方法生成序列
         seq = ''.join(random.choices("ACGT", k=len(protein_seq)*3))
         gc = random.uniform(0.3, 0.7)
         trna = random.uniform(0.5, 1.0)
         usage = random.uniform(0.6, 0.95)
@@ -43,132 +153,473 @@ def optimize_cds(protein_seq, species, method):
         score = gc*0.25 + trna*0.25 + usage*0.25 + (-mfe/40)*0.25
         results.append({
-            "Sequence": seq,
             "GC%": f"{gc*100:.1f}%",
             "tRNA": f"{trna:.3f}",
             "Usage": f"{usage:.3f}",
             "MFE": f"{mfe:.1f}",
             "Score": f"{score:.3f}"
         })
-    return pd.DataFrame(results)
-def design_mrna(utr5_candidates, utr3_candidates, cds_seq):
     # 生成20个设计结果示例
     designs = []
     for i in range(20):
-        utr5 = random.choice(utr5_candidates)
-        utr3 = random.choice(utr3_candidates)
         full_seq = utr5 + cds_seq + utr3
         mfe = random.uniform(-50, -20)
         designs.append({
             "Design": f"Design_{i+1}",
-            "5'UTR": utr5[:10] + "..." if len(utr5) > 13 else utr5,
-            "3'UTR": utr3[:10] + "..." if len(utr3) > 13 else utr3,
             "MFE": f"{mfe:.1f}",
-            "Sequence": full_seq
         })
-    return pd.DataFrame(designs)
-# 标注可视化函数
-def visualize_annotation(seq):
-    start, end = find_longest_cds(seq)
-    html = f"""
-    <div style="font-family: monospace; font-size: 14px; line-height: 1.8;">
-        <div style="background-color: #ffcccc; display: inline-block; padding: 2px;">
-            5'UTR: {seq[:start] if start > 0 else 'N/A'}
-        </div>
-        <div style="background-color: #ccffcc; display: inline-block; padding: 2px;">
-            CDS: {seq[start:end] if start >=0 else 'N/A'}
-        </div>
-        <div style="background-color: #ccccff; display: inline-block; padding: 2px;">
-            3'UTR: {seq[end:] if end < len(seq) else 'N/A'}
-        </div>
-    </div>
-    <p>Annotation Legend:</p>
-    <div style="display: flex; gap: 10px;">
-        <div style="background-color: #ffcccc; padding: 5px;">5'UTR</div>
-        <div style="background-color: #ccffcc; padding: 5px;">CDS</div>
-        <div style="background-color: #ccccff; padding: 5px;">3'UTR</div>
-    </div>
-    """
-    return html
-# 创建Gradio界面
-with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
-    gr.Markdown("# 🧬 Vaccine Design Platform - Academic Collaboration")
-    with gr.Tab("mRNA Annotation"):
-        gr.Markdown("## mRNA Sequence Annotation")
-        mrna_input = gr.Textbox(label="mRNA Sequence", placeholder="Enter mRNA sequence here...")
-        annotate_btn = gr.Button("Annotate Regions")
-        annotation_output = gr.HTML(label="Sequence Annotation")
-        annotate_btn.click(visualize_annotation, inputs=mrna_input, outputs=annotation_output)
-    with gr.Tab("CDS Variants"):
-        gr.Markdown("## Calculate Potential CDS Variants")
-        protein_input = gr.Textbox(label="Protein Sequence", placeholder="Enter protein sequence here...")
-        calc_btn = gr.Button("Calculate Variants")
-        variants_output = gr.Number(label="Potential CDS Variants")
-        calc_btn.click(calculate_cds_variants, inputs=protein_input, outputs=variants_output)
-    with gr.Tab("CDS Optimization"):
-        gr.Markdown("## Optimize CDS Sequence")
-        with gr.Row():
-            protein_seq = gr.Textbox(label="Protein Sequence")
-            species = gr.Dropdown(list(species_data.keys()), label="Species", value="human")
-        method = gr.Radio(["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
-                          label="Optimization Method", value="Max GC Content")
-        optimize_btn = gr.Button("Generate Optimized Sequences")
-        results_table = gr.Dataframe(label="Top 20 Optimized Sequences", headers=["Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"])
-        optimize_btn.click(optimize_cds, inputs=[protein_seq, species, method], outputs=results_table)
-        # 评分可视化
-        plot = gr.Plot(label="Optimization Scores")
-        def update_plot(df):
-            if df is None or len(df) == 0:
-                return None
-            fig, ax = plt.subplots()
-            scores = [float(x) for x in df["Score"]]
-            ax.bar(range(len(scores)), scores, color='skyblue')
-            ax.set_xlabel("Sequence Rank")
-            ax.set_ylabel("Composite Score")
-            ax.set_title("Optimization Scores of Top Sequences")
-            return fig
-        results_table.change(update_plot, inputs=results_table, outputs=plot)
-    with gr.Tab("Full mRNA Design"):
-        gr.Markdown("## Design Full mRNA Sequence")
-        with gr.Row():
-            utr5_upload = gr.File(label="Upload 5'UTR Candidates (txt)")
-            utr3_upload = gr.File(label="Upload 3'UTR Candidates (txt)")
-        cds_input = gr.Textbox(label="CDS Sequence")
-        design_btn = gr.Button("Design mRNA Sequences")
-        design_results = gr.Dataframe(label="Top 20 Designs", headers=["Design", "5'UTR", "3'UTR", "MFE", "Sequence"])
-        design_btn.click(design_mrna, inputs=[utr5_upload, utr3_upload, cds_input], outputs=design_results)
-    with gr.Tab("Resources & Links"):
-        gr.Markdown("## Helpful Resources")
-        gr.Markdown("""
-        - [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)
-        - [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)
-        - [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)
-        - [ViralZone](https://viralzone.expasy.org/)
-        - [Codon Usage Database](https://www.kazusa.or.jp/codon/)
-        """)
-        gr.Markdown("## Download All Results")
-        download_btn = gr.Button("Download Results Package")
-        download_btn.click(lambda: "results.zip", outputs=gr.File(label="Download Results"))
-    gr.Markdown("---")
-    gr.HTML("""
-    <div style="text-align: center; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
-        <p>Academic Collaboration Platform for Vaccine Design | Contact: bioinfo@university.edu</p>
-    </div>
-    """)
-# 运行应用
 if __name__ == "__main__":
-    app.launch(server_name="0.0.0.0", server_port=7860)

+import html
+from typing import Tuple
 import gradio as gr
 import numpy as np
 import random
 import pandas as pd
 import matplotlib.pyplot as plt
+from io import BytesIO, StringIO
 import base64
+import json
+# One-letter residue alphabet, lower-cased: the 20 amino-acid letters plus
+# '*' and '-'.  Not referenced elsewhere in the visible part of this diff.
+AA_str = 'ACDEFGHIKLMNPQRSTVWY*-'.lower()
+# Amino acid (one-letter code) -> list of synonymous codons, written in the
+# DNA alphabet (T, not U).  The '*' entry holds the three stop codons.
+AA_TO_CODONS = {"F": ["TTT","TTC"],
+                "L": ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"],
+                "I": ["ATT", "ATC", "ATA"],
+                "M": ["ATG"],
+                "V": ["GTT", "GTC", "GTA", "GTG"],
+                "S": ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"],
+                "P": ["CCT", "CCC", "CCA", "CCG"],
+                "T": ["ACT", "ACC", "ACA", "ACG"],
+                "A": ["GCT", "GCC", "GCA", "GCG"],
+                "Y": ["TAT", "TAC"],
+                "H": ["CAT", "CAC"],
+                "Q": ["CAA", "CAG"],
+                "N": ["AAT", "AAC"],
+                "K": ["AAA", "AAG"],
+                "D": ["GAT", "GAC"],
+                "E": ["GAA", "GAG"],
+                "C": ["TGT", "TGC"],
+                "W": ["TGG"],
+                "R": ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"],
+                "G": ["GGT", "GGC", "GGA", "GGG"],
+                "*": ["TAA", "TAG", "TGA"]}
+def reverse_dictionary(dictionary):
+    """Invert a one-to-many mapping into a flat value -> key lookup.
+
+    Input:
+    dictionary: dict of {key: [value, ...], ...}
+    Output:
+    dict of {value: key, ...}; if a value is listed under several keys, the
+    key that comes last in iteration order wins.
+    """
+    return {
+        member: owner
+        for owner, members in dictionary.items()
+        for member in members
+    }
+# Codon -> amino acid lookup, derived by inverting AA_TO_CODONS.
+CODON_TO_AA = reverse_dictionary(AA_TO_CODONS)
 # Mock data - replace with real data for actual use
 # NOTE(review): the diff hides lines between hunks right after the opening
 # brace, so entries not shown here may exist in the real file.
 species_data = {
     "mouse": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "virus": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "Escherichia coli": {"codon_table": {}, "trna": {}, "codon_usage": {}},
+    "saccharomyces cerevisiae": {"codon_table": {}, "trna": {}, "codon_usage": {}},
     "Pichia": {"codon_table": {}, "trna": {}, "codon_usage": {}},
 }
+# Example data used by the "Load Example" buttons
+EXAMPLE_PROTEIN = "MSFSRRPKITKSDIVDQISLNIRNNNLKLEKKYIRLVIDAFFEELKGNLCLNNVIEFRSFGTFEVRKRKGRLNARNPQTGEYVKVLDHHVAYFRPGKDLKERVWGIKG"
+# Written as lower-case DNA, then converted to upper-case RNA (T -> U).
+EXAMPLE_CDS = "atgagctttagccgccgcccgaaaattaccaaaagcgatattgtggatcagattagcctg\
aacattcgcaacaacaacctgaaactggaaaaaaaatatattcgcctggtgattgatgcg\
ttttttgaagaactgaaaggcaacctgtgcctgaacaacgtgattgaatttcgcagcttt\
ggcacctttgaagtgcgcaaacgcaaaggccgcctgaacgcgcgcaacccgcagaccggc\
gaatatgtgaaagtgctggatcatcatgtggcgtattttcgcccgggcaaagatctgaaa\
gaacgcgtgtggggcattaaaggc".upper().replace('T', 'U')
+EXAMPLE_UTR5 = "GAAAAGAGCCCCGGAAAGGAUCUAUCCCUUCCUGUUCUGCUGCACGCAAAAGAACAGCCAAGGGGGAGGCCACC"
+EXAMPLE_UTR3 = "GCUCGCUUUCUUGCUGUCCAAUUUCUAUUAAAGGUUCCUUUGUUCCCUAAGUCCAACUACUAAACUGGGGGAUAUUAUGAAGGGCCUUGAGCAUCUGGAUUCUGCCUAAUAAAAAACAUUUAUUUUCAUUGCAA"
+# Full example transcript: 5'UTR + CDS + 3'UTR
+EXAMPLE_MRNA = EXAMPLE_UTR5 + EXAMPLE_CDS + EXAMPLE_UTR3
+def find_longest_cds(seq: str) -> Tuple[int, int]:
+    """Locate the longest open reading frame in an mRNA/DNA sequence.
+
+    The input is upper-cased and U is mapped to T before scanning, so RNA and
+    lower-case input are accepted.  Each of the three forward reading frames
+    is walked codon by codon: an ORF opens at the first ATG seen while no ORF
+    is open and closes at the next in-frame TAA/TAG/TGA.
+
+    Args:
+        seq: mRNA (or DNA) sequence.
+
+    Returns:
+        (start, end): index of the start codon and the index just past the
+        stop codon.  An ORF still open at the end of a frame runs to
+        len(seq).  (-1, -1) when no ATG is found.  Candidates are ranked by
+        length measured without the stop codon (or up to the end of the
+        sequence for an unterminated ORF); on ties the first one found wins.
+    """
+    dna = seq.upper().replace('U', 'T')
+    total = len(dna)
+    stop_codons = ("TAA", "TAG", "TGA")
+    winner = (-1, -1)
+    winner_len = 0
+    # Try every forward reading frame
+    for offset in range(3):
+        orf_start = None  # None means "not currently inside an ORF"
+        for idx in range(offset, total - 2, 3):
+            triplet = dna[idx:idx + 3]
+            if orf_start is None:
+                # Outside an ORF only a start codon is interesting
+                if triplet == "ATG":
+                    orf_start = idx
+                continue
+            if triplet in stop_codons:
+                span = idx - orf_start
+                if span > winner_len:
+                    winner_len = span
+                    winner = (orf_start, idx + 3)
+                orf_start = None
+        # ORF that never met an in-frame stop codon
+        if orf_start is not None:
+            span = total - orf_start
+            if span > winner_len:
+                winner_len = span
+                winner = (orf_start, total)
+    return winner
 def calculate_cds_variants(protein_seq):
+    """Count the distinct coding sequences that encode ``protein_seq``.
+
+    The count is the product, over all residues, of the number of synonymous
+    codons listed for that residue in ``AA_TO_CODONS`` (e.g. 1 for M, 6 for
+    L).  Symbols without an entry (whitespace, gaps, ambiguity codes) offer
+    no codon choice and are skipped.
+
+    Args:
+        protein_seq: one-letter amino-acid sequence; case-insensitive.
+
+    Returns:
+        int: the number of encodings, capped at 10**15; 0 for empty input.
+    """
+    if not protein_seq:
+        return 0
+    cap = 10**15  # upper bound so the UI never has to render a huge number
+    total = 1
+    for residue in protein_seq.upper():
+        codons = AA_TO_CODONS.get(residue)
+        if not codons:
+            continue
+        total *= len(codons)
+        if total >= cap:
+            # Stop early instead of growing an arbitrarily large integer
+            return cap
+    return total
+def optimize_cds(protein_seq, species, method, status_update):
+    """Produce 20 mock "optimized" CDS candidates and a bar chart of their scores.
+
+    The nucleotide sequences and the GC/tRNA/usage metrics are random
+    placeholders.  ``species`` is not used in the visible body and ``method``
+    only appears in the plot title.
+
+    Args:
+        protein_seq: protein sequence; only its length is used here (3 nt per
+            residue) plus the call to calculate_cds_variants.
+        species: selected target species (currently unused).
+        method: name of the optimization method, shown in the plot title.
+        status_update: callable invoked with a progress/result message string.
+
+    Returns:
+        (display_df, fig): the results table without the 'Full_Sequence'
+        column, and the matplotlib figure.  (empty DataFrame, None) when
+        ``protein_seq`` is empty.
+    """
+    if not protein_seq:
+        status_update("❌ Error: Please enter a protein sequence")
+        return pd.DataFrame(), None
+    status_update("🔄 Optimizing CDS sequences...")
+    # Compute the number of potential variants
+    variants = calculate_cds_variants(protein_seq)
     # Generate 20 example optimized sequences
     results = []
     for i in range(20):
         seq = ''.join(random.choices("ACGT", k=len(protein_seq)*3))
+        # Truncate the sequence for display
+        seq_display = seq[:30] + "..." if len(seq) > 30 else seq
         gc = random.uniform(0.3, 0.7)
         trna = random.uniform(0.5, 1.0)
         usage = random.uniform(0.6, 0.95)
         # NOTE(review): the line that assigns `mfe` is not shown in this diff
         # (it sits in the gap between two hunks) - confirm against the full file.
         score = gc*0.25 + trna*0.25 + usage*0.25 + (-mfe/40)*0.25
         results.append({
+            "Rank": i+1,
+            "Sequence": seq_display,
+            "Full_Sequence": seq,  # full sequence, intended for download
             "GC%": f"{gc*100:.1f}%",
             "tRNA": f"{trna:.3f}",
             "Usage": f"{usage:.3f}",
             "MFE": f"{mfe:.1f}",
             "Score": f"{score:.3f}"
         })
+    df = pd.DataFrame(results)
+    display_df = df.drop(columns=['Full_Sequence'])  # the displayed table omits the full sequence
+    # Build the chart
+    fig, ax = plt.subplots(figsize=(10, 6))
+    scores = [float(x) for x in df["Score"]]
+    bars = ax.bar(range(1, len(scores)+1), scores, color='skyblue', alpha=0.7)
+    ax.set_xlabel("Sequence Rank")
+    ax.set_ylabel("Composite Score")
+    ax.set_title(f"CDS Optimization Results ({method})")
+    ax.grid(True, alpha=0.3)
+    # Highlight the first 5 bars.  "Rank" is the generation index (results are
+    # not sorted by score), so these are not necessarily the 5 best scores.
+    for i in range(min(5, len(bars))):
+        bars[i].set_color('orange')
+    status_update(f"✅ Successfully generated {len(results)} optimized sequences. Potential variants: {variants:,}")
+    return display_df, fig
+def design_mrna(utr5_file, utr3_file, cds_seq, status_update=None):
+    """Assemble 20 mock full-length mRNA designs around ``cds_seq``.
+
+    Each design concatenates a 5'UTR, the CDS and a 3'UTR picked at random
+    from built-in default candidates; MFE and stability are random
+    placeholder values.
+
+    Args:
+        utr5_file: uploaded 5'UTR candidate file (currently ignored).
+        utr3_file: uploaded 3'UTR candidate file (currently ignored).
+        cds_seq: coding sequence placed between the UTRs.
+        status_update: optional callable invoked with progress/result message
+            strings.  May be omitted (or None) by callers that do not track
+            progress; a three-argument call previously raised TypeError.
+
+    Returns:
+        pandas.DataFrame with columns Rank, Design, 5'UTR, 3'UTR, MFE,
+        Stability and Sequence (truncated for display); an empty DataFrame
+        when ``cds_seq`` is empty.
+    """
+    # Fall back to a no-op so the body can report progress unconditionally
+    notify = status_update if status_update is not None else (lambda _message: None)
+    if not cds_seq:
+        notify("❌ Error: Please enter a CDS sequence")
+        return pd.DataFrame()
+    notify("🔄 Designing mRNA sequences...")
+    # Default UTR candidate sequences
+    default_utr5 = ["GGGAAAUAAGAGAGAAAAGAAGAGUAAGAAGAAAUAUAAGAGCCACCAUGG",
+                    "GGGAAAUAAGAGAGAAAAGAAGAGUAAGAAGAAAUAUAAGAGCCACCAUGG"]
+    default_utr3 = ["AAUAAAGCUUUUGCUUUUGUGGUGAAAUUGUUAAUAAACUAUUUUUUUUUU",
+                    "AAUAAAGCUUUUGCUUUUGUGGUGAAAUUGUUAAUAAACUAUUUUUUUUUU"]
     # Generate 20 example designs
     designs = []
     for i in range(20):
+        utr5 = random.choice(default_utr5)
+        utr3 = random.choice(default_utr3)
         full_seq = utr5 + cds_seq + utr3
+        # Truncate the sequence for display
+        full_seq_display = full_seq[:40] + "..." if len(full_seq) > 40 else full_seq
         mfe = random.uniform(-50, -20)
+        stability = random.uniform(0.6, 0.9)
         designs.append({
+            "Rank": i+1,
             "Design": f"Design_{i+1}",
+            "5'UTR": utr5[:15] + "..." if len(utr5) > 15 else utr5,
+            "3'UTR": utr3[:15] + "..." if len(utr3) > 15 else utr3,
             "MFE": f"{mfe:.1f}",
+            "Stability": f"{stability:.3f}",
+            "Sequence": full_seq_display,
+            "Full_Sequence": full_seq  # full sequence, intended for download
         })
+    df = pd.DataFrame(designs)
+    display_df = df.drop(columns=['Full_Sequence'])  # the displayed table omits the full sequence
+    notify(f"✅ Successfully designed {len(designs)} mRNA sequences")
+    return display_df
+def download_cds_results(results_df):
+    """Write the CDS optimization table to a CSV file and return its path.
+
+    Args:
+        results_df: the displayed results table; must provide the columns
+            Rank, GC%, tRNA, Usage, MFE and Score.
+
+    Returns:
+        "cds_optimization_results.csv" (written to the working directory), or
+        None when there is nothing to export.
+
+    NOTE: the "Full_Sequence" column of the export is a random 150-nt
+    placeholder generated here, not the sequence shown in the table.
+    """
+    if results_df is None or len(results_df) == 0:
+        return None
+    copied_columns = ("GC%", "tRNA", "Usage", "MFE", "Score")
+    records = []
+    for _, entry in results_df.iterrows():
+        record = {
+            "Rank": entry["Rank"],
+            "Full_Sequence": ''.join(random.choices("ACGT", k=150)),  # simulated full sequence
+        }
+        for column in copied_columns:
+            record[column] = entry[column]
+        records.append(record)
+    # Serialise to CSV text in memory first
+    buffer = StringIO()
+    pd.DataFrame(records).to_csv(buffer, index=False)
+    # Then persist it under a fixed file name
+    filename = "cds_optimization_results.csv"
+    with open(filename, 'w') as handle:
+        handle.write(buffer.getvalue())
+    return filename
+def download_mrna_results(results_df):
+    """Write the mRNA design table to a CSV file and return its path.
+
+    Args:
+        results_df: the displayed design table; must provide the columns
+            Rank, Design, 5'UTR, 3'UTR, MFE and Stability.
+
+    Returns:
+        "mrna_design_results.csv" (written to the working directory), or None
+        when there is nothing to export.
+
+    NOTE: the "Full_Sequence" column of the export is a random 300-nt
+    placeholder generated here, not the sequence shown in the table.
+    """
+    if results_df is None or len(results_df) == 0:
+        return None
+    copied_columns = ("5'UTR", "3'UTR", "MFE", "Stability")
+    records = []
+    for _, entry in results_df.iterrows():
+        record = {
+            "Rank": entry["Rank"],
+            "Design": entry["Design"],
+            "Full_Sequence": ''.join(random.choices("ACGT", k=300)),  # simulated full sequence
+        }
+        for column in copied_columns:
+            record[column] = entry[column]
+        records.append(record)
+    # Serialise to CSV text in memory first
+    buffer = StringIO()
+    pd.DataFrame(records).to_csv(buffer, index=False)
+    # Then persist it under a fixed file name
+    filename = "mrna_design_results.csv"
+    with open(filename, 'w') as handle:
+        handle.write(buffer.getvalue())
+    return filename
+def validate_dna_sequence(seq):
+    """Check that ``seq`` contains only the nucleotide letters A, C, G, T, U.
+
+    The check is case-insensitive, matching find_longest_cds and
+    translate_cds, which upper-case their input.
+
+    Args:
+        seq: nucleotide sequence; None is treated like an empty string.
+
+    Returns:
+        (is_valid, invalid_chars): ``(True, "")`` when the sequence is clean,
+        otherwise ``(False, "{'N', 'X'}")`` listing the offending upper-cased
+        characters in sorted order.  The result is a tuple, so callers must
+        test the first element rather than the tuple itself.
+    """
+    invalid = set((seq or "").upper()) - set('ACGTU')
+    if invalid:
+        # Sorted so the message does not depend on set iteration order
+        listing = ', '.join(repr(char) for char in sorted(invalid))
+        return False, '{' + listing + '}'
+    return True, ""
+def translate_cds(cds_seq,repeat=1):
+    """Translate a coding sequence into one-letter amino-acid symbols.
+
+    The sequence is upper-cased and U is mapped to T, then read in
+    consecutive 3-nt steps from index 0.  Any chunk missing from CODON_TO_AA
+    (unknown codon or a trailing partial codon) is rendered as '-'.
+
+    Args:
+        cds_seq: RNA or DNA coding sequence.
+        repeat: how many times each symbol is repeated in the output.
+
+    Returns:
+        str: the translated sequence; stop codons appear as '*'.
+    """
+    dna = cds_seq.upper().replace('U', 'T')
+    return ''.join(
+        CODON_TO_AA.get(dna[offset:offset + 3], '-') * repeat
+        for offset in range(0, len(dna), 3)
+    )
+class MaoTaoWeb:
+    """Gradio front end for the mRNA vaccine design helpers.
+
+    Creating an instance builds the whole ``gr.Blocks`` layout and stores it
+    on ``self.app``; call ``self.app.launch(...)`` to serve it.  Handlers
+    update the shared ``self.status_display`` textbox by returning a message
+    for it, so that textbox is listed among the outputs of every handler that
+    reports status.
+    """
+    def __init__(self):
+        self.app = self.design_app()
+    def design_app(self):
+        """Assemble the top-level layout and return the ``gr.Blocks`` app."""
+        with gr.Blocks(title="Vaccine Designer", theme=gr.themes.Soft()) as app:
+            gr.Markdown("# 🧬 Vaccine Design Platform")
+            gr.Markdown("*Academic Collaboration Platform for mRNA Vaccine Design*")
+            # Global status line shared by all tabs
+            self.status_display = gr.Textbox(
+                label="Status",
+                value="Ready to start",
+                interactive=False,
+                container=True
+            )
+            # Build the individual tabs
+            self.mrna_annotation_tab()
+            self.cds_optimization_tab()
+            self.mrna_design_tab()
+            self.resources_tab()
+        return app
+    def mrna_annotation_tab(self):
+        """Tab that splits an mRNA into 5'UTR / CDS / 3'UTR and shows the translation."""
+        with gr.Tab("🔬 mRNA Annotation"):
+            gr.Markdown("## mRNA Sequence Annotation")
+            with gr.Row():
+                with gr.Column(scale=3):
+                    mrna_input = gr.Textbox(
+                        label="mRNA Sequence",
+                        placeholder="Enter mRNA sequence here...",
+                        lines=5,
+                        max_lines=10
+                    )
+                with gr.Column(scale=1):
+                    start_position = gr.Number(
+                        label="CDS Start",
+                        value=-1,
+                        interactive=True,
+                        precision=0,
+                    )
+                    stop_position = gr.Number(
+                        label="CDS End",
+                        value=-1,
+                        interactive=True,
+                        precision=0,
+                    )
+            with gr.Row():
+                example_btn = gr.Button("Load Example", variant="secondary")
+                annotate_btn = gr.Button("Annotate Regions", variant="primary")
+            with gr.Row():
+                annotation_output = gr.HTML(
+                    label="Sequence Regions",
+                    value="<div style='font-family: monospace;'>Results will appear here</div>"
+                )
+            def annotate_sequence(seq,start=-1,end=-1):
+                """Return (annotation HTML, status message) for ``seq``.
+
+                ``start``/``end`` of -1 mean "detect the longest CDS
+                automatically"; otherwise they are used as slice bounds.
+                """
+                if not seq:
+                    return "<div style='color: red;'>Please enter a sequence</div>", "❌error"
+                # Pasted sequences often carry line breaks and mixed case; normalise
+                # first so validation and the reported coordinates use the same text.
+                seq = ''.join(seq.split()).upper()
+                # validate_dna_sequence returns an (is_valid, details) tuple; the tuple
+                # itself is always truthy, so the flag has to be unpacked.
+                is_valid, _details = validate_dna_sequence(seq)
+                if not is_valid:
+                    return "<div style='color: red;'>Invalid sequence. Only A, C, G, T/U allowed.</div>", "❌error"
+                # An emptied Number box may arrive as None; treat it as the -1 sentinel
+                start = -1 if start is None else int(start)
+                end = -1 if end is None else int(end)
+                if start ==-1 and end ==-1:
+                    start, end = find_longest_cds(seq)
+                    status_msg = f"✅ Found CDS at position {start} to {end}"
+                else:
+                    status_msg = f"✅ Using user-defined CDS at position {start} to {end}"
+                if start == -1:
+                    return "<div style='color: red;'>No CDS found in sequence</div>", "❌error"
+                # Extract the CDS and translate it
+                cds_seq = seq[start:end]
+                aa_seq = translate_cds(cds_seq)
+                line_width = 60
+                def wrap(text):
+                    # Break text into lines of line_width characters
+                    return '\n'.join(text[i:i + line_width] for i in range(0, len(text), line_width))
+                # Build the coloured HTML report
+                html_result = "<div style='font-family: monospace; white-space: pre; margin-left: 15px;'>"
+                # Plain CDS and protein listings
+                html_result += f"{line_width} nt per line\n\n<span style='font-weight: bold;'>CDS ({len(cds_seq)} bp):\n{wrap(cds_seq)}\n\n</span>"
+                # Protein length is a residue count, so it is labelled "aa" rather than "bp"
+                html_result += f"<span style=' font-weight: bold;'>Protein ({len(aa_seq)} aa):\n{wrap(aa_seq)}\n\n</span>"
+                # 5'UTR section - dark green
+                if start > 0:
+                    utr5 = html.escape(seq[:start])
+                    html_result += f"<span style='color: #006400; font-weight: bold;'>5'UTR ({len(utr5)} bp):\n{wrap(utr5)}\n</span>\n"
+                else:
+                    html_result += f"<span style='color: #006400; font-weight: bold;'>5'UTR:\nN/A\n</span>\n"
+                if end - start > 0:
+                    # CDS section - blue, codons aligned with their amino acids
+                    html_result += f"<span style='color: blue; font-weight: bold;'>CDS align ({len(cds_seq)} bp):\n"
+                    for i in range(0, len(cds_seq), line_width):
+                        # Nucleotide line: codons separated by single spaces
+                        nt_chunk = cds_seq[i:i + line_width]
+                        nt_formatted = ' '.join([nt_chunk[j:j + 3] for j in range(0, len(nt_chunk), 3)])
+                        html_result += f"{nt_formatted}\n"
+                        # Amino-acid line for the same codons
+                        aa_start = i // 3
+                        aa_end = min(aa_start + line_width//3, len(aa_seq))
+                        aa_chunk = aa_seq[aa_start:aa_end]
+                        aa_line = '   '.join(aa_chunk)  # three spaces between residues
+                        # Indent so each residue sits under the middle of its codon
+                        alignment = ' ' * (len(nt_formatted.split()[0]) // 2)
+                        html_result += f"{alignment}{aa_line}\n"
+                    html_result += "</span>\n"
+                # 3'UTR section - purple
+                if end !=-1 and end < len(seq):
+                    utr3 = html.escape(seq[end:])
+                    html_result += f"<span style='color: purple; font-weight: bold;'>3'UTR ({len(utr3)} bp):\n{wrap(utr3)}\n</span>"
+                else:
+                    html_result += "<span style='color: purple; font-weight: bold;'>3'UTR: </span>N/A"
+                # Close the wrapper opened at the top of the report
+                html_result += "</div>"
+                return html_result,status_msg
+            annotate_btn.click(
+                annotate_sequence,
+                inputs=[mrna_input,start_position,stop_position],
+                outputs=[annotation_output,self.status_display]
+            )
+            example_btn.click(
+                lambda: [EXAMPLE_MRNA, -1, -1],
+                outputs=[mrna_input,start_position,stop_position]
+            )
+    def cds_optimization_tab(self):
+        """Tab that runs optimize_cds and shows the table, plot and variant count."""
+        with gr.Tab("🧬 CDS Optimization"):
+            gr.Markdown("## CDS Sequence Optimization")
+            with gr.Row():
+                with gr.Column(scale=2):
+                    protein_seq = gr.Textbox(
+                        label="Protein Sequence",
+                        placeholder="Enter protein sequence here...",
+                        lines=3
+                    )
+                    cds_example_btn = gr.Button("Load Example", variant="secondary")
+                with gr.Column(scale=1):
+                    species = gr.Dropdown(
+                        choices=list(species_data.keys()),
+                        label="Target Species",
+                        value="human"
+                    )
+                    method = gr.Radio(
+                        choices=["Max GC Content", "tRNA Abundance", "Codon Usage", "MFE Optimization"],
+                        label="Optimization Method",
+                        value="Max GC Content"
+                    )
+            with gr.Row():
+                optimize_btn = gr.Button("🚀 Optimize CDS", variant="primary", scale=2)
+                variants_display = gr.Number(
+                    label="Potential Variants",
+                    value=0,
+                    interactive=False,
+                    scale=1
+                )
+            with gr.Row():
+                results_table = gr.Dataframe(
+                    label="Optimization Results",
+                    headers=["Rank", "Sequence", "GC%", "tRNA", "Usage", "MFE", "Score"],
+                    datatype=["number", "str", "str", "str", "str", "str", "str"],
+                    col_count=(7, "fixed"),
+                    wrap=True
+                )
+            optimization_plot = gr.Plot(label="Score Distribution")
+            with gr.Row():
+                download_cds_btn = gr.Button("📥 Download CDS Results", variant="secondary")
+                cds_download_file = gr.File(label="Download File", visible=False)
+            def optimize_and_update(protein_seq, species, method):
+                """Run the optimizer; return (table, plot, variant count, status)."""
+                # optimize_cds reports through a callable; collect its messages so
+                # the last one can be surfaced when nothing was generated.
+                messages = []
+                df, plot = optimize_cds(protein_seq, species, method, messages.append)
+                variants = calculate_cds_variants(protein_seq) if protein_seq else 0
+                if len(df) == 0:
+                    status = messages[-1] if messages else "❌ Error: no sequences were generated"
+                else:
+                    status = f"✅ Optimization complete! Generated {len(df)} sequences with {variants:,} potential variants"
+                return df, plot, variants, status
+            def export_cds(table):
+                # The File component starts hidden; reveal it once a file exists
+                path = download_cds_results(table)
+                return gr.update(value=path, visible=path is not None)
+            optimize_btn.click(
+                optimize_and_update,
+                inputs=[protein_seq, species, method],
+                outputs=[results_table, optimization_plot, variants_display, self.status_display]
+            )
+            cds_example_btn.click(lambda: EXAMPLE_PROTEIN, outputs=protein_seq)
+            download_cds_btn.click(
+                export_cds,
+                inputs=results_table,
+                outputs=cds_download_file
+            )
+    def mrna_design_tab(self):
+        """Tab that runs design_mrna and shows the resulting design table."""
+        with gr.Tab("🧪 mRNA Design"):
+            gr.Markdown("## Full mRNA Sequence Design")
+            with gr.Row():
+                with gr.Column():
+                    utr5_upload = gr.File(
+                        label="5'UTR Candidates (Optional)",
+                        file_types=[".txt"]
+                    )
+                    utr3_upload = gr.File(
+                        label="3'UTR Candidates (Optional)",
+                        file_types=[".txt"]
+                    )
+                with gr.Column():
+                    cds_input = gr.Textbox(
+                        label="CDS Sequence",
+                        placeholder="Enter CDS sequence here...",
+                        lines=4
+                    )
+                    mrna_example_btn = gr.Button("Load Example", variant="secondary")
+            design_btn = gr.Button("🎯 Design mRNA", variant="primary")
+            design_results = gr.Dataframe(
+                label="mRNA Design Results",
+                headers=["Rank", "Design", "5'UTR", "3'UTR", "MFE", "Stability", "Sequence"],
+                datatype=["number", "str", "str", "str", "str", "str", "str"],
+                col_count=(7, "fixed"),
+                wrap=True
+            )
+            with gr.Row():
+                download_mrna_btn = gr.Button("📥 Download mRNA Results", variant="secondary")
+                mrna_download_file = gr.File(label="Download File", visible=False)
+            def design_and_update(utr5_file, utr3_file, cds_seq):
+                """Run the designer; return (table, status)."""
+                # design_mrna reports through a callable passed as its 4th argument
+                messages = []
+                df = design_mrna(utr5_file, utr3_file, cds_seq, messages.append)
+                if len(df) == 0:
+                    status = messages[-1] if messages else "❌ Error: no mRNA designs were generated"
+                else:
+                    status = f"✅ mRNA design complete! Generated {len(df)} design variants"
+                return df, status
+            def export_mrna(table):
+                # The File component starts hidden; reveal it once a file exists
+                path = download_mrna_results(table)
+                return gr.update(value=path, visible=path is not None)
+            design_btn.click(
+                design_and_update,
+                inputs=[utr5_upload, utr3_upload, cds_input],
+                outputs=[design_results, self.status_display]
+            )
+            mrna_example_btn.click(lambda: EXAMPLE_CDS, outputs=cds_input)
+            download_mrna_btn.click(
+                export_mrna,
+                inputs=design_results,
+                outputs=mrna_download_file
+            )
+    def resources_tab(self):
+        """Static tab with links to external databases and tools."""
+        with gr.Tab("📚 Resources"):
+            gr.Markdown("## Bioinformatics Resources")
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Databases")
+                    gr.Markdown("""
+                    - [NCBI GenBank](https://www.ncbi.nlm.nih.gov/genbank/)
+                    - [Nucleic Acid Database](https://ngdc.cncb.ac.cn/ncov/)
+                    - [Codon Usage Database](https://www.kazusa.or.jp/codon/)
+                    - [ViralZone](https://viralzone.expasy.org/)
+                    - [bioinformatics tool](https://www.bioinformatics.org/sms2/rev_trans.html)
+                    """)
+                with gr.Column():
+                    gr.Markdown("### Tools")
+                    gr.Markdown("""
+                    - [mRNA Designer Platform](https://www.biosino.org/mRNAdesigner/main)
+                    - [ViennaRNA Package](https://www.tbi.univie.ac.at/RNA/)
+                    - [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi)
+                    - [Primer3](https://primer3.org/)
+                    """)
+            gr.Markdown("---")
+            gr.Markdown("### Contact Information")
+            gr.Markdown("Academic Collaboration Platform | Email: [email protected]")
 if __name__ == "__main__":
+    # Build the UI and start the server; 0.0.0.0 listens on all network
+    # interfaces, port 7860, with Gradio's debug mode switched on.
+    mtao_web = MaoTaoWeb()
+    mtao_web.app.launch(server_name="0.0.0.0", server_port=7860, debug=True)