File size: 2,725 Bytes
6e9b5dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
File: CodonPostProcessing.py
---------------------------
Post-processing utilities for codon optimization using DNAChisel.
This module provides sequence polishing capabilities to fix restriction sites,
homopolymers, and other constraints while preserving CAI and GC content.
"""

import warnings
import numpy as np

# Optional dependency guard: DNAChisel is imported if present; otherwise the
# module still loads and DNACHISEL_AVAILABLE records that it is missing.
try:
    from dnachisel import (
        DnaOptimizationProblem,
        AvoidPattern,
        EnforceGCContent,
        EnforceTranslation,
        CodonOptimize,
    )
except ImportError:
    # Emitted once, at first import of this module.
    DNACHISEL_AVAILABLE = False
    warnings.warn(
        "DNAChisel is not installed. Post-processing features will be disabled."
    )
else:
    # Only reached when every name above was imported successfully.
    DNACHISEL_AVAILABLE = True

def polish_sequence_with_dnachisel(
    dna_sequence: str,
    protein_sequence: str,
    gc_bounds: tuple = (45.0, 55.0),
    cai_species: str = "e_coli",
    avoid_homopolymers_length: int = 6,
    enzymes_to_avoid: list = None
) -> str:
    """
    Polishes a DNA sequence using DNAChisel to meet lab synthesis constraints.

    The sequence is constrained to translate to ``protein_sequence``, to stay
    within the requested GC range, and to avoid the given restriction sites
    and homopolymer runs; it is then codon-optimized for ``cai_species``.
    This is best-effort: if DNAChisel is missing or any step fails, a warning
    is issued and the input sequence is returned unchanged.

    Args:
        dna_sequence: Coding DNA sequence to polish.
        protein_sequence: Amino-acid sequence passed to ``EnforceTranslation``.
        gc_bounds: ``(min, max)`` GC content in percent; each value is divided
            by 100 before being handed to ``EnforceGCContent``.
        cai_species: Species name passed to ``CodonOptimize``.
        avoid_homopolymers_length: Length of the A/T/G/C runs to forbid.
            Values below 1 disable the homopolymer constraints.
        enzymes_to_avoid: Restriction enzyme names whose sites must not appear.
            Defaults to EcoRI, XbaI, SpeI, PstI and NotI when ``None``.

    Returns:
        The polished sequence, or ``dna_sequence`` itself when polishing was
        skipped or failed.
    """
    if not DNACHISEL_AVAILABLE:
        warnings.warn("DNAChisel not available, skipping post-processing.")
        return dna_sequence

    if enzymes_to_avoid is None:
        # Common cloning enzymes
        enzymes_to_avoid = ["EcoRI", "XbaI", "SpeI", "PstI", "NotI"]

    try:
        # Start with the basic, essential constraints
        constraints = [
            EnforceTranslation(translation=protein_sequence),
            EnforceGCContent(mini=gc_bounds[0] / 100.0, maxi=gc_bounds[1] / 100.0),
        ]

        # Add enzyme avoidance constraints. DNAChisel documents the
        # "<EnzymeName>_site" string shorthand for restriction sites; the
        # previously used AvoidPattern.from_enzyme_name does not appear in the
        # documented API, so its AttributeError was swallowed below and no
        # site was ever actually avoided.
        for enzyme in enzymes_to_avoid:
            try:
                constraints.append(AvoidPattern(f"{enzyme}_site"))
            except Exception as exc:
                # Presumably raised for names the enzyme database does not
                # know; the cause is included so other failures stay visible.
                warnings.warn(
                    f"Could not find enzyme '{enzyme}' in DNAChisel library. "
                    f"({exc!r})"
                )

        # Add homopolymer avoidance constraints. A non-positive length would
        # yield an empty pattern, so it is treated as "feature disabled".
        if avoid_homopolymers_length > 0:
            constraints.extend(
                AvoidPattern(base * avoid_homopolymers_length) for base in "ATGC"
            )

        # Define the optimization problem
        problem = DnaOptimizationProblem(
            sequence=dna_sequence,
            constraints=constraints,
            objectives=[CodonOptimize(species=cai_species, method="match_codon_usage")]
        )

        # Hard constraints are resolved first, then the objective is optimized.
        problem.resolve_constraints()
        problem.optimize()

        # Return the polished sequence
        return problem.sequence

    except Exception as e:
        # Deliberate best-effort boundary: polishing must never break the
        # caller, so any failure falls back to the unpolished input.
        warnings.warn(f"DNAChisel post-processing failed with an error: {e}")
        return dna_sequence