File size: 4,996 Bytes
65bd8af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from Bio.PDB import PDBParser
from Bio.PDB.Polypeptide import is_aa
import numpy as np
def calculate_min_distance(residue_atoms, peptide_atoms):
    """
    Return the smallest pairwise distance between a residue atom and a peptide atom.

    Each pair is measured with the ``-`` operator on the two atom objects
    (Biopython atoms return their separation when subtracted).
    If either collection is empty, no pair is examined and ``np.inf`` is returned.
    """
    closest = np.inf
    for res_atom in residue_atoms:
        for pep_atom in peptide_atoms:
            # min() keeps the current best unless the new value is strictly smaller.
            closest = min(closest, res_atom - pep_atom)
    return closest
def calculate_centroid_distance(residue_atoms, peptide_atoms):
    """
    Return the Euclidean distance between the residue centroid and the peptide centroid.

    Each centroid is the arithmetic mean of the coordinates returned by
    ``get_coord()`` for every atom in the corresponding collection.
    """
    res_xyz = np.array([a.get_coord() for a in residue_atoms])
    pep_xyz = np.array([a.get_coord() for a in peptide_atoms])
    # Average over the atom axis to get one point per group, then measure the gap.
    offset = np.mean(res_xyz, axis=0) - np.mean(pep_xyz, axis=0)
    return np.linalg.norm(offset)
def get_peptide_atoms(model, peptide_chain_id):
    """
    Return a list of every atom in the chain ``peptide_chain_id`` of ``model``.

    The chain is looked up with ``model[peptide_chain_id]`` and its
    ``get_atoms()`` iterator is materialized into a list.
    """
    return [*model[peptide_chain_id].get_atoms()]
def get_residue_atoms(model, protein_chain_id, residue_indices):
    """
    Select the requested standard amino-acid residues from the protein chain.

    Note: despite the function name, the return value is a list of residue
    objects, not atoms; callers obtain atoms via ``residue.get_atoms()``.

    Parameters:
    - model: Structure model indexable by chain ID.
    - protein_chain_id: Chain ID of the protein.
    - residue_indices: Iterable of residue numbers to keep (compared against
      the second element of each residue's ``get_id()`` tuple).

    Returns:
    - List of matching residues, in chain order. Residues that are not
      standard amino acids according to ``is_aa(res, standard=True)`` are skipped.
    """
    protein_chain = model[protein_chain_id]
    # Build the lookup once: O(1) membership per residue, and safe even when
    # the caller passes a one-shot iterable instead of a list.
    wanted = frozenset(residue_indices)
    return [
        res
        for res in protein_chain
        if res.get_id()[1] in wanted and is_aa(res, standard=True)
    ]
def compute_average_distance(pdb_file, protein_chain_id, peptide_chain_id, residue_indices, distance_type='min'):
    """
    Compute the average distance of specified residues to the peptide.

    Parameters:
    - pdb_file: Path to the PDB file.
    - protein_chain_id: Chain ID of the protein.
    - peptide_chain_id: Chain ID of the peptide.
    - residue_indices: List of residue numbers on the protein.
    - distance_type: 'min' for minimum distance, 'centroid' for centroid distance.

    Returns:
    - average_distance: The average distance across specified residues.
    - detailed_distances: Dictionary with residue indices as keys and distances as values.

    Raises:
    - ValueError: If distance_type is not 'min' or 'centroid', if the peptide
      chain has no atoms, or if none of the requested residues are found.
    """
    # Validate the option before any file I/O so a typo fails immediately.
    if distance_type not in ('min', 'centroid'):
        raise ValueError("Invalid distance_type. Choose 'min' or 'centroid'.")

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('complex', pdb_file)
    # Only the first model is used; adjust if multiple models exist.
    model = structure[0]

    peptide_atoms = get_peptide_atoms(model, peptide_chain_id)
    if not peptide_atoms:
        # Without this guard the result would silently be inf ('min') or nan ('centroid').
        raise ValueError(f"Peptide chain '{peptide_chain_id}' contains no atoms.")

    residues = get_residue_atoms(model, protein_chain_id, residue_indices)
    if not residues:
        raise ValueError("No valid residues found with the provided indices.")

    # Pick the metric once instead of re-testing distance_type for every residue.
    if distance_type == 'min':
        measure = calculate_min_distance
    else:
        measure = calculate_centroid_distance

    # Keyed by residue number (second element of the residue id tuple).
    detailed_distances = {
        res.get_id()[1]: measure(list(res.get_atoms()), peptide_atoms)
        for res in residues
    }
    average_distance = np.mean(list(detailed_distances.values()))
    return average_distance, detailed_distances
def parse_motif(motif: str) -> list:
    """
    Expand a motif specification into a flat list of residue numbers.

    The motif is a comma-separated list of single integers and inclusive
    ``start-end`` ranges, e.g. ``'3, 10-12'``. Every parsed number is shifted
    up by one in the result (presumably converting 0-based motif positions to
    1-based residue numbering -- confirm against the caller's convention).

    Blank tokens (empty input, doubled or trailing commas) are ignored, so an
    empty motif yields an empty list.

    Raises:
    - ValueError: If a token is not an integer or a well-formed ``start-end`` range.
    """
    positions = []
    for token in motif.split(','):
        token = token.strip()
        if not token:
            # Tolerate '' and stray commas instead of failing on int('').
            continue
        if '-' in token:
            bounds = token.split('-')
            if len(bounds) != 2:
                raise ValueError(f"Invalid range in motif: {token!r}")
            start, end = (int(bound) for bound in bounds)
            # Inclusive range, with the +1 shift applied to both ends.
            positions.extend(range(start + 1, end + 2))
        else:
            positions.append(int(token) + 1)
    return positions
if __name__ == "__main__":
    # Run configuration -- edit these values for your own complex.
    pdb_file = "/home/tc415/discrete-diffusion-guidance/pdbs/UBC9_docked.pdb"  # PDB file to analyse
    protein_chain_id = "A"  # chain holding the protein
    peptide_chain_id = "B"  # chain holding the peptide
    residue_indices = parse_motif('123-127')  # motif positions to measure
    distance_type = 'min'  # either 'min' or 'centroid'

    try:
        mean_distance, per_residue = compute_average_distance(
            pdb_file=pdb_file,
            protein_chain_id=protein_chain_id,
            peptide_chain_id=peptide_chain_id,
            residue_indices=residue_indices,
            distance_type=distance_type,
        )
        print(f"Distance Type: {distance_type.capitalize()} Distance")
        print("Individual Residue Distances:")
        # Report residues in ascending residue-number order.
        for res_id, value in sorted(per_residue.items()):
            print(f"Residue {res_id}: {value:.2f} Å")
        print(f"\nAverage Distance for specified residues: {mean_distance:.2f} Å")
    except Exception as err:
        # Top-level boundary: report any failure as a single line.
        print(f"Error: {err}")
|