NielTT committed on
Commit
e611d1f
·
verified ·
1 Parent(s): 9afd7b7

Upload 108 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +66 -6
  2. app.py +1188 -0
  3. config.yaml +16 -0
  4. data/epitopes/data_splits.json +2878 -0
  5. data/epitopes/epitopes.csv +0 -0
  6. data/epitopes/epitopes_13.csv +0 -0
  7. data/species.json +3562 -0
  8. models/.DS_Store +0 -0
  9. models/ReCEP/20250626_110438/best_mcc_model.bin +3 -0
  10. models/ReCEP/20250626_110438/config.json +70 -0
  11. requirements.txt +49 -0
  12. src/.DS_Store +0 -0
  13. src/bce.egg-info/PKG-INFO +3 -0
  14. src/bce.egg-info/SOURCES.txt +43 -0
  15. src/bce.egg-info/dependency_links.txt +1 -0
  16. src/bce.egg-info/top_level.txt +1 -0
  17. src/bce/.DS_Store +0 -0
  18. src/bce/__init__.py +0 -0
  19. src/bce/__pycache__/__init__.cpython-310.pyc +0 -0
  20. src/bce/__pycache__/__init__.cpython-39.pyc +0 -0
  21. src/bce/__pycache__/loss.cpython-310.pyc +0 -0
  22. src/bce/__pycache__/loss.cpython-39.pyc +0 -0
  23. src/bce/antigen/__init__.py +0 -0
  24. src/bce/antigen/__pycache__/__init__.cpython-310.pyc +0 -0
  25. src/bce/antigen/__pycache__/__init__.cpython-39.pyc +0 -0
  26. src/bce/antigen/__pycache__/antigen.cpython-310.pyc +0 -0
  27. src/bce/antigen/__pycache__/antigen.cpython-39.pyc +0 -0
  28. src/bce/antigen/__pycache__/pc.cpython-310.pyc +0 -0
  29. src/bce/antigen/__pycache__/protein_chain.cpython-310.pyc +0 -0
  30. src/bce/antigen/antigen.py +0 -0
  31. src/bce/antigen/pc.py +37 -0
  32. src/bce/data/__init__.py +0 -0
  33. src/bce/data/__pycache__/__init__.cpython-310.pyc +0 -0
  34. src/bce/data/__pycache__/__init__.cpython-39.pyc +0 -0
  35. src/bce/data/__pycache__/data.cpython-310.pyc +0 -0
  36. src/bce/data/__pycache__/data.cpython-39.pyc +0 -0
  37. src/bce/data/__pycache__/dataset.cpython-310.pyc +0 -0
  38. src/bce/data/__pycache__/dataset_egnn.cpython-310.pyc +0 -0
  39. src/bce/data/__pycache__/dataset_final.cpython-310.pyc +0 -0
  40. src/bce/data/__pycache__/graph_utils.cpython-310.pyc +0 -0
  41. src/bce/data/__pycache__/utils.cpython-310.pyc +0 -0
  42. src/bce/data/__pycache__/utils.cpython-39.pyc +0 -0
  43. src/bce/data/data.py +1262 -0
  44. src/bce/data/utils.py +352 -0
  45. src/bce/model/EGNN.py +315 -0
  46. src/bce/model/ReCEP.py +481 -0
  47. src/bce/model/__init__.py +11 -0
  48. src/bce/model/__pycache__/EGNN.cpython-310.pyc +0 -0
  49. src/bce/model/__pycache__/EGNN.cpython-39.pyc +0 -0
  50. src/bce/model/__pycache__/ReCEP.cpython-310.pyc +0 -0
README.md CHANGED
@@ -1,13 +1,73 @@
1
  ---
2
- title: ReCEP
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.36.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: B-cell Epitope Prediction Server
3
+ emoji: 🧬
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
+ # 🧬 B-cell Epitope Prediction Server
14
+
15
+ A web-based interface for B-cell epitope prediction using the ReCEP model.
16
+
17
+ ## 📋 How to Use
18
+
19
+ ### 1. Input Protein Structure
20
+
21
+ Choose one of two input methods:
22
+
23
+ #### Option A: PDB ID
24
+ - Enter a 4-character PDB ID (e.g., "5I9Q")
25
+ - Specify the chain ID (e.g., "A")
26
+
27
+ #### Option B: Upload PDB File
28
+ - Upload a PDB structure file (.pdb or .ent format)
29
+ - Optionally specify a custom PDB ID
30
+ - Specify the chain ID
31
+
32
+ ### 2. Configure Prediction Parameters
33
+
34
+ #### Basic Parameters:
35
+ - **Chain ID**: Target protein chain (default: A)
36
+
37
+ #### Advanced Parameters (Optional):
38
+ - **Radius**: Spherical region radius in Ångstroms (default: 19.0)
39
+ - **Top-k Regions**: Number of top regions to analyze (default: 7)
40
+ - **Encoder**: Protein encoder type (currently ESM-C only)
41
+ - **Device Configuration**: CPU or GPU processing (currently CPU only)
42
+ - **Threshold**: Custom prediction threshold (optional; leave empty to use the automatically selected threshold)
43
+
44
+ ### 3. View Results
45
+
46
+ The application provides:
47
+
48
+ #### Prediction Summary
49
+ - Protein information (PDB ID, chain, length, sequence)
50
+ - Prediction statistics (epitope count, coverage rate, etc.)
51
+ - Top-k region centers
52
+ - Predicted epitope residues
53
+ - Binding region residues
54
+
55
+ #### Download Options
56
+ - **JSON Results**: Complete prediction data with metadata
57
+ - **CSV Results**: Residue-level predictions for analysis
58
+ - **3D Visualization**: Interactive HTML file with 3Dmol.js viewer
59
+
60
+ ### 4. 3D Visualization
61
+
62
+ The downloadable HTML file includes:
63
+ - **Display Modes**:
64
+ - Predicted Epitopes: Highlight predicted epitope residues
65
+ - Probability Gradient: Color residues by prediction confidence
66
+ - **Representation Styles**: Cartoon, Surface, Stick, Sphere
67
+ - **Interactive Controls**: Rotate, zoom, pan, reset view, save image
68
+
69
+ ## 📜 License
70
+
71
+ This project is licensed under the MIT License - see the LICENSE file for details.
72
+
73
+ **Note**: This is a research tool for B-cell epitope prediction. Results should be validated through experimental methods for clinical or commercial applications.
app.py ADDED
@@ -0,0 +1,1188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ import tempfile
5
+ import traceback
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pathlib import Path
9
+ from typing import Optional, Tuple, Dict, Any
10
+ import torch
11
+ import time
12
+ import io
13
+ import base64
14
+ import zipfile
15
+ from datetime import datetime
16
+
17
+ # Dynamic installation of PyTorch Geometric dependencies
18
def install_torch_geometric_deps():
    """Install the PyTorch Geometric companion packages at runtime.

    The packages are installed on first start rather than at build time to
    avoid compilation issues during the Hugging Face Spaces build.

    Strategy:
      1. If ``torch_scatter`` is already importable, do nothing.
      2. Otherwise install ``torch-scatter``, ``torch-sparse``,
         ``torch-cluster`` and ``torch-spline-conv`` from the PyG wheel index
         that matches the installed PyTorch version (CPU build).
      3. If that fails, fall back to a plain ``pip install torch-scatter``.

    Returns:
        bool: True when the packages are available (already present or pip
        reported success), False otherwise.
    """
    import importlib
    import subprocess
    import sys

    # Check if torch-scatter is already installed
    try:
        import torch_scatter  # noqa: F401
        print("✅ torch-scatter already installed")
        return True
    except ImportError:
        print("🔄 Installing torch-scatter and related packages...")

    # Drop the local build tag ("2.1.0+cu118" -> "2.1.0"); the PyG wheel index
    # is keyed by the bare PyTorch version.
    torch_version_str = torch.__version__.split('+')[0]

    def run_pip(cmd):
        """Run one pip command with captured output and a 5 minute limit."""
        return subprocess.run(cmd, capture_output=True, text=True, timeout=300)

    # Use PyTorch Geometric official recommended installation method
    try:
        # For CPU version, use official CPU wheel
        pip_cmd = [
            sys.executable, "-m", "pip", "install",
            "torch-scatter", "torch-sparse", "torch-cluster", "torch-spline-conv",
            "-f", f"https://data.pyg.org/whl/torch-{torch_version_str}+cpu.html",
            "--no-cache-dir"
        ]

        print(f"Running: {' '.join(pip_cmd)}")
        result = run_pip(pip_cmd)

        if result.returncode == 0:
            # Packages installed while the interpreter is running are only
            # guaranteed to be importable after the finder caches are reset.
            importlib.invalidate_caches()
            print("✅ Successfully installed torch-scatter and related packages")
            return True

        print(f"❌ Failed to install packages: {result.stderr}")
        # Try simplified installation method
        try:
            simple_cmd = [sys.executable, "-m", "pip", "install", "torch-scatter", "--no-cache-dir"]
            result = run_pip(simple_cmd)
            if result.returncode == 0:
                importlib.invalidate_caches()
                print("✅ Successfully installed torch-scatter with simple method")
                return True
            print(f"❌ Simple install also failed: {result.stderr}")
            return False
        except Exception as e:
            print(f"❌ Exception during simple install: {e}")
            return False

    except subprocess.TimeoutExpired:
        print("❌ Installation timeout - packages may not be available")
        return False
    except Exception as e:
        print(f"❌ Exception during installation: {e}")
        return False
73
+
74
+ # Try to install PyTorch Geometric dependencies
75
# Run the best-effort runtime install; a failure is reported but start-up
# continues so that features not needing these packages can still work.
deps_installed = install_torch_geometric_deps()

if not deps_installed:
    for warning_line in (
        "⚠️ Warning: PyTorch Geometric dependencies not installed. Some features may not work.",
        "The application will try to continue with limited functionality.",
    ):
        print(warning_line)

# Set up paths and imports for different deployment environments
import sys

# Directory that contains this script; every project path is resolved from it.
BASE_DIR = Path(__file__).parent
84
+
85
+ # Smart import handling for different environments
86
def setup_imports():
    """Locate the BCE package and bind its entry points as module globals.

    Three layouts are tried in order, stopping at the first that imports:
      1. ``src`` as a package below ``BASE_DIR`` (local development),
      2. ``BASE_DIR / "src"`` placed on ``sys.path`` (Hugging Face Spaces),
      3. an installed ``bce`` package.

    On success the globals ``AntigenChain`` and ``PROJECT_BASE_DIR`` are set.
    If every attempt fails, ``PROJECT_BASE_DIR`` falls back to ``BASE_DIR``.

    Returns:
        bool: True if one of the import strategies succeeded, else False.
    """
    global AntigenChain, PROJECT_BASE_DIR

    src_dir = BASE_DIR / "src"

    # Strategy 1: import through the "src." prefix (local development)
    if src_dir.exists():
        sys.path.insert(0, str(BASE_DIR))
        try:
            from src.bce.antigen.antigen import AntigenChain
            from src.bce.utils.constants import BASE_DIR as PROJECT_BASE_DIR
        except ImportError as err:
            print(f"❌ Failed to import from src/: {err}")
        else:
            print("✅ Successfully imported from src/ directory")
            return True

    # Strategy 2: put src/ itself on the path (Hugging Face Spaces)
    if src_dir.exists():
        sys.path.insert(0, str(src_dir))
        try:
            from bce.antigen.antigen import AntigenChain
            from bce.utils.constants import BASE_DIR as PROJECT_BASE_DIR
        except ImportError as err:
            print(f"❌ Failed to import with src/ in path: {err}")
        else:
            print("✅ Successfully imported from src/ added to path")
            return True

    # Strategy 3: rely on an installed package
    try:
        from bce.antigen.antigen import AntigenChain
        from bce.utils.constants import BASE_DIR as PROJECT_BASE_DIR
    except ImportError as err:
        print(f"❌ Failed to import from installed package: {err}")
    else:
        print("✅ Successfully imported from installed package")
        return True

    # Nothing worked: keep the script directory as the project root
    print("⚠️ All import methods failed, using fallback settings")
    PROJECT_BASE_DIR = BASE_DIR
    return False
126
+
127
+ # Execute import setup
128
# Resolve the BCE package now; the application cannot run without it.
import_success = setup_imports()

if not import_success:
    # Explain the expected layout, then stop with a non-zero exit status.
    for hint in (
        "❌ Critical: Could not import BCE modules. Please check the file structure.",
        "Expected structure:",
        "- src/bce/antigen/antigen.py",
        "- src/bce/utils/constants.py",
        "- src/bce/model/ReCEP.py",
        "- src/bce/data/utils.py",
    ):
        print(hint)
    sys.exit(1)
138
+
139
+ # Configuration
140
# Model checkpoint; override with the BCE_MODEL_PATH environment variable.
DEFAULT_MODEL_PATH = os.getenv(
    "BCE_MODEL_PATH",
    str(PROJECT_BASE_DIR / "models" / "ReCEP" / "20250626_110438" / "best_mcc_model.bin"),
)

# SECURITY: the API token must come from the environment (for example a
# Hugging Face Space secret). A literal token used to be embedded here as the
# fallback value; a credential committed to a repository has to be treated as
# leaked and rotated, and must not be shipped in source.
ESM_TOKEN = os.getenv("ESM_TOKEN", "")
if not ESM_TOKEN:
    print("⚠️ Warning: ESM_TOKEN is not set. Set it as an environment variable/secret; "
          "features that require the ESM API will not work without it.")

# PDB data directory
PDB_DATA_DIR = PROJECT_BASE_DIR / "data" / "pdb"
PDB_DATA_DIR.mkdir(parents=True, exist_ok=True)
146
+
147
def validate_pdb_id(pdb_id: str) -> bool:
    """Return True if *pdb_id* is exactly four ASCII letters or digits.

    ``str.isalnum()`` alone is Unicode-aware and would accept identifiers
    such as full-width digits or non-Latin letters, so the check is
    restricted to ASCII.

    Args:
        pdb_id: Candidate identifier; ``None`` or an empty string is invalid.

    Returns:
        bool: True for a well-formed identifier, False otherwise.
    """
    if not pdb_id or len(pdb_id) != 4:
        return False
    return pdb_id.isascii() and pdb_id.isalnum()
152
+
153
def validate_chain_id(chain_id: str) -> bool:
    """Return True if *chain_id* is a single ASCII letter or digit.

    The check is restricted to ASCII because ``str.isalnum()`` on its own
    also accepts non-ASCII letters and digits.

    Args:
        chain_id: Candidate chain identifier; ``None`` or an empty string is
            invalid.

    Returns:
        bool: True for a well-formed chain identifier, False otherwise.
    """
    if not chain_id or len(chain_id) != 1:
        return False
    return chain_id.isascii() and chain_id.isalnum()
158
+
159
def create_pdb_visualization_html(pdb_data: str, predicted_epitopes: list,
                                  predictions: dict, protein_id: str,
                                  top_k_regions: list = None,
                                  radius: float = 19.0) -> str:
    """Build an HTML/JavaScript fragment that renders the structure with 3Dmol.js.

    The fragment contains a control bar (display mode, representation style,
    region spheres, reset/save buttons) and a viewer element. All element ids
    and JavaScript names carry a random per-call suffix so that several
    fragments can coexist on one page.

    Every value that originates from the caller is escaped for the context it
    is written into: ``protein_id`` is HTML-escaped in the heading, and all
    data handed to JavaScript is emitted as a JSON literal with ``</`` rewritten
    to ``<\\/`` so that content such as ``</script>``, backticks or ``${`` in an
    uploaded structure file cannot terminate or alter the script.

    Args:
        pdb_data: Text of the PDB file to display.
        predicted_epitopes: Residue numbers to highlight as predicted epitopes.
        predictions: Mapping of residue number to predicted score.
        protein_id: Label used in the heading and in the saved image name.
        top_k_regions: Optional list of region dicts. The keys read are
            ``center_idx``, ``center_residue``, ``covered_residues`` (or
            ``covered_indices``) and ``graph_pred``; entries that are not
            dicts are ignored.
        radius: Radius used for every region sphere (default 19.0).

    Returns:
        str: The HTML fragment.
    """
    import html
    import uuid

    def to_js(value) -> str:
        """Serialize *value* as a literal that is safe inside a <script> block."""
        return json.dumps(value).replace("</", "<\\/")

    # Reduce each region to the fields the JavaScript side reads
    processed_regions = []
    for region in top_k_regions or []:
        if isinstance(region, dict):
            processed_regions.append({
                'center_idx': region.get('center_idx', 0),
                'center_residue': region.get('center_residue', region.get('center_idx', 0)),
                'covered_residues': region.get('covered_residues', region.get('covered_indices', [])),
                'radius': radius,
                'predicted_value': region.get('graph_pred', 0.0)
            })

    # Create a unique ID for this visualization to avoid conflicts
    viewer_id = f"viewer_{uuid.uuid4().hex[:8]}"

    safe_title = html.escape(str(protein_id))
    image_name_js = to_js(f"{protein_id}_structure.png")
    pdb_data_js = to_js(pdb_data)
    epitopes_js = to_js(predicted_epitopes)
    predictions_js = to_js(predictions)
    regions_js = to_js(processed_regions)

    # NOTE(review): the 3Dmol.js version pinned in the original script URL was
    # lost (the captured text read "[email protected]"), so the unversioned
    # package path is used here. Pin a known-good version once it is confirmed.
    html_content = f"""
    <div style="width: 100%; height: 600px; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;">
        <div style="padding: 10px; background: #f8f9fa; border-bottom: 1px solid #ddd;">
            <h3 style="margin: 0 0 10px 0; color: #333;">3D Structure Visualization - {safe_title}</h3>
            <div style="display: flex; gap: 15px; align-items: center; flex-wrap: wrap;">
                <div>
                    <label style="font-weight: bold; margin-right: 5px;">Display Mode:</label>
                    <select id="vizMode_{viewer_id}" onchange="updateVisualization_{viewer_id}()" style="padding: 4px;">
                        <option value="prediction">Predicted Epitopes</option>
                        <option value="probability">Probability Gradient</option>
                        <option value="regions">Top-k Regions</option>
                    </select>
                </div>
                <div>
                    <label style="font-weight: bold; margin-right: 5px;">Style:</label>
                    <select id="vizStyle_{viewer_id}" onchange="updateVisualization_{viewer_id}()" style="padding: 4px;">
                        <option value="cartoon">Cartoon</option>
                        <option value="surface">Surface</option>
                        <option value="stick">Stick</option>
                        <option value="sphere">Sphere</option>
                    </select>
                </div>
                <div>
                    <label style="font-weight: bold; margin-right: 5px;">
                        <input type="checkbox" id="showSpheres_{viewer_id}" onchange="updateVisualization_{viewer_id}()" style="margin-right: 3px;"> Show Spheres
                    </label>
                </div>
                <div>
                    <label style="font-weight: bold; margin-right: 5px;">Sphere Display:</label>
                    <select id="sphereCount_{viewer_id}" onchange="handleSphereCountChange_{viewer_id}()" style="padding: 4px;">
                        <option value="1">Top 1</option>
                        <option value="2">Top 2</option>
                        <option value="3">Top 3</option>
                        <option value="4">Top 4</option>
                        <option value="5" selected>Top 5</option>
                        <option value="6">Top 6</option>
                        <option value="7">Top 7</option>
                        <option value="all">All Spheres</option>
                        <option value="custom">Custom Selection</option>
                    </select>
                </div>
                <div id="customSphereSelection_{viewer_id}" style="display: none; margin-top: 10px; padding: 10px; background: #f9f9f9; border-radius: 5px; max-height: 120px; overflow-y: auto;">
                    <label style="font-weight: bold; margin-bottom: 5px; display: block;">Select Spheres to Display:</label>
                    <div id="sphereCheckboxes_{viewer_id}" style="display: flex; flex-wrap: wrap; gap: 8px; max-height: 80px; overflow-y: auto;">
                        <!-- Checkboxes will be dynamically generated -->
                    </div>
                </div>
                <div>
                    <button onclick="resetView_{viewer_id}()" style="padding: 4px 8px; margin-right: 5px;">Reset View</button>
                    <button onclick="saveImage_{viewer_id}()" style="padding: 4px 8px;">Save Image</button>
                </div>
            </div>
        </div>
        <div id="{viewer_id}" style="width: 100%; height: 520px; min-height: 400px; position: relative; background: #f0f0f0;">
            <div style="position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
                <p id="status_{viewer_id}" style="color: #666;">Loading 3Dmol.js...</p>
            </div>
        </div>
    </div>

    <script src="https://unpkg.com/3dmol/build/3Dmol-min.js"></script>
    <script>
        // Global variables for this viewer instance
        window.viewer_{viewer_id} = null;
        window.pdbData_{viewer_id} = {pdb_data_js};
        window.predictedEpitopes_{viewer_id} = {epitopes_js};
        window.predictions_{viewer_id} = {predictions_js};
        window.topKRegions_{viewer_id} = {regions_js};

        // Wait for 3Dmol to be available with timeout
        function wait3Dmol_{viewer_id}(attempts = 0) {{
            if (typeof $3Dmol !== 'undefined') {{
                console.log('3Dmol.js loaded successfully for {viewer_id}');
                document.getElementById('status_{viewer_id}').textContent = 'Initializing 3D viewer...';
                setTimeout(() => initializeViewer_{viewer_id}(), 100);
            }} else if (attempts < 50) {{ // 5 second timeout
                console.log(`Waiting for 3Dmol.js... attempt ${{attempts + 1}}`);
                setTimeout(() => wait3Dmol_{viewer_id}(attempts + 1), 100);
            }} else {{
                console.error('Failed to load 3Dmol.js after 5 seconds');
                document.getElementById('status_{viewer_id}').textContent = 'Failed to load 3Dmol.js. Please refresh the page.';
                document.getElementById('status_{viewer_id}').style.color = 'red';
            }}
        }}

        function initializeViewer_{viewer_id}() {{
            try {{
                const element = document.getElementById('{viewer_id}');
                if (!element) {{
                    console.error('Viewer element not found: {viewer_id}');
                    return;
                }}

                document.getElementById('status_{viewer_id}').textContent = 'Creating viewer...';

                window.viewer_{viewer_id} = $3Dmol.createViewer(element, {{
                    defaultcolors: $3Dmol.rasmolElementColors
                }});

                document.getElementById('status_{viewer_id}').textContent = 'Loading structure...';

                window.viewer_{viewer_id}.addModel(window.pdbData_{viewer_id}, 'pdb');

                // Hide status message
                const statusEl = document.getElementById('status_{viewer_id}');
                if (statusEl) statusEl.style.display = 'none';

                updateVisualization_{viewer_id}();

                // Initialize sphere checkboxes if data is available
                if (window.topKRegions_{viewer_id} && window.topKRegions_{viewer_id}.length > 0) {{
                    generateSphereCheckboxes_{viewer_id}();
                }}

                console.log('3D viewer initialized successfully for {viewer_id}');
            }} catch (error) {{
                console.error('Error initializing 3D viewer:', error);
                const statusEl = document.getElementById('status_{viewer_id}');
                if (statusEl) {{
                    statusEl.textContent = 'Error loading 3D viewer: ' + error.message;
                    statusEl.style.color = 'red';
                }}
            }}
        }}

        function updateVisualization_{viewer_id}() {{
            if (!window.viewer_{viewer_id}) return;

            try {{
                const mode = document.getElementById('vizMode_{viewer_id}').value;
                const style = document.getElementById('vizStyle_{viewer_id}').value;
                const showSpheres = document.getElementById('showSpheres_{viewer_id}').checked;

                // Clear everything
                window.viewer_{viewer_id}.removeAllShapes();
                window.viewer_{viewer_id}.removeAllSurfaces();
                window.viewer_{viewer_id}.setStyle({{}}, {{}});

                // Base style
                const baseStyle = {{}};
                if (style === 'surface') {{
                    baseStyle['cartoon'] = {{ hidden: true }};
                }} else {{
                    baseStyle[style] = {{ color: '#e6e6f7' }};
                }}
                window.viewer_{viewer_id}.setStyle({{}}, baseStyle);

                if (mode === 'prediction') {{
                    // Highlight predicted epitopes
                    if (window.predictedEpitopes_{viewer_id}.length > 0 && style !== 'surface') {{
                        const epitopeStyle = {{}};
                        epitopeStyle[style] = {{ color: '#9C6ADE' }};
                        window.viewer_{viewer_id}.setStyle({{ resi: window.predictedEpitopes_{viewer_id} }}, epitopeStyle);
                    }}

                    // Add surface for epitopes if surface mode
                    if (style === 'surface') {{
                        window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                            opacity: 0.8,
                            color: '#e6e6f7'
                        }});

                        if (window.predictedEpitopes_{viewer_id}.length > 0) {{
                            window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                                opacity: 1.0,
                                color: '#9C6ADE'
                            }}, {{ resi: window.predictedEpitopes_{viewer_id} }});
                        }}
                    }}
                }} else if (mode === 'probability') {{
                    // Color by probability scores
                    if (window.predictions_{viewer_id} && Object.keys(window.predictions_{viewer_id}).length > 0) {{
                        const allProbs = Object.values(window.predictions_{viewer_id}).filter(p => p !== undefined);
                        const minProb = Math.min(...allProbs);
                        const maxProb = Math.max(...allProbs);

                        Object.entries(window.predictions_{viewer_id}).forEach(([resnum, score]) => {{
                            const normalizedProb = maxProb > minProb ? (score - minProb) / (maxProb - minProb) : 0.5;
                            const color = interpolateColor('#E6F3FF', '#DC143C', normalizedProb);
                            const probStyle = {{}};
                            if (style !== 'surface') {{
                                probStyle[style] = {{ color: color }};
                                window.viewer_{viewer_id}.setStyle({{ resi: parseInt(resnum) }}, probStyle);
                            }}
                        }});

                        if (style === 'surface') {{
                            window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                                opacity: 0.8,
                                color: '#e6e6f7'
                            }});

                            Object.entries(window.predictions_{viewer_id}).forEach(([resnum, score]) => {{
                                const normalizedProb = maxProb > minProb ? (score - minProb) / (maxProb - minProb) : 0.5;
                                const color = interpolateColor('#E6F3FF', '#DC143C', normalizedProb);
                                window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                                    opacity: 1.0,
                                    color: color
                                }}, {{ resi: parseInt(resnum) }});
                            }});
                        }}
                    }}
                }} else if (mode === 'regions') {{
                    // Color top-k regions
                    const colors = ['#FF6B6B', '#96CEB4', '#4ECDC4', '#45B7D1', '#FFEAA7', '#DDA0DD', '#87CEEB'];

                    if (window.topKRegions_{viewer_id} && window.topKRegions_{viewer_id}.length > 0) {{
                        window.topKRegions_{viewer_id}.forEach((region, index) => {{
                            const color = colors[index % colors.length];
                            const regionStyle = {{}};
                            if (style !== 'surface') {{
                                regionStyle[style] = {{ color: color }};
                                window.viewer_{viewer_id}.setStyle({{ resi: region.covered_residues }}, regionStyle);
                            }}
                        }});

                        if (style === 'surface') {{
                            window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                                opacity: 0.8,
                                color: '#e6e6f7'
                            }});

                            window.topKRegions_{viewer_id}.forEach((region, index) => {{
                                const color = colors[index % colors.length];
                                window.viewer_{viewer_id}.addSurface($3Dmol.SurfaceType.VDW, {{
                                    opacity: 1.0,
                                    color: color
                                }}, {{ resi: region.covered_residues }});
                            }});
                        }}
                    }}
                }}

                // Add spheres if requested
                if (showSpheres && window.topKRegions_{viewer_id} && window.topKRegions_{viewer_id}.length > 0) {{
                    const colors = ['#FF6B6B', '#96CEB4', '#4ECDC4', '#45B7D1', '#FFEAA7', '#DDA0DD', '#87CEEB'];
                    const sphereCount = document.getElementById('sphereCount_{viewer_id}').value;

                    // Determine which spheres to show
                    let spheresToShow = [];
                    if (sphereCount === 'custom') {{
                        const selectedIndices = getSelectedSphereIndices_{viewer_id}();
                        spheresToShow = selectedIndices.map(idx => ({{ region: window.topKRegions_{viewer_id}[idx], index: idx }}));
                    }} else {{
                        let numSpheres = sphereCount === 'all' ? window.topKRegions_{viewer_id}.length : parseInt(sphereCount);
                        numSpheres = Math.min(numSpheres, window.topKRegions_{viewer_id}.length);
                        spheresToShow = window.topKRegions_{viewer_id}.slice(0, numSpheres).map((region, index) => ({{ region, index }}));
                    }}

                    spheresToShow.forEach(({{ region, index }}) => {{
                        const color = colors[index % colors.length];
                        const centerResidues = window.viewer_{viewer_id}.getModel(0).selectedAtoms({{
                            resi: region.center_residue,
                            atom: 'CA'
                        }});

                        if (centerResidues.length > 0) {{
                            const centerAtom = centerResidues[0];
                            const centerCoords = {{ x: centerAtom.x, y: centerAtom.y, z: centerAtom.z }};

                            // Add wireframe sphere
                            window.viewer_{viewer_id}.addSphere({{
                                center: centerCoords,
                                radius: region.radius,
                                color: color,
                                wireframe: true,
                                linewidth: 2.0
                            }});

                            // Add center point
                            window.viewer_{viewer_id}.addSphere({{
                                center: centerCoords,
                                radius: 0.7,
                                color: '#FFD700',
                                wireframe: false
                            }});
                        }}
                    }});
                }}

                window.viewer_{viewer_id}.zoomTo();
                window.viewer_{viewer_id}.render();
            }} catch (error) {{
                console.error('Error updating visualization:', error);
            }}
        }}

        // Color interpolation helper functions
        function interpolateColor(color1, color2, factor) {{
            const c1 = hexToRgb(color1);
            const c2 = hexToRgb(color2);

            const r = Math.round(c1.r + factor * (c2.r - c1.r));
            const g = Math.round(c1.g + factor * (c2.g - c1.g));
            const b = Math.round(c1.b + factor * (c2.b - c1.b));

            return rgbToHex(r, g, b);
        }}

        function hexToRgb(hex) {{
            const result = /^#?([a-f\\d]{{2}})([a-f\\d]{{2}})([a-f\\d]{{2}})$/i.exec(hex);
            return result ? {{
                r: parseInt(result[1], 16),
                g: parseInt(result[2], 16),
                b: parseInt(result[3], 16)
            }} : null;
        }}

        function rgbToHex(r, g, b) {{
            return "#" + ((1 << 24) + (r << 16) + (g << 8) + b).toString(16).slice(1);
        }}

        function resetView_{viewer_id}() {{
            if (window.viewer_{viewer_id}) {{
                window.viewer_{viewer_id}.zoomTo();
                window.viewer_{viewer_id}.render();
            }}
        }}

        function saveImage_{viewer_id}() {{
            if (window.viewer_{viewer_id}) {{
                window.viewer_{viewer_id}.pngURI(function(uri) {{
                    const link = document.createElement('a');
                    link.href = uri;
                    link.download = {image_name_js};
                    link.click();
                }});
            }}
        }}

        // Handle sphere count selection change
        function handleSphereCountChange_{viewer_id}() {{
            const sphereCount = document.getElementById('sphereCount_{viewer_id}').value;
            const customSelectionDiv = document.getElementById('customSphereSelection_{viewer_id}');

            if (sphereCount === 'custom') {{
                customSelectionDiv.style.display = 'block';
                generateSphereCheckboxes_{viewer_id}();
            }} else {{
                customSelectionDiv.style.display = 'none';
            }}

            updateVisualization_{viewer_id}();
        }}

        // Generate sphere checkboxes for custom selection
        function generateSphereCheckboxes_{viewer_id}() {{
            if (!window.topKRegions_{viewer_id} || window.topKRegions_{viewer_id}.length === 0) {{
                return;
            }}

            const regions = window.topKRegions_{viewer_id};
            const container = document.getElementById('sphereCheckboxes_{viewer_id}');
            container.innerHTML = '';

            regions.forEach((region, index) => {{
                const sphereNum = index + 1;
                const checkboxId = `sphere_${{sphereNum}}_{viewer_id}`;
                const colors = ['#FF6B6B', '#96CEB4', '#4ECDC4', '#45B7D1', '#FFEAA7', '#DDA0DD', '#87CEEB'];
                const sphereColor = colors[index % colors.length];

                const checkboxContainer = document.createElement('div');
                checkboxContainer.style.cssText = `
                    display: flex;
                    align-items: center;
                    padding: 5px 10px;
                    border: 1px solid #ddd;
                    border-radius: 4px;
                    background: white;
                    cursor: pointer;
                    user-select: none;
                `;
                checkboxContainer.setAttribute('data-sphere', sphereNum);

                const checkbox = document.createElement('input');
                checkbox.type = 'checkbox';
                checkbox.id = checkboxId;
                checkbox.checked = sphereNum <= 5; // Default: show first 5
                checkbox.style.marginRight = '5px';

                const colorBox = document.createElement('div');
                colorBox.style.cssText = `
                    width: 16px;
                    height: 16px;
                    background-color: ${{sphereColor}};
                    border: 1px solid #333;
                    border-radius: 2px;
                    margin-right: 5px;
                `;

                const label = document.createElement('label');
                label.setAttribute('for', checkboxId);
                label.textContent = `Sphere ${{sphereNum}} (R${{region.center_residue}})`;
                label.style.cursor = 'pointer';
                label.style.fontSize = '14px';

                checkboxContainer.appendChild(checkbox);
                checkboxContainer.appendChild(colorBox);
                checkboxContainer.appendChild(label);
                container.appendChild(checkboxContainer);

                // Add click handler
                checkboxContainer.addEventListener('click', function(e) {{
                    if (e.target.type !== 'checkbox') {{
                        checkbox.checked = !checkbox.checked;
                    }}

                    if (checkbox.checked) {{
                        checkboxContainer.style.backgroundColor = '#f0f8ff';
                        checkboxContainer.style.borderColor = '#4a90e2';
                    }} else {{
                        checkboxContainer.style.backgroundColor = 'white';
                        checkboxContainer.style.borderColor = '#ddd';
                    }}

                    updateVisualization_{viewer_id}();
                }});

                // Initialize visual state
                if (checkbox.checked) {{
                    checkboxContainer.style.backgroundColor = '#f0f8ff';
                    checkboxContainer.style.borderColor = '#4a90e2';
                }}
            }});
        }}

        // Get selected sphere indices for custom mode
        function getSelectedSphereIndices_{viewer_id}() {{
            const selected = [];
            const checkboxes = document.querySelectorAll('#sphereCheckboxes_{viewer_id} input[type="checkbox"]:checked');
            checkboxes.forEach(function(checkbox) {{
                // Get sphere number from the data-sphere attribute of the container
                const container = checkbox.closest('[data-sphere]');
                if (container) {{
                    const sphereNum = parseInt(container.getAttribute('data-sphere'));
                    selected.push(sphereNum - 1); // Convert to 0-based index
                }}
            }});
            return selected;
        }}

        // Start initialization
        wait3Dmol_{viewer_id}();
    </script>
    """

    return html_content
630
+
631
def _report_progress(progress, fraction, desc):
    """Forward a progress update to the tracker, if one was supplied."""
    # Explicit None check: the tracker object's truthiness is not part of its contract.
    if progress is not None:
        progress(fraction, desc=desc)


def _error_result(message):
    """Return the 6-slot output tuple for a failed run.

    The three text outputs get the message / empty strings; the three file
    outputs get ``None`` so the download widgets are cleared instead of being
    handed an empty path.
    """
    return message, "", "", None, None, None


def _save_uploaded_pdb(pdb_file, destination):
    """Copy an uploaded PDB to *destination*.

    *pdb_file* may be an object whose ``.name`` is an existing path, a
    file-like object exposing ``read()``, or a plain path.
    """
    source_name = getattr(pdb_file, "name", None)
    if source_name is not None and os.path.isfile(source_name):
        print(f"📁 Processing file object: {source_name}")
        with open(source_name, "rb") as src, open(destination, "wb") as dst:
            dst.write(src.read())
    elif hasattr(pdb_file, "read"):
        print("📄 Processing file-like object")
        payload = pdb_file.read()
        if isinstance(payload, str):
            # A text-mode handle yields str; the destination is opened in binary mode.
            payload = payload.encode("utf-8")
        with open(destination, "wb") as dst:
            dst.write(payload)
    else:
        print(f"📍 Processing file path: {pdb_file}")
        with open(str(pdb_file), "rb") as src, open(destination, "wb") as dst:
            dst.write(src.read())


def _format_residue_listing(title, residues, antigen_chain):
    """Render ``title (count):`` followed by one ``Residue N (X)`` line per residue."""
    lines = []
    for res in residues:
        if res in antigen_chain.resnum_to_index:
            res_name = antigen_chain.sequence[antigen_chain.resnum_to_index[res]]
            lines.append(f"Residue {res} ({res_name})")
        else:
            # Residue number unknown to the chain: list it without a residue letter.
            lines.append(f"Residue {res}")
    return f"{title} ({len(residues)}):\n" + "\n".join(lines)


def _build_summary_markdown(pdb_id, chain_id, sequence, predicted_epitopes,
                            top_k_centers, top_k_region_residues, region_count,
                            coverage_rate):
    """Build the Markdown summary shown in the results panel."""
    sequence_box = (
        '<div style="word-wrap: break-word; word-break: break-all; white-space: pre-wrap; '
        'max-width: 100%; font-family: monospace; background: #f5f5f5; padding: 8px; '
        'border-radius: 4px; margin: 5px 0; display: inline-block;">'
        f"{sequence}</div>"
    )
    lines = [
        "",
        f"## Prediction Results for {pdb_id}_{chain_id}",
        "",
        "### Protein Information",
        f"- **PDB ID**: {pdb_id}",
        f"- **Chain**: {chain_id}",
        f"- **Length**: {len(sequence)} residues",
        f"- **Sequence**: {sequence_box}",
        "",
        "### Prediction Summary",
        f"- **Predicted Epitopes**: {len(predicted_epitopes)}",
        f"- **Top-k Regions**: {region_count}",
        f"- **Coverage Rate**: {coverage_rate:.1f}%",
        "",
        "### Top-k Region Centers",
        ", ".join(map(str, top_k_centers)),
        "",
        "### Predicted Epitope Residues",
        ", ".join(map(str, predicted_epitopes)),
        "",
        "### Binding Region Residues (Top-k Union)",
        ", ".join(map(str, top_k_region_residues)),
        "",
    ]
    return "\n".join(lines)


def predict_epitopes(pdb_id: str, pdb_file, chain_id: str, radius: float, k: int,
                     encoder: str, device_config: str, use_threshold: bool, threshold: float,
                     auto_cleanup: bool, progress: "gr.Progress" = None) -> Tuple[str, str, str, str, str, str]:
    """
    Main prediction function that handles the epitope prediction workflow.

    Args:
        pdb_id: PDB identifier; may be empty when a file is uploaded.
        pdb_file: Uploaded structure (path, file object, or file-like), or a falsy value.
        chain_id: Chain to analyse.
        radius: Passed through to ``AntigenChain.predict`` as ``radius``.
        k: Passed through to ``AntigenChain.predict`` as ``k``.
        encoder: Passed through to ``AntigenChain.predict`` as ``encoder``.
        device_config: ``"CPU Only"`` or ``"GPU <n>"``.
        use_threshold: When False, ``threshold`` is ignored and ``None`` is passed on.
        threshold: Custom decision threshold.
        auto_cleanup: Remove the saved copy of an uploaded PDB when the call ends;
            also forwarded to ``AntigenChain.predict``.
        progress: Optional progress tracker, called as ``progress(fraction, desc=...)``.
            NOTE(review): Gradio appears to inject a tracker only when the default is a
            ``gr.Progress`` instance; with ``None`` these updates may never fire from
            the UI — confirm before changing the default.

    Returns:
        ``(summary_markdown, epitope_text, binding_text, html_path, json_path, csv_path)``.
        On failure the first element is an error message, the other text slots are
        empty strings and the file slots are ``None``. ``html_path`` is also ``None``
        when only the 3D visualization step failed.
    """
    temp_file_path = None
    try:
        # Input validation
        if not pdb_file and not pdb_id:
            return _error_result("Error: Please provide either a PDB ID or upload a PDB file")

        if pdb_id and not validate_pdb_id(pdb_id):
            return _error_result("Error: PDB ID must be exactly 4 characters (letters and numbers)")

        if not validate_chain_id(chain_id):
            return _error_result("Error: Chain ID must be exactly 1 character")

        _report_progress(progress, 0.1, "Initializing prediction...")

        # "CPU Only" maps to device -1; otherwise the label is "GPU <index>".
        device_id = -1 if device_config == "CPU Only" else int(device_config.split(" ")[1])
        use_gpu = device_id >= 0

        _report_progress(progress, 0.2, "Loading protein structure...")

        antigen_chain = None
        try:
            if pdb_file:
                _report_progress(progress, 0.25, "Processing uploaded PDB file...")

                # Derive a PDB ID from the uploaded filename when none was typed in.
                if not pdb_id:
                    if hasattr(pdb_file, "name"):
                        pdb_id = Path(pdb_file.name).stem.split("_")[0][:4]
                    else:
                        pdb_id = "UNKN"  # Default fallback

                # Keep a timestamped copy under the PDB data directory.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                temp_file_path = PDB_DATA_DIR / f"{pdb_id}_{chain_id}_{timestamp}.pdb"

                try:
                    _save_uploaded_pdb(pdb_file, temp_file_path)
                    print(f"✅ PDB file saved to: {temp_file_path}")
                except Exception as file_error:
                    print(f"❌ Error processing uploaded file: {file_error}")
                    return _error_result(f"Error processing uploaded file: {str(file_error)}")

                antigen_chain = AntigenChain.from_pdb(
                    path=str(temp_file_path),
                    chain_id=chain_id,
                    id=pdb_id,
                )
            else:
                _report_progress(progress, 0.25, f"Downloading PDB structure {pdb_id}...")
                antigen_chain = AntigenChain.from_pdb(
                    chain_id=chain_id,
                    id=pdb_id,
                )
        except Exception as e:
            return _error_result(f"Error loading protein structure: {str(e)}")

        if antigen_chain is None:
            return _error_result("Error: Failed to load protein structure")

        _report_progress(progress, 0.4, "Running epitope prediction...")

        # Use the custom threshold only if the checkbox is checked.
        final_threshold = threshold if use_threshold else None
        try:
            predict_results = antigen_chain.predict(
                model_path=DEFAULT_MODEL_PATH,
                device_id=device_id,
                radius=radius,
                k=k,
                threshold=final_threshold,
                verbose=True,
                encoder=encoder,
                use_gpu=use_gpu,
                auto_cleanup=auto_cleanup,
            )
        except Exception as e:
            error_msg = f"Error during prediction: {str(e)}"
            print(f"Prediction error: {error_msg}")
            import traceback
            traceback.print_exc()
            return _error_result(error_msg)

        _report_progress(progress, 0.8, "Processing results...")

        if not predict_results:
            return _error_result("Error: No prediction results generated")

        # Extract prediction data
        predicted_epitopes = predict_results.get("predicted_epitopes", [])
        predictions = predict_results.get("predictions", {})
        top_k_centers = predict_results.get("top_k_centers", [])
        top_k_region_residues = predict_results.get("top_k_region_residues", [])
        top_k_regions = predict_results.get("top_k_regions", [])

        # Summary statistics
        sequence = antigen_chain.sequence
        protein_length = len(sequence)
        coverage_rate = (len(top_k_region_residues) / protein_length) * 100 if protein_length > 0 else 0

        summary_text = _build_summary_markdown(
            pdb_id, chain_id, sequence, predicted_epitopes,
            top_k_centers, top_k_region_residues, len(top_k_regions), coverage_rate,
        )
        epitope_text = _format_residue_listing(
            "Predicted Epitope Residues", predicted_epitopes, antigen_chain
        )
        binding_text = _format_residue_listing(
            "Binding Region Residues", top_k_region_residues, antigen_chain
        )

        _report_progress(progress, 0.9, "Preparing download files...")

        json_data = {
            "protein_info": {
                "id": pdb_id,
                "chain_id": chain_id,
                "length": protein_length,
                "sequence": sequence,
            },
            "prediction": {
                "predicted_epitopes": predicted_epitopes,
                "predictions": predictions,
                "top_k_centers": top_k_centers,
                "top_k_region_residues": top_k_region_residues,
                "top_k_regions": [
                    {
                        "center_idx": region.get("center_idx", 0),
                        "graph_pred": region.get("graph_pred", 0),
                        "covered_indices": region.get("covered_indices", []),
                    }
                    for region in top_k_regions
                ],
                "coverage_rate": coverage_rate,
                "mean_region_value": 0,  # No longer calculated
            },
            "parameters": {
                "radius": radius,
                "k": k,
                "encoder": encoder,
                "device_config": device_config,
                "use_threshold": use_threshold,
                "threshold": final_threshold,
                "auto_cleanup": auto_cleanup,
            },
        }

        # NamedTemporaryFile creates the file atomically; mktemp only returns a
        # name and is documented as unsafe. delete=False keeps it for download.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".json", delete=False, encoding="utf-8"
        ) as json_handle:
            json.dump(json_data, json_handle, indent=2)
            json_file_path = json_handle.name

        # Sets give O(1) membership tests in the per-residue loop below.
        epitope_set = set(predicted_epitopes)
        region_set = set(top_k_region_residues)
        csv_rows = []
        for i, residue_num in enumerate(antigen_chain.residue_index):
            residue_num = int(residue_num)
            csv_rows.append({
                "Residue_Number": residue_num,
                "Residue_Type": sequence[i],
                "Prediction_Probability": predictions.get(residue_num, 0.0),
                "Is_Predicted_Epitope": 1 if residue_num in epitope_set else 0,
                "Is_In_TopK_Regions": 1 if residue_num in region_set else 0,
            })

        with tempfile.NamedTemporaryFile(
            "w", suffix=".csv", delete=False, encoding="utf-8", newline=""
        ) as csv_handle:
            pd.DataFrame(csv_rows).to_csv(csv_handle, index=False)
            csv_file_path = csv_handle.name

        _report_progress(progress, 0.95, "Creating 3D visualization...")

        # The visualization is best-effort: a failure here must not discard the
        # prediction results, so it only clears the HTML download slot.
        html_file_path = None
        try:
            pdb_str = generate_pdb_string(antigen_chain)
            html_content = create_pdb_visualization_html(
                pdb_str, predicted_epitopes, predictions, f"{pdb_id}_{chain_id}", top_k_regions
            )

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            html_file_path = PDB_DATA_DIR / f"{pdb_id}_{chain_id}_visualization_{timestamp}.html"
            with open(html_file_path, "w", encoding="utf-8") as f:
                f.write(html_content)

            print(f"✅ 3D visualization HTML saved to: {html_file_path}")
        except Exception as e:
            html_file_path = None
            print(f"Warning: Could not create 3D visualization: {str(e)}")

        _report_progress(progress, 1.0, "Prediction completed!")

        return (
            summary_text,
            epitope_text,
            binding_text,
            str(html_file_path) if html_file_path else None,
            json_file_path,
            csv_file_path,
        )

    except Exception as e:
        import traceback
        error_msg = f"Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        return _error_result(error_msg)

    finally:
        # Runs on every exit path, so a failed load or prediction no longer
        # leaves the uploaded copy behind when auto_cleanup is enabled.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            if auto_cleanup:
                try:
                    os.remove(temp_file_path)
                    print(f"🧹 Cleaned up temporary file: {temp_file_path}")
                except OSError as cleanup_error:
                    print(f"Warning: Could not remove temporary file {temp_file_path}: {cleanup_error}")
            else:
                print(f"📁 PDB file retained at: {temp_file_path}")
939
+
940
def generate_pdb_string(antigen_chain) -> str:
    """Generate a single-model PDB string for 3D visualization.

    Walks the chain residue by residue and emits one ATOM record for every
    atom flagged in ``antigen_chain.atom37_mask``, using coordinates from
    ``antigen_chain.atom37_positions`` and atom names from
    ``residue_constants.atom_types``.

    Args:
        antigen_chain: Object exposing ``sequence``, ``residue_index``,
            ``atom37_mask``, ``atom37_positions``, ``chain_id`` and
            ``convert_letter_1to3``.

    Returns:
        The text ``MODEL 1`` / ATOM records / ``ENDMDL``, newline-terminated.
    """
    from esm.utils import residue_constants as RC

    chain_id = antigen_chain.chain_id
    # Collect records in a list and join once instead of repeated `+=`.
    records = ["MODEL 1\n"]
    atom_num = 1

    for res_idx, one_letter in enumerate(antigen_chain.sequence):
        resname = antigen_chain.convert_letter_1to3(one_letter)
        # Coerce to a plain int so the `d` format works for array/tensor scalars too.
        resnum = int(antigen_chain.residue_index[res_idx])

        mask = antigen_chain.atom37_mask[res_idx]
        coords = antigen_chain.atom37_positions[res_idx][mask]
        atoms = [name for name, exists in zip(RC.atom_types, mask) if exists]

        for atom_name, coord in zip(atoms, coords):
            x, y, z = (float(value) for value in coord)
            # PDB ATOM records are fixed-column: serial in 7-11, atom name from
            # column 14, residue name 18-20, chain 22, residue number 23-26,
            # x/y/z in 31-54, occupancy 55-60, B-factor 61-66. Column-based
            # readers misplace the coordinates if the padding is wrong.
            records.append(
                f"ATOM  {atom_num:5d}  {atom_name:<3s} {resname:>3s} {chain_id:1s}{resnum:4d}"
                f"    {x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00\n"
            )
            atom_num += 1

    records.append("ENDMDL\n")
    return "".join(records)
964
+
965
def create_interface():
    """Build and return the Gradio Blocks app for the prediction server.

    Layout: a header banner, an input column (structure source, chain and
    advanced parameters), a results column (summary, download widgets and
    residue listings) and a footer. The predict button is wired to
    ``predict_epitopes``.
    """
    page_css = """
    .container {
        max-width: 1200px;
        margin: 0 auto;
        padding: 20px;
    }
    .header {
        text-align: center;
        margin-bottom: 30px;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
    }
    .header h1 {
        font-size: 2.5em;
        margin-bottom: 10px;
    }
    .form-row {
        display: flex;
        gap: 20px;
        align-items: end;
    }
    .form-row > * {
        flex: 1;
    }
    .section {
        margin: 20px 0;
        padding: 15px;
        background: #f8f9fa;
        border-radius: 8px;
        border-left: 4px solid #007bff;
    }
    .section h2 {
        color: #333;
        margin-bottom: 15px;
    }
    .results-section {
        margin-top: 30px;
        padding: 20px;
        background: #f0f8ff;
        border-radius: 8px;
        border: 1px solid #e0e8f0;
    }
    .download-section {
        margin-top: 20px;
        padding: 15px;
        background: #f9f9f9;
        border-radius: 8px;
    }
    .download-section h3 {
        color: #333;
        margin-bottom: 10px;
    }
    """

    header_html = """
    <div class="header">
    <h1>🧬 B-cell Epitope Prediction Server</h1>
    <p>Predict epitopes using the ReCEP model</p>
    </div>
    """

    visualization_note_html = (
        "<div style='margin: 15px 0; padding: 10px; background: #f0f8ff; "
        "border-left: 4px solid #4a90e2; border-radius: 5px;'>"
        "<h3 style='margin: 0 0 8px 0; color: #333;'>🧬 3D Visualization</h3>"
        "<p style='margin: 0; color: #666;'>You can download the HTML to visualize "
        "the prediction results and the spheres used.</p></div>"
    )

    footer_html = """
    <div style="text-align: center; margin-top: 30px; padding: 20px; background: #f0f0f0; border-radius: 10px;">
    <p>© 2024 B-cell Epitope Prediction Server | Powered by ReCEP model</p>
    <p>🚀 Advanced AI-powered epitope prediction with interactive 3D visualization</p>
    <p><strong>Features:</strong> PDB ID/File support • ESM-C encoder • GPU acceleration • 3D visualization • Multiple export formats</p>
    </div>
    """

    with gr.Blocks(css=page_css) as interface:
        # Header
        gr.HTML(header_html)

        with gr.Row():
            # Left column: inputs
            with gr.Column(scale=1):
                gr.HTML("<div class='section'><h2>📋 Input Protein Structure</h2></div>")

                input_method = gr.Radio(
                    choices=["PDB ID", "Upload PDB File"], value="PDB ID", label="Input Method"
                )

                # Only one of the two source widgets is visible at a time.
                pdb_id = gr.Textbox(
                    label="PDB ID", placeholder="e.g., 5I9Q", max_lines=1, visible=True
                )
                pdb_file = gr.File(
                    label="Upload PDB File", file_types=[".pdb", ".ent"], visible=False
                )
                chain_id = gr.Textbox(label="Chain ID", value="A", max_lines=1)

                with gr.Accordion("🔧 Advanced Parameters", open=False):
                    radius = gr.Slider(
                        label="Radius (Å)", minimum=1.0, maximum=50.0, step=0.1, value=19.0
                    )
                    k = gr.Slider(
                        label="Top-k Regions", minimum=1, maximum=20, step=1, value=7
                    )
                    encoder = gr.Dropdown(
                        label="Encoder", choices=["esmc", "esm2"], value="esmc"
                    )
                    device_config = gr.Dropdown(
                        label="Device Configuration",
                        choices=["CPU Only", "GPU 0", "GPU 1", "GPU 2", "GPU 3"],
                        value="CPU Only",
                    )
                    use_threshold = gr.Checkbox(label="Use Custom Threshold", value=False)
                    # Hidden until the checkbox above is ticked.
                    threshold = gr.Number(label="Threshold Value", value=0.366, visible=False)
                    auto_cleanup = gr.Checkbox(label="Auto-cleanup Generated Data", value=True)

                predict_btn = gr.Button("🧮 Predict Epitopes", variant="primary", size="lg")

            # Right column: results
            with gr.Column(scale=2):
                gr.HTML("<div class='section'><h2>📊 Prediction Results</h2></div>")

                results_text = gr.Markdown(label="Prediction Summary", visible=True)

                # 3D visualization download sits above the residue listings.
                gr.HTML(visualization_note_html)
                html_download = gr.File(
                    label="Download Interactive 3D Visualization HTML", visible=True
                )

                with gr.Row():
                    epitope_list = gr.Textbox(
                        label="Predicted Epitope Residues", max_lines=10, interactive=False
                    )
                    binding_regions = gr.Textbox(
                        label="Binding Region Residues", max_lines=10, interactive=False
                    )

                gr.HTML("<div class='download-section'><h3>📥 Download Data Results</h3></div>")
                with gr.Row():
                    json_download = gr.File(label="JSON Results", visible=True)
                    csv_download = gr.File(label="CSV Results", visible=True)

        def toggle_input_method(method):
            """Show the widget matching the selected input method, hide the other."""
            show_id_box = method == "PDB ID"
            show_upload = method == "Upload PDB File"
            return gr.update(visible=show_id_box), gr.update(visible=show_upload)

        def toggle_threshold(use_threshold):
            """Show the threshold field only while the custom-threshold box is ticked."""
            return gr.update(visible=use_threshold)

        input_method.change(
            toggle_input_method, inputs=[input_method], outputs=[pdb_id, pdb_file]
        )
        use_threshold.change(
            toggle_threshold, inputs=[use_threshold], outputs=[threshold]
        )

        # Input order must match the positional parameters of predict_epitopes;
        # output order must match its 6-element return tuple.
        prediction_inputs = [
            pdb_id, pdb_file, chain_id, radius, k, encoder,
            device_config, use_threshold, threshold, auto_cleanup,
        ]
        prediction_outputs = [
            results_text, epitope_list, binding_regions,
            html_download, json_download, csv_download,
        ]
        predict_btn.click(
            predict_epitopes,
            inputs=prediction_inputs,
            outputs=prediction_outputs,
            show_progress=True,
        )

        gr.HTML(footer_html)

    return interface
1167
+
1168
+
1169
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces, port 7860.
    import sys
    import traceback

    try:
        interface = create_interface()

        # SPACE_ID is set in the environment when running on Hugging Face Spaces.
        is_spaces = os.getenv("SPACE_ID") is not None

        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            # A Space is already publicly served and Gradio does not support
            # share links there, so never request a share tunnel.
            share=False,
            show_error=True,
            max_threads=4 if is_spaces else 8,
        )
    except Exception as e:
        print(f"Error launching application: {e}")
        print("Please ensure all dependencies are installed correctly.")
        traceback.print_exc()
        # Exit non-zero so a failed launch is not reported as a clean run.
        sys.exit(1)
config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ title: B-cell Epitope Prediction Server
2
+ emoji: 🧬
3
+ colorFrom: blue
4
+ colorTo: purple
5
+ sdk: gradio
6
+ sdk_version: 4.44.1
7
+ app_file: app.py
8
+ pinned: false
9
+ license: mit
10
+ short_description: "AI-powered B-cell epitope prediction using the ReCEP model"
11
+ tags:
12
+ - bioinformatics
13
+ - protein-structure
14
+ - epitope-prediction
15
+ - machine-learning
16
+ - computational-biology
data/epitopes/data_splits.json ADDED
@@ -0,0 +1,2878 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": [
3
+ [
4
+ "6elu",
5
+ "A"
6
+ ],
7
+ [
8
+ "4zfg",
9
+ "A"
10
+ ],
11
+ [
12
+ "8tfl",
13
+ "B"
14
+ ],
15
+ [
16
+ "6gv4",
17
+ "B"
18
+ ],
19
+ [
20
+ "3lh2",
21
+ "V"
22
+ ],
23
+ [
24
+ "5b3j",
25
+ "C"
26
+ ],
27
+ [
28
+ "5bv7",
29
+ "A"
30
+ ],
31
+ [
32
+ "6u6u",
33
+ "R"
34
+ ],
35
+ [
36
+ "1fj1",
37
+ "E"
38
+ ],
39
+ [
40
+ "8tv3",
41
+ "A"
42
+ ],
43
+ [
44
+ "3vg9",
45
+ "A"
46
+ ],
47
+ [
48
+ "7kql",
49
+ "T"
50
+ ],
51
+ [
52
+ "1bgx",
53
+ "T"
54
+ ],
55
+ [
56
+ "9ima",
57
+ "A"
58
+ ],
59
+ [
60
+ "5fcu",
61
+ "G"
62
+ ],
63
+ [
64
+ "6v4n",
65
+ "W"
66
+ ],
67
+ [
68
+ "8u1c",
69
+ "A"
70
+ ],
71
+ [
72
+ "7tpd",
73
+ "A"
74
+ ],
75
+ [
76
+ "9ivk",
77
+ "A"
78
+ ],
79
+ [
80
+ "8zd5",
81
+ "D"
82
+ ],
83
+ [
84
+ "8r8d",
85
+ "B"
86
+ ],
87
+ [
88
+ "8yxi",
89
+ "A"
90
+ ],
91
+ [
92
+ "6y9b",
93
+ "C"
94
+ ],
95
+ [
96
+ "7tuf",
97
+ "C"
98
+ ],
99
+ [
100
+ "8gkl",
101
+ "E"
102
+ ],
103
+ [
104
+ "1lk3",
105
+ "B"
106
+ ],
107
+ [
108
+ "6wgl",
109
+ "C"
110
+ ],
111
+ [
112
+ "1egj",
113
+ "A"
114
+ ],
115
+ [
116
+ "6k65",
117
+ "A"
118
+ ],
119
+ [
120
+ "7lxx",
121
+ "A"
122
+ ],
123
+ [
124
+ "3kr3",
125
+ "D"
126
+ ],
127
+ [
128
+ "4f37",
129
+ "A"
130
+ ],
131
+ [
132
+ "6fgb",
133
+ "A"
134
+ ],
135
+ [
136
+ "5e8e",
137
+ "H"
138
+ ],
139
+ [
140
+ "6iw2",
141
+ "D"
142
+ ],
143
+ [
144
+ "7lf8",
145
+ "A"
146
+ ],
147
+ [
148
+ "1nsn",
149
+ "S"
150
+ ],
151
+ [
152
+ "4plj",
153
+ "A"
154
+ ],
155
+ [
156
+ "3rkd",
157
+ "A"
158
+ ],
159
+ [
160
+ "8y0r",
161
+ "2"
162
+ ],
163
+ [
164
+ "8y0q",
165
+ "2"
166
+ ],
167
+ [
168
+ "6hx4",
169
+ "A"
170
+ ],
171
+ [
172
+ "6wzk",
173
+ "E"
174
+ ],
175
+ [
176
+ "6wzj",
177
+ "E"
178
+ ],
179
+ [
180
+ "5ob5",
181
+ "A"
182
+ ],
183
+ [
184
+ "5hys",
185
+ "G"
186
+ ],
187
+ [
188
+ "7shu",
189
+ "A"
190
+ ],
191
+ [
192
+ "5veb",
193
+ "Y"
194
+ ],
195
+ [
196
+ "7zli",
197
+ "A"
198
+ ],
199
+ [
200
+ "6wbv",
201
+ "A"
202
+ ],
203
+ [
204
+ "4rdq",
205
+ "C"
206
+ ],
207
+ [
208
+ "8caf",
209
+ "E"
210
+ ],
211
+ [
212
+ "4hwb",
213
+ "A"
214
+ ],
215
+ [
216
+ "6ck9",
217
+ "G"
218
+ ],
219
+ [
220
+ "6mug",
221
+ "G"
222
+ ],
223
+ [
224
+ "6vzi",
225
+ "G"
226
+ ],
227
+ [
228
+ "4ma7",
229
+ "A"
230
+ ],
231
+ [
232
+ "4yxl",
233
+ "A"
234
+ ],
235
+ [
236
+ "1tqb",
237
+ "A"
238
+ ],
239
+ [
240
+ "4yxk",
241
+ "A"
242
+ ],
243
+ [
244
+ "6sv2",
245
+ "A"
246
+ ],
247
+ [
248
+ "8dyx",
249
+ "I"
250
+ ],
251
+ [
252
+ "7l7r",
253
+ "G"
254
+ ],
255
+ [
256
+ "6umx",
257
+ "A"
258
+ ],
259
+ [
260
+ "6wtu",
261
+ "A"
262
+ ],
263
+ [
264
+ "3l95",
265
+ "X"
266
+ ],
267
+ [
268
+ "8v2e",
269
+ "C"
270
+ ],
271
+ [
272
+ "4ala",
273
+ "C"
274
+ ],
275
+ [
276
+ "6wo4",
277
+ "E"
278
+ ],
279
+ [
280
+ "6uyf",
281
+ "E"
282
+ ],
283
+ [
284
+ "7jtg",
285
+ "E"
286
+ ],
287
+ [
288
+ "6bkc",
289
+ "E"
290
+ ],
291
+ [
292
+ "5ldn",
293
+ "A"
294
+ ],
295
+ [
296
+ "6nms",
297
+ "C"
298
+ ],
299
+ [
300
+ "4edw",
301
+ "V"
302
+ ],
303
+ [
304
+ "5vyf",
305
+ "F"
306
+ ],
307
+ [
308
+ "6qb6",
309
+ "A"
310
+ ],
311
+ [
312
+ "3cvh",
313
+ "M"
314
+ ],
315
+ [
316
+ "8tq8",
317
+ "C"
318
+ ],
319
+ [
320
+ "8tqa",
321
+ "A"
322
+ ],
323
+ [
324
+ "4xvu",
325
+ "H"
326
+ ],
327
+ [
328
+ "4xwo",
329
+ "A"
330
+ ],
331
+ [
332
+ "2wuc",
333
+ "A"
334
+ ],
335
+ [
336
+ "8jxs",
337
+ "A"
338
+ ],
339
+ [
340
+ "8hrx",
341
+ "B"
342
+ ],
343
+ [
344
+ "4ypg",
345
+ "D"
346
+ ],
347
+ [
348
+ "8urf",
349
+ "A"
350
+ ],
351
+ [
352
+ "8j8r",
353
+ "B"
354
+ ],
355
+ [
356
+ "4cni",
357
+ "D"
358
+ ],
359
+ [
360
+ "4o9h",
361
+ "A"
362
+ ],
363
+ [
364
+ "4zs7",
365
+ "A"
366
+ ],
367
+ [
368
+ "5ukr",
369
+ "G"
370
+ ],
371
+ [
372
+ "4a6y",
373
+ "A"
374
+ ],
375
+ [
376
+ "7kq7",
377
+ "B"
378
+ ],
379
+ [
380
+ "8wre",
381
+ "A"
382
+ ],
383
+ [
384
+ "3qwo",
385
+ "C"
386
+ ],
387
+ [
388
+ "3gi9",
389
+ "C"
390
+ ],
391
+ [
392
+ "8f5i",
393
+ "A"
394
+ ],
395
+ [
396
+ "3s35",
397
+ "X"
398
+ ],
399
+ [
400
+ "1ncc",
401
+ "N"
402
+ ],
403
+ [
404
+ "6lxj",
405
+ "D"
406
+ ],
407
+ [
408
+ "4kxz",
409
+ "D"
410
+ ],
411
+ [
412
+ "7v23",
413
+ "B"
414
+ ],
415
+ [
416
+ "9c44",
417
+ "C"
418
+ ],
419
+ [
420
+ "7k9j",
421
+ "C"
422
+ ],
423
+ [
424
+ "8hhy",
425
+ "A"
426
+ ],
427
+ [
428
+ "7e7x",
429
+ "A"
430
+ ],
431
+ [
432
+ "6ogx",
433
+ "G"
434
+ ],
435
+ [
436
+ "7yk4",
437
+ "B"
438
+ ],
439
+ [
440
+ "4dw2",
441
+ "U"
442
+ ],
443
+ [
444
+ "5k9k",
445
+ "F"
446
+ ],
447
+ [
448
+ "8ut3",
449
+ "C"
450
+ ],
451
+ [
452
+ "4o58",
453
+ "A"
454
+ ],
455
+ [
456
+ "6umg",
457
+ "C"
458
+ ],
459
+ [
460
+ "5ikc",
461
+ "N"
462
+ ],
463
+ [
464
+ "6tyb",
465
+ "G"
466
+ ],
467
+ [
468
+ "7dm2",
469
+ "A"
470
+ ],
471
+ [
472
+ "3v6o",
473
+ "B"
474
+ ],
475
+ [
476
+ "4u6v",
477
+ "B"
478
+ ],
479
+ [
480
+ "4khx",
481
+ "A"
482
+ ],
483
+ [
484
+ "5w2b",
485
+ "A"
486
+ ],
487
+ [
488
+ "6mlk",
489
+ "A"
490
+ ],
491
+ [
492
+ "4ywg",
493
+ "G"
494
+ ],
495
+ [
496
+ "8b7h",
497
+ "A"
498
+ ],
499
+ [
500
+ "7upw",
501
+ "A"
502
+ ],
503
+ [
504
+ "8r1d",
505
+ "C"
506
+ ],
507
+ [
508
+ "7q0i",
509
+ "D"
510
+ ],
511
+ [
512
+ "7ttx",
513
+ "A"
514
+ ],
515
+ [
516
+ "8y6a",
517
+ "B"
518
+ ],
519
+ [
520
+ "6xkq",
521
+ "A"
522
+ ],
523
+ [
524
+ "3o2d",
525
+ "A"
526
+ ],
527
+ [
528
+ "1xf5",
529
+ "L"
530
+ ],
531
+ [
532
+ "5u3d",
533
+ "E"
534
+ ],
535
+ [
536
+ "8d9y",
537
+ "K"
538
+ ],
539
+ [
540
+ "1iqd",
541
+ "C"
542
+ ],
543
+ [
544
+ "8c7m",
545
+ "B"
546
+ ],
547
+ [
548
+ "7mlh",
549
+ "F"
550
+ ],
551
+ [
552
+ "6oor",
553
+ "A"
554
+ ],
555
+ [
556
+ "3uc0",
557
+ "B"
558
+ ],
559
+ [
560
+ "2hfg",
561
+ "R"
562
+ ],
563
+ [
564
+ "8too",
565
+ "I"
566
+ ],
567
+ [
568
+ "7a0x",
569
+ "B"
570
+ ],
571
+ [
572
+ "4krp",
573
+ "A"
574
+ ],
575
+ [
576
+ "3ks0",
577
+ "A"
578
+ ],
579
+ [
580
+ "8vgm",
581
+ "A"
582
+ ],
583
+ [
584
+ "6o9i",
585
+ "C"
586
+ ],
587
+ [
588
+ "8y6i",
589
+ "A"
590
+ ],
591
+ [
592
+ "6xsw",
593
+ "X"
594
+ ],
595
+ [
596
+ "8tbq",
597
+ "P"
598
+ ],
599
+ [
600
+ "6mvl",
601
+ "A"
602
+ ],
603
+ [
604
+ "5epm",
605
+ "C"
606
+ ],
607
+ [
608
+ "6ppg",
609
+ "G"
610
+ ],
611
+ [
612
+ "8ruu",
613
+ "Y"
614
+ ],
615
+ [
616
+ "6wmw",
617
+ "B"
618
+ ],
619
+ [
620
+ "6svl",
621
+ "J"
622
+ ],
623
+ [
624
+ "6vtw",
625
+ "A"
626
+ ],
627
+ [
628
+ "3b9k",
629
+ "F"
630
+ ],
631
+ [
632
+ "7nx3",
633
+ "F"
634
+ ],
635
+ [
636
+ "5hbv",
637
+ "B"
638
+ ],
639
+ [
640
+ "3grw",
641
+ "A"
642
+ ],
643
+ [
644
+ "4mwf",
645
+ "D"
646
+ ],
647
+ [
648
+ "6meh",
649
+ "C"
650
+ ],
651
+ [
652
+ "6uyd",
653
+ "F"
654
+ ],
655
+ [
656
+ "6wo5",
657
+ "E"
658
+ ],
659
+ [
660
+ "1xiw",
661
+ "B"
662
+ ],
663
+ [
664
+ "7bbj",
665
+ "B"
666
+ ],
667
+ [
668
+ "6xcj",
669
+ "G"
670
+ ],
671
+ [
672
+ "8jlx",
673
+ "A"
674
+ ],
675
+ [
676
+ "8jlw",
677
+ "A"
678
+ ],
679
+ [
680
+ "7s7i",
681
+ "B"
682
+ ],
683
+ [
684
+ "6ddm",
685
+ "C"
686
+ ],
687
+ [
688
+ "4yqx",
689
+ "M"
690
+ ],
691
+ [
692
+ "4yue",
693
+ "C"
694
+ ],
695
+ [
696
+ "8dcn",
697
+ "F"
698
+ ],
699
+ [
700
+ "4i3r",
701
+ "G"
702
+ ],
703
+ [
704
+ "7n3c",
705
+ "C"
706
+ ],
707
+ [
708
+ "7sue",
709
+ "D"
710
+ ],
711
+ [
712
+ "6aod",
713
+ "C"
714
+ ],
715
+ [
716
+ "4aei",
717
+ "C"
718
+ ],
719
+ [
720
+ "7cj2",
721
+ "A"
722
+ ],
723
+ [
724
+ "5xez",
725
+ "A"
726
+ ],
727
+ [
728
+ "5yoy",
729
+ "A"
730
+ ],
731
+ [
732
+ "5wux",
733
+ "E"
734
+ ],
735
+ [
736
+ "5mo9",
737
+ "X"
738
+ ],
739
+ [
740
+ "4np4",
741
+ "A"
742
+ ],
743
+ [
744
+ "6pi7",
745
+ "A"
746
+ ],
747
+ [
748
+ "7usl",
749
+ "C"
750
+ ],
751
+ [
752
+ "7rah",
753
+ "E"
754
+ ],
755
+ [
756
+ "1ob1",
757
+ "F"
758
+ ],
759
+ [
760
+ "8dfh",
761
+ "A"
762
+ ],
763
+ [
764
+ "6xqw",
765
+ "E"
766
+ ],
767
+ [
768
+ "4f2m",
769
+ "E"
770
+ ],
771
+ [
772
+ "2j88",
773
+ "A"
774
+ ],
775
+ [
776
+ "4ps4",
777
+ "A"
778
+ ],
779
+ [
780
+ "8slb",
781
+ "A"
782
+ ],
783
+ [
784
+ "8jkf",
785
+ "A"
786
+ ],
787
+ [
788
+ "4xp9",
789
+ "C"
790
+ ],
791
+ [
792
+ "1uwx",
793
+ "B"
794
+ ],
795
+ [
796
+ "6wfw",
797
+ "G"
798
+ ],
799
+ [
800
+ "6u8c",
801
+ "B"
802
+ ],
803
+ [
804
+ "4lmq",
805
+ "F"
806
+ ],
807
+ [
808
+ "8w86",
809
+ "D"
810
+ ],
811
+ [
812
+ "8w83",
813
+ "L"
814
+ ],
815
+ [
816
+ "8w85",
817
+ "H"
818
+ ],
819
+ [
820
+ "6xxv",
821
+ "C"
822
+ ],
823
+ [
824
+ "9dez",
825
+ "C"
826
+ ],
827
+ [
828
+ "5w5z",
829
+ "A"
830
+ ],
831
+ [
832
+ "1cl7",
833
+ "I"
834
+ ],
835
+ [
836
+ "8tco",
837
+ "A"
838
+ ],
839
+ [
840
+ "6c9u",
841
+ "A"
842
+ ],
843
+ [
844
+ "7ox4",
845
+ "C"
846
+ ],
847
+ [
848
+ "6a77",
849
+ "A"
850
+ ],
851
+ [
852
+ "6cw2",
853
+ "C"
854
+ ],
855
+ [
856
+ "8dn7",
857
+ "C"
858
+ ],
859
+ [
860
+ "5k59",
861
+ "B"
862
+ ],
863
+ [
864
+ "7xy8",
865
+ "A"
866
+ ],
867
+ [
868
+ "7daa",
869
+ "A"
870
+ ],
871
+ [
872
+ "3wih",
873
+ "A"
874
+ ],
875
+ [
876
+ "8tp7",
877
+ "C"
878
+ ],
879
+ [
880
+ "6iek",
881
+ "D"
882
+ ],
883
+ [
884
+ "7ul3",
885
+ "A"
886
+ ],
887
+ [
888
+ "5x2n",
889
+ "C"
890
+ ],
891
+ [
892
+ "7txt",
893
+ "S"
894
+ ],
895
+ [
896
+ "8e8x",
897
+ "1"
898
+ ],
899
+ [
900
+ "8e8y",
901
+ "1"
902
+ ],
903
+ [
904
+ "3d85",
905
+ "C"
906
+ ],
907
+ [
908
+ "6ztr",
909
+ "I"
910
+ ],
911
+ [
912
+ "8d9z",
913
+ "D"
914
+ ],
915
+ [
916
+ "3nh7",
917
+ "C"
918
+ ],
919
+ [
920
+ "8y6h",
921
+ "A"
922
+ ],
923
+ [
924
+ "8y31",
925
+ "E"
926
+ ],
927
+ [
928
+ "3hi6",
929
+ "A"
930
+ ],
931
+ [
932
+ "5jq6",
933
+ "A"
934
+ ],
935
+ [
936
+ "6ks0",
937
+ "A"
938
+ ],
939
+ [
940
+ "5f3b",
941
+ "C"
942
+ ],
943
+ [
944
+ "5th9",
945
+ "B"
946
+ ],
947
+ [
948
+ "5kw9",
949
+ "A"
950
+ ],
951
+ [
952
+ "6z3q",
953
+ "A"
954
+ ],
955
+ [
956
+ "5wi9",
957
+ "A"
958
+ ],
959
+ [
960
+ "6uj9",
961
+ "A"
962
+ ],
963
+ [
964
+ "3hae",
965
+ "J"
966
+ ],
967
+ [
968
+ "7t0l",
969
+ "D"
970
+ ],
971
+ [
972
+ "6id4",
973
+ "E"
974
+ ],
975
+ [
976
+ "3l5x",
977
+ "A"
978
+ ],
979
+ [
980
+ "5l6y",
981
+ "C"
982
+ ],
983
+ [
984
+ "7rew",
985
+ "I"
986
+ ],
987
+ [
988
+ "3l5w",
989
+ "J"
990
+ ],
991
+ [
992
+ "6yio",
993
+ "B"
994
+ ],
995
+ [
996
+ "3nfp",
997
+ "I"
998
+ ],
999
+ [
1000
+ "2hmi",
1001
+ "B"
1002
+ ],
1003
+ [
1004
+ "8da0",
1005
+ "E"
1006
+ ],
1007
+ [
1008
+ "6wh9",
1009
+ "D"
1010
+ ],
1011
+ [
1012
+ "7rt9",
1013
+ "Y"
1014
+ ],
1015
+ [
1016
+ "7ec5",
1017
+ "C"
1018
+ ],
1019
+ [
1020
+ "6vvu",
1021
+ "B"
1022
+ ],
1023
+ [
1024
+ "6x3x",
1025
+ "D"
1026
+ ],
1027
+ [
1028
+ "7zoz",
1029
+ "A"
1030
+ ],
1031
+ [
1032
+ "5tl5",
1033
+ "A"
1034
+ ],
1035
+ [
1036
+ "5gzn",
1037
+ "E"
1038
+ ],
1039
+ [
1040
+ "7vux",
1041
+ "A"
1042
+ ],
1043
+ [
1044
+ "6jbt",
1045
+ "F"
1046
+ ],
1047
+ [
1048
+ "5wt9",
1049
+ "G"
1050
+ ],
1051
+ [
1052
+ "6j15",
1053
+ "D"
1054
+ ],
1055
+ [
1056
+ "5jxe",
1057
+ "B"
1058
+ ],
1059
+ [
1060
+ "6j14",
1061
+ "G"
1062
+ ],
1063
+ [
1064
+ "1rjl",
1065
+ "C"
1066
+ ],
1067
+ [
1068
+ "8uky",
1069
+ "D"
1070
+ ],
1071
+ [
1072
+ "7wg3",
1073
+ "K"
1074
+ ],
1075
+ [
1076
+ "7x28",
1077
+ "F"
1078
+ ],
1079
+ [
1080
+ "4ffv",
1081
+ "B"
1082
+ ],
1083
+ [
1084
+ "2oz4",
1085
+ "A"
1086
+ ],
1087
+ [
1088
+ "4hc1",
1089
+ "A"
1090
+ ],
1091
+ [
1092
+ "7lsg",
1093
+ "C"
1094
+ ],
1095
+ [
1096
+ "6j5d",
1097
+ "A"
1098
+ ],
1099
+ [
1100
+ "7kyl",
1101
+ "Z"
1102
+ ],
1103
+ [
1104
+ "7lse",
1105
+ "E"
1106
+ ],
1107
+ [
1108
+ "6cmi",
1109
+ "B"
1110
+ ],
1111
+ [
1112
+ "8rp8",
1113
+ "D"
1114
+ ],
1115
+ [
1116
+ "7wn8",
1117
+ "C"
1118
+ ],
1119
+ [
1120
+ "8jel",
1121
+ "J"
1122
+ ],
1123
+ [
1124
+ "4jlr",
1125
+ "C"
1126
+ ],
1127
+ [
1128
+ "1kb5",
1129
+ "B"
1130
+ ],
1131
+ [
1132
+ "8c7h",
1133
+ "D"
1134
+ ],
1135
+ [
1136
+ "2vxt",
1137
+ "I"
1138
+ ],
1139
+ [
1140
+ "4qhu",
1141
+ "C"
1142
+ ],
1143
+ [
1144
+ "5n7w",
1145
+ "X"
1146
+ ],
1147
+ [
1148
+ "7z2m",
1149
+ "G"
1150
+ ],
1151
+ [
1152
+ "6wio",
1153
+ "C"
1154
+ ],
1155
+ [
1156
+ "2vxs",
1157
+ "A"
1158
+ ],
1159
+ [
1160
+ "4nnp",
1161
+ "B"
1162
+ ],
1163
+ [
1164
+ "8f5n",
1165
+ "A"
1166
+ ],
1167
+ [
1168
+ "8iuy",
1169
+ "A"
1170
+ ],
1171
+ [
1172
+ "6adb",
1173
+ "A"
1174
+ ],
1175
+ [
1176
+ "5ggv",
1177
+ "Y"
1178
+ ],
1179
+ [
1180
+ "3ld8",
1181
+ "A"
1182
+ ],
1183
+ [
1184
+ "2q8a",
1185
+ "A"
1186
+ ],
1187
+ [
1188
+ "2q8b",
1189
+ "A"
1190
+ ],
1191
+ [
1192
+ "7n8q",
1193
+ "A"
1194
+ ],
1195
+ [
1196
+ "7ued",
1197
+ "M"
1198
+ ],
1199
+ [
1200
+ "7vgr",
1201
+ "A"
1202
+ ],
1203
+ [
1204
+ "7phu",
1205
+ "A"
1206
+ ],
1207
+ [
1208
+ "2r56",
1209
+ "A"
1210
+ ],
1211
+ [
1212
+ "7bsc",
1213
+ "A"
1214
+ ],
1215
+ [
1216
+ "6ewb",
1217
+ "C"
1218
+ ],
1219
+ [
1220
+ "6n81",
1221
+ "A"
1222
+ ],
1223
+ [
1224
+ "3wfd",
1225
+ "B"
1226
+ ],
1227
+ [
1228
+ "4i2x",
1229
+ "E"
1230
+ ],
1231
+ [
1232
+ "6mei",
1233
+ "C"
1234
+ ],
1235
+ [
1236
+ "7dc8",
1237
+ "C"
1238
+ ],
1239
+ [
1240
+ "8ts0",
1241
+ "A"
1242
+ ],
1243
+ [
1244
+ "8d7e",
1245
+ "C"
1246
+ ],
1247
+ [
1248
+ "5y9j",
1249
+ "A"
1250
+ ],
1251
+ [
1252
+ "4qci",
1253
+ "D"
1254
+ ],
1255
+ [
1256
+ "8sgi",
1257
+ "A"
1258
+ ],
1259
+ [
1260
+ "9gwt",
1261
+ "P"
1262
+ ],
1263
+ [
1264
+ "8oxv",
1265
+ "A"
1266
+ ],
1267
+ [
1268
+ "4lvo",
1269
+ "A"
1270
+ ],
1271
+ [
1272
+ "4d9q",
1273
+ "A"
1274
+ ],
1275
+ [
1276
+ "8ee0",
1277
+ "A"
1278
+ ],
1279
+ [
1280
+ "8aci",
1281
+ "A"
1282
+ ],
1283
+ [
1284
+ "5xj4",
1285
+ "A"
1286
+ ],
1287
+ [
1288
+ "5xxy",
1289
+ "A"
1290
+ ],
1291
+ [
1292
+ "7ket",
1293
+ "C"
1294
+ ],
1295
+ [
1296
+ "2ypv",
1297
+ "A"
1298
+ ],
1299
+ [
1300
+ "6al5",
1301
+ "A"
1302
+ ],
1303
+ [
1304
+ "2arj",
1305
+ "R"
1306
+ ],
1307
+ [
1308
+ "6m3b",
1309
+ "A"
1310
+ ],
1311
+ [
1312
+ "5e94",
1313
+ "G"
1314
+ ],
1315
+ [
1316
+ "4fqj",
1317
+ "A"
1318
+ ],
1319
+ [
1320
+ "6lyn",
1321
+ "C"
1322
+ ],
1323
+ [
1324
+ "4d3c",
1325
+ "A"
1326
+ ],
1327
+ [
1328
+ "3jwd",
1329
+ "A"
1330
+ ],
1331
+ [
1332
+ "7jkt",
1333
+ "G"
1334
+ ],
1335
+ [
1336
+ "4rwy",
1337
+ "A"
1338
+ ],
1339
+ [
1340
+ "4ye4",
1341
+ "G"
1342
+ ],
1343
+ [
1344
+ "3idx",
1345
+ "G"
1346
+ ],
1347
+ [
1348
+ "1g9n",
1349
+ "G"
1350
+ ],
1351
+ [
1352
+ "6vep",
1353
+ "Q"
1354
+ ],
1355
+ [
1356
+ "7kd6",
1357
+ "W"
1358
+ ],
1359
+ [
1360
+ "2h9g",
1361
+ "R"
1362
+ ],
1363
+ [
1364
+ "8sic",
1365
+ "G"
1366
+ ],
1367
+ [
1368
+ "5vpl",
1369
+ "A"
1370
+ ],
1371
+ [
1372
+ "5vcn",
1373
+ "A"
1374
+ ],
1375
+ [
1376
+ "2adf",
1377
+ "A"
1378
+ ],
1379
+ [
1380
+ "8ahn",
1381
+ "A"
1382
+ ],
1383
+ [
1384
+ "7ox3",
1385
+ "C"
1386
+ ],
1387
+ [
1388
+ "7ox1",
1389
+ "X"
1390
+ ],
1391
+ [
1392
+ "7lr3",
1393
+ "C"
1394
+ ],
1395
+ [
1396
+ "4qti",
1397
+ "U"
1398
+ ],
1399
+ [
1400
+ "7x8q",
1401
+ "D"
1402
+ ],
1403
+ [
1404
+ "2vxq",
1405
+ "A"
1406
+ ],
1407
+ [
1408
+ "1v7m",
1409
+ "V"
1410
+ ],
1411
+ [
1412
+ "5mhr",
1413
+ "D"
1414
+ ],
1415
+ [
1416
+ "8djg",
1417
+ "F"
1418
+ ],
1419
+ [
1420
+ "5occ",
1421
+ "A"
1422
+ ],
1423
+ [
1424
+ "4rgo",
1425
+ "S"
1426
+ ],
1427
+ [
1428
+ "3u9p",
1429
+ "C"
1430
+ ],
1431
+ [
1432
+ "4oii",
1433
+ "B"
1434
+ ],
1435
+ [
1436
+ "7df1",
1437
+ "D"
1438
+ ],
1439
+ [
1440
+ "8ath",
1441
+ "A"
1442
+ ],
1443
+ [
1444
+ "8pg0",
1445
+ "A"
1446
+ ],
1447
+ [
1448
+ "8kdm",
1449
+ "B"
1450
+ ],
1451
+ [
1452
+ "7ttm",
1453
+ "A"
1454
+ ],
1455
+ [
1456
+ "7rp2",
1457
+ "A"
1458
+ ],
1459
+ [
1460
+ "2uzi",
1461
+ "R"
1462
+ ],
1463
+ [
1464
+ "8g8d",
1465
+ "P"
1466
+ ],
1467
+ [
1468
+ "4g6f",
1469
+ "F"
1470
+ ],
1471
+ [
1472
+ "5u3m",
1473
+ "A"
1474
+ ],
1475
+ [
1476
+ "6p9h",
1477
+ "A"
1478
+ ],
1479
+ [
1480
+ "5eu7",
1481
+ "B"
1482
+ ],
1483
+ [
1484
+ "2qqk",
1485
+ "A"
1486
+ ],
1487
+ [
1488
+ "4tsa",
1489
+ "A"
1490
+ ],
1491
+ [
1492
+ "6u9s",
1493
+ "C"
1494
+ ],
1495
+ [
1496
+ "6hig",
1497
+ "B"
1498
+ ],
1499
+ [
1500
+ "6lxi",
1501
+ "B"
1502
+ ],
1503
+ [
1504
+ "4qnp",
1505
+ "A"
1506
+ ],
1507
+ [
1508
+ "6kyz",
1509
+ "A"
1510
+ ],
1511
+ [
1512
+ "6kz0",
1513
+ "J"
1514
+ ],
1515
+ [
1516
+ "2xqb",
1517
+ "A"
1518
+ ],
1519
+ [
1520
+ "5w3l",
1521
+ "B"
1522
+ ],
1523
+ [
1524
+ "7uvf",
1525
+ "B"
1526
+ ],
1527
+ [
1528
+ "3lev",
1529
+ "A"
1530
+ ],
1531
+ [
1532
+ "6v4p",
1533
+ "B"
1534
+ ],
1535
+ [
1536
+ "7rxl",
1537
+ "F"
1538
+ ],
1539
+ [
1540
+ "6sni",
1541
+ "X"
1542
+ ],
1543
+ [
1544
+ "7qu1",
1545
+ "C"
1546
+ ],
1547
+ [
1548
+ "3ehb",
1549
+ "B"
1550
+ ],
1551
+ [
1552
+ "6u2f",
1553
+ "A"
1554
+ ],
1555
+ [
1556
+ "6u36",
1557
+ "B"
1558
+ ],
1559
+ [
1560
+ "1dee",
1561
+ "H"
1562
+ ],
1563
+ [
1564
+ "1oaz",
1565
+ "A"
1566
+ ],
1567
+ [
1568
+ "8byu",
1569
+ "A"
1570
+ ],
1571
+ [
1572
+ "3u30",
1573
+ "A"
1574
+ ],
1575
+ [
1576
+ "3ru8",
1577
+ "X"
1578
+ ],
1579
+ [
1580
+ "6wzl",
1581
+ "E"
1582
+ ],
1583
+ [
1584
+ "9dwe",
1585
+ "A"
1586
+ ],
1587
+ [
1588
+ "3gbm",
1589
+ "A"
1590
+ ],
1591
+ [
1592
+ "5dur",
1593
+ "A"
1594
+ ],
1595
+ [
1596
+ "6iuv",
1597
+ "A"
1598
+ ],
1599
+ [
1600
+ "4xnm",
1601
+ "C"
1602
+ ],
1603
+ [
1604
+ "8txp",
1605
+ "B"
1606
+ ],
1607
+ [
1608
+ "7l0l",
1609
+ "E"
1610
+ ],
1611
+ [
1612
+ "4k3j",
1613
+ "B"
1614
+ ],
1615
+ [
1616
+ "7a3o",
1617
+ "A"
1618
+ ],
1619
+ [
1620
+ "6dfj",
1621
+ "E"
1622
+ ],
1623
+ [
1624
+ "5vic",
1625
+ "E"
1626
+ ],
1627
+ [
1628
+ "4jzj",
1629
+ "D"
1630
+ ],
1631
+ [
1632
+ "7mrz",
1633
+ "C"
1634
+ ],
1635
+ [
1636
+ "3mj9",
1637
+ "A"
1638
+ ],
1639
+ [
1640
+ "3q3g",
1641
+ "I"
1642
+ ],
1643
+ [
1644
+ "7uot",
1645
+ "C"
1646
+ ],
1647
+ [
1648
+ "7tyv",
1649
+ "B"
1650
+ ],
1651
+ [
1652
+ "8dg9",
1653
+ "C"
1654
+ ],
1655
+ [
1656
+ "6apb",
1657
+ "C"
1658
+ ],
1659
+ [
1660
+ "8ulj",
1661
+ "B"
1662
+ ],
1663
+ [
1664
+ "7chz",
1665
+ "I"
1666
+ ],
1667
+ [
1668
+ "7c61",
1669
+ "A"
1670
+ ],
1671
+ [
1672
+ "8fdo",
1673
+ "C"
1674
+ ],
1675
+ [
1676
+ "7sgm",
1677
+ "B"
1678
+ ],
1679
+ [
1680
+ "1nl0",
1681
+ "G"
1682
+ ],
1683
+ [
1684
+ "4j6r",
1685
+ "G"
1686
+ ],
1687
+ [
1688
+ "4xmp",
1689
+ "G"
1690
+ ],
1691
+ [
1692
+ "4lss",
1693
+ "G"
1694
+ ],
1695
+ [
1696
+ "1e6j",
1697
+ "P"
1698
+ ],
1699
+ [
1700
+ "6wit",
1701
+ "C"
1702
+ ],
1703
+ [
1704
+ "6was",
1705
+ "G"
1706
+ ],
1707
+ [
1708
+ "6ks1",
1709
+ "A"
1710
+ ],
1711
+ [
1712
+ "5fb8",
1713
+ "C"
1714
+ ],
1715
+ [
1716
+ "8u2c",
1717
+ "A"
1718
+ ],
1719
+ [
1720
+ "6pe8",
1721
+ "U"
1722
+ ],
1723
+ [
1724
+ "8yx9",
1725
+ "J"
1726
+ ],
1727
+ [
1728
+ "6blh",
1729
+ "G"
1730
+ ],
1731
+ [
1732
+ "6uvo",
1733
+ "D"
1734
+ ],
1735
+ [
1736
+ "5if0",
1737
+ "I"
1738
+ ],
1739
+ [
1740
+ "4jpk",
1741
+ "A"
1742
+ ],
1743
+ [
1744
+ "2jel",
1745
+ "P"
1746
+ ],
1747
+ [
1748
+ "8sak",
1749
+ "B"
1750
+ ],
1751
+ [
1752
+ "3u4e",
1753
+ "J"
1754
+ ],
1755
+ [
1756
+ "3u2s",
1757
+ "G"
1758
+ ],
1759
+ [
1760
+ "7n0u",
1761
+ "C"
1762
+ ],
1763
+ [
1764
+ "8y3u",
1765
+ "K"
1766
+ ],
1767
+ [
1768
+ "8v5q",
1769
+ "G"
1770
+ ],
1771
+ [
1772
+ "6ion",
1773
+ "A"
1774
+ ],
1775
+ [
1776
+ "8h3n",
1777
+ "C"
1778
+ ],
1779
+ [
1780
+ "7yms",
1781
+ "A"
1782
+ ],
1783
+ [
1784
+ "8db4",
1785
+ "E"
1786
+ ],
1787
+ [
1788
+ "6vgr",
1789
+ "A"
1790
+ ],
1791
+ [
1792
+ "8dao",
1793
+ "F"
1794
+ ],
1795
+ [
1796
+ "8g4t",
1797
+ "I"
1798
+ ],
1799
+ [
1800
+ "6nnf",
1801
+ "G"
1802
+ ],
1803
+ [
1804
+ "7kyo",
1805
+ "B"
1806
+ ],
1807
+ [
1808
+ "6xlq",
1809
+ "A"
1810
+ ],
1811
+ [
1812
+ "1nfd",
1813
+ "D"
1814
+ ],
1815
+ [
1816
+ "4ht1",
1817
+ "T"
1818
+ ],
1819
+ [
1820
+ "7u5b",
1821
+ "J"
1822
+ ],
1823
+ [
1824
+ "6ba5",
1825
+ "N"
1826
+ ],
1827
+ [
1828
+ "4okv",
1829
+ "F"
1830
+ ],
1831
+ [
1832
+ "5wk3",
1833
+ "C"
1834
+ ],
1835
+ [
1836
+ "7a3q",
1837
+ "A"
1838
+ ],
1839
+ [
1840
+ "4bz2",
1841
+ "A"
1842
+ ],
1843
+ [
1844
+ "3mxw",
1845
+ "A"
1846
+ ],
1847
+ [
1848
+ "1ors",
1849
+ "C"
1850
+ ],
1851
+ [
1852
+ "3q1s",
1853
+ "I"
1854
+ ],
1855
+ [
1856
+ "3wkm",
1857
+ "B"
1858
+ ],
1859
+ [
1860
+ "3lhp",
1861
+ "S"
1862
+ ],
1863
+ [
1864
+ "6s3d",
1865
+ "M"
1866
+ ],
1867
+ [
1868
+ "9dx6",
1869
+ "A"
1870
+ ],
1871
+ [
1872
+ "2j4w",
1873
+ "D"
1874
+ ],
1875
+ [
1876
+ "6z7z",
1877
+ "F"
1878
+ ],
1879
+ [
1880
+ "3w9e",
1881
+ "C"
1882
+ ],
1883
+ [
1884
+ "4dkf",
1885
+ "A"
1886
+ ],
1887
+ [
1888
+ "7ce2",
1889
+ "A"
1890
+ ],
1891
+ [
1892
+ "7upb",
1893
+ "D"
1894
+ ],
1895
+ [
1896
+ "7ki6",
1897
+ "A"
1898
+ ],
1899
+ [
1900
+ "5l0q",
1901
+ "A"
1902
+ ],
1903
+ [
1904
+ "7xw6",
1905
+ "R"
1906
+ ],
1907
+ [
1908
+ "6lz9",
1909
+ "B"
1910
+ ],
1911
+ [
1912
+ "7m3n",
1913
+ "A"
1914
+ ],
1915
+ [
1916
+ "7so5",
1917
+ "A"
1918
+ ],
1919
+ [
1920
+ "3tje",
1921
+ "F"
1922
+ ],
1923
+ [
1924
+ "3thm",
1925
+ "F"
1926
+ ],
1927
+ [
1928
+ "5te4",
1929
+ "G"
1930
+ ],
1931
+ [
1932
+ "6bgt",
1933
+ "C"
1934
+ ],
1935
+ [
1936
+ "4ag4",
1937
+ "A"
1938
+ ],
1939
+ [
1940
+ "7joo",
1941
+ "C"
1942
+ ],
1943
+ [
1944
+ "8tui",
1945
+ "A"
1946
+ ],
1947
+ [
1948
+ "3efd",
1949
+ "K"
1950
+ ],
1951
+ [
1952
+ "7r58",
1953
+ "A"
1954
+ ],
1955
+ [
1956
+ "2nyy",
1957
+ "A"
1958
+ ],
1959
+ [
1960
+ "8jnk",
1961
+ "C"
1962
+ ],
1963
+ [
1964
+ "8dn6",
1965
+ "A"
1966
+ ],
1967
+ [
1968
+ "4jr9",
1969
+ "A"
1970
+ ],
1971
+ [
1972
+ "6otc",
1973
+ "A"
1974
+ ],
1975
+ [
1976
+ "8tlm",
1977
+ "C"
1978
+ ],
1979
+ [
1980
+ "4uuj",
1981
+ "C"
1982
+ ],
1983
+ [
1984
+ "6by3",
1985
+ "C"
1986
+ ],
1987
+ [
1988
+ "5tud",
1989
+ "A"
1990
+ ],
1991
+ [
1992
+ "7xrz",
1993
+ "Y"
1994
+ ],
1995
+ [
1996
+ "4lu5",
1997
+ "A"
1998
+ ],
1999
+ [
2000
+ "4m1g",
2001
+ "B"
2002
+ ],
2003
+ [
2004
+ "9dh2",
2005
+ "T"
2006
+ ],
2007
+ [
2008
+ "6k7o",
2009
+ "P"
2010
+ ],
2011
+ [
2012
+ "7wvg",
2013
+ "B"
2014
+ ],
2015
+ [
2016
+ "8f38",
2017
+ "C"
2018
+ ],
2019
+ [
2020
+ "6ml8",
2021
+ "A"
2022
+ ],
2023
+ [
2024
+ "6q0l",
2025
+ "A"
2026
+ ],
2027
+ [
2028
+ "3gbn",
2029
+ "B"
2030
+ ],
2031
+ [
2032
+ "6cxy",
2033
+ "C"
2034
+ ],
2035
+ [
2036
+ "2aep",
2037
+ "A"
2038
+ ],
2039
+ [
2040
+ "8g3q",
2041
+ "G"
2042
+ ],
2043
+ [
2044
+ "6q20",
2045
+ "A"
2046
+ ],
2047
+ [
2048
+ "6n6b",
2049
+ "A"
2050
+ ],
2051
+ [
2052
+ "8gat",
2053
+ "A"
2054
+ ],
2055
+ [
2056
+ "5utz",
2057
+ "A"
2058
+ ],
2059
+ [
2060
+ "4zso",
2061
+ "E"
2062
+ ],
2063
+ [
2064
+ "6dkj",
2065
+ "D"
2066
+ ],
2067
+ [
2068
+ "7s13",
2069
+ "C"
2070
+ ],
2071
+ [
2072
+ "7x29",
2073
+ "C"
2074
+ ],
2075
+ [
2076
+ "8ol9",
2077
+ "H"
2078
+ ],
2079
+ [
2080
+ "8sxp",
2081
+ "C"
2082
+ ],
2083
+ [
2084
+ "8vvk",
2085
+ "B"
2086
+ ],
2087
+ [
2088
+ "8tv1",
2089
+ "C"
2090
+ ],
2091
+ [
2092
+ "4liq",
2093
+ "E"
2094
+ ],
2095
+ [
2096
+ "7lkf",
2097
+ "A"
2098
+ ],
2099
+ [
2100
+ "7zxk",
2101
+ "C"
2102
+ ],
2103
+ [
2104
+ "1jrh",
2105
+ "I"
2106
+ ],
2107
+ [
2108
+ "6osv",
2109
+ "K"
2110
+ ],
2111
+ [
2112
+ "9jbq",
2113
+ "C"
2114
+ ],
2115
+ [
2116
+ "6cyf",
2117
+ "Q"
2118
+ ],
2119
+ [
2120
+ "8vvm",
2121
+ "I"
2122
+ ],
2123
+ [
2124
+ "3skj",
2125
+ "F"
2126
+ ],
2127
+ [
2128
+ "2xqy",
2129
+ "E"
2130
+ ],
2131
+ [
2132
+ "7qu2",
2133
+ "C"
2134
+ ],
2135
+ [
2136
+ "8v52",
2137
+ "B"
2138
+ ],
2139
+ [
2140
+ "7wtf",
2141
+ "D"
2142
+ ],
2143
+ [
2144
+ "7xco",
2145
+ "C"
2146
+ ],
2147
+ [
2148
+ "9fjk",
2149
+ "B"
2150
+ ],
2151
+ [
2152
+ "8xi6",
2153
+ "C"
2154
+ ],
2155
+ [
2156
+ "7e72",
2157
+ "E"
2158
+ ],
2159
+ [
2160
+ "4lst",
2161
+ "G"
2162
+ ],
2163
+ [
2164
+ "5te7",
2165
+ "G"
2166
+ ],
2167
+ [
2168
+ "2ybr",
2169
+ "I"
2170
+ ],
2171
+ [
2172
+ "5ush",
2173
+ "X"
2174
+ ],
2175
+ [
2176
+ "5dhv",
2177
+ "M"
2178
+ ],
2179
+ [
2180
+ "8e8r",
2181
+ "2"
2182
+ ],
2183
+ [
2184
+ "8e8s",
2185
+ "2"
2186
+ ],
2187
+ [
2188
+ "5kn5",
2189
+ "C"
2190
+ ],
2191
+ [
2192
+ "6iap",
2193
+ "A"
2194
+ ],
2195
+ [
2196
+ "7amp",
2197
+ "B"
2198
+ ],
2199
+ [
2200
+ "8tfn",
2201
+ "B"
2202
+ ],
2203
+ [
2204
+ "3pnw",
2205
+ "R"
2206
+ ],
2207
+ [
2208
+ "6o39",
2209
+ "C"
2210
+ ],
2211
+ [
2212
+ "8x0t",
2213
+ "A"
2214
+ ],
2215
+ [
2216
+ "8da1",
2217
+ "I"
2218
+ ],
2219
+ [
2220
+ "6phb",
2221
+ "I"
2222
+ ],
2223
+ [
2224
+ "6phc",
2225
+ "I"
2226
+ ],
2227
+ [
2228
+ "6wix",
2229
+ "G"
2230
+ ],
2231
+ [
2232
+ "8k3c",
2233
+ "B"
2234
+ ],
2235
+ [
2236
+ "8ffe",
2237
+ "A"
2238
+ ],
2239
+ [
2240
+ "5otj",
2241
+ "D"
2242
+ ],
2243
+ [
2244
+ "5lsp",
2245
+ "P"
2246
+ ],
2247
+ [
2248
+ "6s5a",
2249
+ "A"
2250
+ ],
2251
+ [
2252
+ "3vi4",
2253
+ "D"
2254
+ ],
2255
+ [
2256
+ "5nh3",
2257
+ "B"
2258
+ ],
2259
+ [
2260
+ "3i50",
2261
+ "E"
2262
+ ],
2263
+ [
2264
+ "4wfg",
2265
+ "B"
2266
+ ],
2267
+ [
2268
+ "6pis",
2269
+ "B"
2270
+ ],
2271
+ [
2272
+ "6s8j",
2273
+ "E"
2274
+ ],
2275
+ [
2276
+ "3nps",
2277
+ "A"
2278
+ ],
2279
+ [
2280
+ "1kyo",
2281
+ "P"
2282
+ ],
2283
+ [
2284
+ "7quh",
2285
+ "A"
2286
+ ],
2287
+ [
2288
+ "8cz5",
2289
+ "A"
2290
+ ],
2291
+ [
2292
+ "8t03",
2293
+ "B"
2294
+ ],
2295
+ [
2296
+ "4k94",
2297
+ "C"
2298
+ ]
2299
+ ],
2300
+ "test": [
2301
+ [
2302
+ "6ktr",
2303
+ "C"
2304
+ ],
2305
+ [
2306
+ "7o9s",
2307
+ "A"
2308
+ ],
2309
+ [
2310
+ "2bdn",
2311
+ "A"
2312
+ ],
2313
+ [
2314
+ "8d1t",
2315
+ "A"
2316
+ ],
2317
+ [
2318
+ "9b2w",
2319
+ "D"
2320
+ ],
2321
+ [
2322
+ "6mej",
2323
+ "C"
2324
+ ],
2325
+ [
2326
+ "8dk6",
2327
+ "E"
2328
+ ],
2329
+ [
2330
+ "8vgn",
2331
+ "I"
2332
+ ],
2333
+ [
2334
+ "8f60",
2335
+ "C"
2336
+ ],
2337
+ [
2338
+ "8udz",
2339
+ "A"
2340
+ ],
2341
+ [
2342
+ "7kpj",
2343
+ "E"
2344
+ ],
2345
+ [
2346
+ "7o52",
2347
+ "U"
2348
+ ],
2349
+ [
2350
+ "5b71",
2351
+ "E"
2352
+ ],
2353
+ [
2354
+ "4zff",
2355
+ "D"
2356
+ ],
2357
+ [
2358
+ "4uu9",
2359
+ "D"
2360
+ ],
2361
+ [
2362
+ "7lr3",
2363
+ "D"
2364
+ ],
2365
+ [
2366
+ "7lr4",
2367
+ "D"
2368
+ ],
2369
+ [
2370
+ "1mhp",
2371
+ "B"
2372
+ ],
2373
+ [
2374
+ "1ztx",
2375
+ "E"
2376
+ ],
2377
+ [
2378
+ "6i8s",
2379
+ "D"
2380
+ ],
2381
+ [
2382
+ "7ujd",
2383
+ "A"
2384
+ ],
2385
+ [
2386
+ "3gbn",
2387
+ "A"
2388
+ ],
2389
+ [
2390
+ "8txp",
2391
+ "A"
2392
+ ],
2393
+ [
2394
+ "6uyn",
2395
+ "A"
2396
+ ],
2397
+ [
2398
+ "4cad",
2399
+ "I"
2400
+ ],
2401
+ [
2402
+ "1yjd",
2403
+ "C"
2404
+ ],
2405
+ [
2406
+ "5u8r",
2407
+ "A"
2408
+ ],
2409
+ [
2410
+ "5d72",
2411
+ "B"
2412
+ ],
2413
+ [
2414
+ "6bfq",
2415
+ "G"
2416
+ ],
2417
+ [
2418
+ "8a1e",
2419
+ "A"
2420
+ ],
2421
+ [
2422
+ "4irz",
2423
+ "A"
2424
+ ],
2425
+ [
2426
+ "6o3b",
2427
+ "C"
2428
+ ],
2429
+ [
2430
+ "7x8p",
2431
+ "D"
2432
+ ],
2433
+ [
2434
+ "2yss",
2435
+ "C"
2436
+ ],
2437
+ [
2438
+ "1uac",
2439
+ "Y"
2440
+ ],
2441
+ [
2442
+ "1fbi",
2443
+ "Y"
2444
+ ],
2445
+ [
2446
+ "5vjo",
2447
+ "F"
2448
+ ],
2449
+ [
2450
+ "1jhl",
2451
+ "A"
2452
+ ],
2453
+ [
2454
+ "5o1r",
2455
+ "A"
2456
+ ],
2457
+ [
2458
+ "7yru",
2459
+ "A"
2460
+ ],
2461
+ [
2462
+ "2qqn",
2463
+ "A"
2464
+ ],
2465
+ [
2466
+ "5w4l",
2467
+ "G"
2468
+ ],
2469
+ [
2470
+ "6bf4",
2471
+ "A"
2472
+ ],
2473
+ [
2474
+ "5h35",
2475
+ "C"
2476
+ ],
2477
+ [
2478
+ "5cbe",
2479
+ "F"
2480
+ ],
2481
+ [
2482
+ "5w5x",
2483
+ "A"
2484
+ ],
2485
+ [
2486
+ "7lfa",
2487
+ "C"
2488
+ ],
2489
+ [
2490
+ "7lf7",
2491
+ "M"
2492
+ ],
2493
+ [
2494
+ "8cdd",
2495
+ "B"
2496
+ ],
2497
+ [
2498
+ "7phw",
2499
+ "D"
2500
+ ],
2501
+ [
2502
+ "6hga",
2503
+ "B"
2504
+ ],
2505
+ [
2506
+ "6h3t",
2507
+ "B"
2508
+ ],
2509
+ [
2510
+ "5lcv",
2511
+ "A"
2512
+ ],
2513
+ [
2514
+ "5i9q",
2515
+ "A"
2516
+ ],
2517
+ [
2518
+ "6rlo",
2519
+ "J"
2520
+ ],
2521
+ [
2522
+ "2xra",
2523
+ "A"
2524
+ ],
2525
+ [
2526
+ "4nzr",
2527
+ "M"
2528
+ ],
2529
+ [
2530
+ "1sy6",
2531
+ "A"
2532
+ ],
2533
+ [
2534
+ "1xiw",
2535
+ "A"
2536
+ ],
2537
+ [
2538
+ "8vsj",
2539
+ "A"
2540
+ ],
2541
+ [
2542
+ "6m58",
2543
+ "A"
2544
+ ],
2545
+ [
2546
+ "7q6c",
2547
+ "A"
2548
+ ],
2549
+ [
2550
+ "8u03",
2551
+ "C"
2552
+ ],
2553
+ [
2554
+ "4wv1",
2555
+ "F"
2556
+ ],
2557
+ [
2558
+ "5bo1",
2559
+ "A"
2560
+ ],
2561
+ [
2562
+ "3liz",
2563
+ "A"
2564
+ ],
2565
+ [
2566
+ "7uij",
2567
+ "D"
2568
+ ],
2569
+ [
2570
+ "5tq0",
2571
+ "A"
2572
+ ],
2573
+ [
2574
+ "6uym",
2575
+ "E"
2576
+ ],
2577
+ [
2578
+ "4g7v",
2579
+ "S"
2580
+ ],
2581
+ [
2582
+ "8j80",
2583
+ "A"
2584
+ ],
2585
+ [
2586
+ "7unb",
2587
+ "R"
2588
+ ],
2589
+ [
2590
+ "6e63",
2591
+ "A"
2592
+ ],
2593
+ [
2594
+ "5vkd",
2595
+ "A"
2596
+ ],
2597
+ [
2598
+ "5nmv",
2599
+ "K"
2600
+ ],
2601
+ [
2602
+ "6wzm",
2603
+ "F"
2604
+ ],
2605
+ [
2606
+ "7tuy",
2607
+ "R"
2608
+ ],
2609
+ [
2610
+ "7ahu",
2611
+ "D"
2612
+ ],
2613
+ [
2614
+ "7zqt",
2615
+ "E"
2616
+ ],
2617
+ [
2618
+ "8w84",
2619
+ "C"
2620
+ ],
2621
+ [
2622
+ "8vyn",
2623
+ "C"
2624
+ ],
2625
+ [
2626
+ "4ot1",
2627
+ "A"
2628
+ ],
2629
+ [
2630
+ "7sem",
2631
+ "F"
2632
+ ],
2633
+ [
2634
+ "8t9z",
2635
+ "A"
2636
+ ],
2637
+ [
2638
+ "6rps",
2639
+ "A"
2640
+ ],
2641
+ [
2642
+ "6gku",
2643
+ "A"
2644
+ ],
2645
+ [
2646
+ "7uvi",
2647
+ "C"
2648
+ ],
2649
+ [
2650
+ "7uvs",
2651
+ "C"
2652
+ ],
2653
+ [
2654
+ "8vzn",
2655
+ "A"
2656
+ ],
2657
+ [
2658
+ "5d8j",
2659
+ "A"
2660
+ ],
2661
+ [
2662
+ "3t2n",
2663
+ "A"
2664
+ ],
2665
+ [
2666
+ "8tq9",
2667
+ "B"
2668
+ ],
2669
+ [
2670
+ "4qww",
2671
+ "B"
2672
+ ],
2673
+ [
2674
+ "1uj3",
2675
+ "C"
2676
+ ],
2677
+ [
2678
+ "7xnf",
2679
+ "A"
2680
+ ],
2681
+ [
2682
+ "6mi2",
2683
+ "C"
2684
+ ],
2685
+ [
2686
+ "6a3w",
2687
+ "L"
2688
+ ],
2689
+ [
2690
+ "3zkn",
2691
+ "B"
2692
+ ],
2693
+ [
2694
+ "7ly0",
2695
+ "A"
2696
+ ],
2697
+ [
2698
+ "7lxw",
2699
+ "A"
2700
+ ],
2701
+ [
2702
+ "4uta",
2703
+ "B"
2704
+ ],
2705
+ [
2706
+ "6flb",
2707
+ "G"
2708
+ ],
2709
+ [
2710
+ "2j5l",
2711
+ "A"
2712
+ ],
2713
+ [
2714
+ "6mto",
2715
+ "T"
2716
+ ],
2717
+ [
2718
+ "8tzw",
2719
+ "C"
2720
+ ],
2721
+ [
2722
+ "4m62",
2723
+ "T"
2724
+ ],
2725
+ [
2726
+ "6cw2",
2727
+ "D"
2728
+ ],
2729
+ [
2730
+ "6cw3",
2731
+ "F"
2732
+ ],
2733
+ [
2734
+ "3hmx",
2735
+ "A"
2736
+ ],
2737
+ [
2738
+ "4dtg",
2739
+ "K"
2740
+ ],
2741
+ [
2742
+ "4rrp",
2743
+ "P"
2744
+ ],
2745
+ [
2746
+ "5eii",
2747
+ "I"
2748
+ ],
2749
+ [
2750
+ "8qya",
2751
+ "A"
2752
+ ],
2753
+ [
2754
+ "8a44",
2755
+ "A"
2756
+ ],
2757
+ [
2758
+ "7zjl",
2759
+ "A"
2760
+ ],
2761
+ [
2762
+ "8a99",
2763
+ "C"
2764
+ ],
2765
+ [
2766
+ "7mjk",
2767
+ "C"
2768
+ ],
2769
+ [
2770
+ "7q6e",
2771
+ "A"
2772
+ ],
2773
+ [
2774
+ "8h07",
2775
+ "B"
2776
+ ],
2777
+ [
2778
+ "6p67",
2779
+ "G"
2780
+ ],
2781
+ [
2782
+ "1wej",
2783
+ "F"
2784
+ ],
2785
+ [
2786
+ "4ogy",
2787
+ "A"
2788
+ ],
2789
+ [
2790
+ "8fgx",
2791
+ "C"
2792
+ ],
2793
+ [
2794
+ "1kb5",
2795
+ "A"
2796
+ ],
2797
+ [
2798
+ "4f15",
2799
+ "J"
2800
+ ],
2801
+ [
2802
+ "4leo",
2803
+ "C"
2804
+ ],
2805
+ [
2806
+ "8vdl",
2807
+ "C"
2808
+ ],
2809
+ [
2810
+ "6h2y",
2811
+ "D"
2812
+ ],
2813
+ [
2814
+ "8bk2",
2815
+ "B"
2816
+ ],
2817
+ [
2818
+ "5d93",
2819
+ "A"
2820
+ ],
2821
+ [
2822
+ "4ij3",
2823
+ "A"
2824
+ ],
2825
+ [
2826
+ "3r1g",
2827
+ "B"
2828
+ ],
2829
+ [
2830
+ "6vn1",
2831
+ "C"
2832
+ ],
2833
+ [
2834
+ "6qig",
2835
+ "A"
2836
+ ],
2837
+ [
2838
+ "7k7h",
2839
+ "A"
2840
+ ],
2841
+ [
2842
+ "8hpk",
2843
+ "A"
2844
+ ],
2845
+ [
2846
+ "5e8d",
2847
+ "A"
2848
+ ],
2849
+ [
2850
+ "5bk2",
2851
+ "A"
2852
+ ],
2853
+ [
2854
+ "6nyq",
2855
+ "C"
2856
+ ],
2857
+ [
2858
+ "2zch",
2859
+ "P"
2860
+ ],
2861
+ [
2862
+ "8v91",
2863
+ "B"
2864
+ ],
2865
+ [
2866
+ "1fns",
2867
+ "A"
2868
+ ],
2869
+ [
2870
+ "1yy9",
2871
+ "A"
2872
+ ],
2873
+ [
2874
+ "4jqi",
2875
+ "A"
2876
+ ]
2877
+ ]
2878
+ }
data/epitopes/epitopes.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/epitopes/epitopes_13.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/species.json ADDED
@@ -0,0 +1,3562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "4uuj": {
3
+ "chain": "C",
4
+ "organism": "STREPTOMYCES LIVIDANS",
5
+ "classification": "other"
6
+ },
7
+ "7amp": {
8
+ "chain": "B",
9
+ "organism": "Homo sapiens",
10
+ "classification": "human"
11
+ },
12
+ "8f5n": {
13
+ "chain": "A",
14
+ "organism": "Streptococcus pyogenes",
15
+ "classification": "bacterial"
16
+ },
17
+ "5bo1": {
18
+ "chain": "A",
19
+ "organism": "Homo sapiens",
20
+ "classification": "human"
21
+ },
22
+ "1ncc": {
23
+ "chain": "N",
24
+ "organism": "Influenza A virus",
25
+ "classification": "viral"
26
+ },
27
+ "8x0t": {
28
+ "chain": "A",
29
+ "organism": "Homo sapiens",
30
+ "classification": "human"
31
+ },
32
+ "8g4t": {
33
+ "chain": "I",
34
+ "organism": "Human immunodeficiency virus 1",
35
+ "classification": "viral"
36
+ },
37
+ "6o39": {
38
+ "chain": "C",
39
+ "organism": "Homo sapiens",
40
+ "classification": "human"
41
+ },
42
+ "3wfd": {
43
+ "chain": "C",
44
+ "organism": "Pseudomonas aeruginosa",
45
+ "classification": "bacterial"
46
+ },
47
+ "8w85": {
48
+ "chain": "H",
49
+ "organism": "Homo sapiens",
50
+ "classification": "human"
51
+ },
52
+ "6rps": {
53
+ "chain": "A",
54
+ "organism": "Homo sapiens",
55
+ "classification": "human"
56
+ },
57
+ "9ivk": {
58
+ "chain": "A",
59
+ "organism": "artificial sequences",
60
+ "classification": "other"
61
+ },
62
+ "7kql": {
63
+ "chain": "T",
64
+ "organism": "Homo sapiens",
65
+ "classification": "human"
66
+ },
67
+ "7kyo": {
68
+ "chain": "B",
69
+ "organism": "Streptococcus pneumoniae serotype 2 (strain D39 / NCTC 7466)",
70
+ "classification": "bacterial"
71
+ },
72
+ "3o2d": {
73
+ "chain": "A",
74
+ "organism": "Homo sapiens",
75
+ "classification": "human"
76
+ },
77
+ "1uwx": {
78
+ "chain": "B",
79
+ "organism": "STREPTOCOCCUS SP.",
80
+ "classification": "bacterial"
81
+ },
82
+ "2oz4": {
83
+ "chain": "A",
84
+ "organism": "Homo sapiens",
85
+ "classification": "human"
86
+ },
87
+ "6xkq": {
88
+ "chain": "A",
89
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
90
+ "classification": "viral"
91
+ },
92
+ "8d9z": {
93
+ "chain": "D",
94
+ "organism": "Naja nivea",
95
+ "classification": "other"
96
+ },
97
+ "8v52": {
98
+ "chain": "B",
99
+ "organism": "Homo sapiens",
100
+ "classification": "human"
101
+ },
102
+ "4xnm": {
103
+ "chain": "C",
104
+ "organism": "Influenza A virus",
105
+ "classification": "viral"
106
+ },
107
+ "6p67": {
108
+ "chain": "G",
109
+ "organism": "Homo sapiens",
110
+ "classification": "human"
111
+ },
112
+ "6qig": {
113
+ "chain": "A",
114
+ "organism": "Homo sapiens",
115
+ "classification": "human"
116
+ },
117
+ "9dez": {
118
+ "chain": "C",
119
+ "organism": "Porcine deltacoronavirus",
120
+ "classification": "viral"
121
+ },
122
+ "8cdd": {
123
+ "chain": "B",
124
+ "organism": "Plasmodium falciparum 3D7",
125
+ "classification": "other"
126
+ },
127
+ "4qnp": {
128
+ "chain": "A",
129
+ "organism": "Influenza A virus",
130
+ "classification": "viral"
131
+ },
132
+ "4lss": {
133
+ "chain": "G",
134
+ "organism": "Human immunodeficiency virus 1",
135
+ "classification": "viral"
136
+ },
137
+ "4zs7": {
138
+ "chain": "A",
139
+ "organism": "Homo sapiens",
140
+ "classification": "human"
141
+ },
142
+ "9b2w": {
143
+ "chain": "D",
144
+ "organism": "Human respirovirus 3",
145
+ "classification": "viral"
146
+ },
147
+ "6vgr": {
148
+ "chain": "A",
149
+ "organism": "Homo sapiens",
150
+ "classification": "human"
151
+ },
152
+ "4nnp": {
153
+ "chain": "B",
154
+ "organism": "Staphylococcus aureus subsp. aureus",
155
+ "classification": "bacterial"
156
+ },
157
+ "5lsp": {
158
+ "chain": "P",
159
+ "organism": "Homo sapiens",
160
+ "classification": "human"
161
+ },
162
+ "7ec5": {
163
+ "chain": "B",
164
+ "organism": "Human enterovirus D68",
165
+ "classification": "viral"
166
+ },
167
+ "8ffe": {
168
+ "chain": "A",
169
+ "organism": "Homo sapiens",
170
+ "classification": "human"
171
+ },
172
+ "1mhp": {
173
+ "chain": "B",
174
+ "organism": "Rattus norvegicus",
175
+ "classification": "other"
176
+ },
177
+ "4uu9": {
178
+ "chain": "D",
179
+ "organism": "HOMO SAPIENS",
180
+ "classification": "human"
181
+ },
182
+ "7tuf": {
183
+ "chain": "C",
184
+ "organism": "Homo sapiens",
185
+ "classification": "human"
186
+ },
187
+ "7sue": {
188
+ "chain": "D",
189
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
190
+ "classification": "viral"
191
+ },
192
+ "7ul3": {
193
+ "chain": "A",
194
+ "organism": "Homo sapiens",
195
+ "classification": "human"
196
+ },
197
+ "8dk6": {
198
+ "chain": "E",
199
+ "organism": "Hepatitis C virus isolate HC-J6",
200
+ "classification": "viral"
201
+ },
202
+ "8y0q": {
203
+ "chain": "2",
204
+ "organism": "Foot-and-mouth disease virus O",
205
+ "classification": "viral"
206
+ },
207
+ "7upw": {
208
+ "chain": "A",
209
+ "organism": "Severe acute respiratory syndrome coronavirus",
210
+ "classification": "viral"
211
+ },
212
+ "4qti": {
213
+ "chain": "U",
214
+ "organism": "Homo sapiens",
215
+ "classification": "human"
216
+ },
217
+ "4o58": {
218
+ "chain": "A",
219
+ "organism": "Influenza A virus",
220
+ "classification": "viral"
221
+ },
222
+ "4krp": {
223
+ "chain": "A",
224
+ "organism": "Homo sapiens",
225
+ "classification": "human"
226
+ },
227
+ "4yxl": {
228
+ "chain": "A",
229
+ "organism": "Mesocricetus auratus",
230
+ "classification": "other"
231
+ },
232
+ "7lr3": {
233
+ "chain": "C",
234
+ "organism": "Plasmodium berghei",
235
+ "classification": "other"
236
+ },
237
+ "4f2m": {
238
+ "chain": "E",
239
+ "organism": "TGEV virulent Purdue",
240
+ "classification": "other"
241
+ },
242
+ "5jxe": {
243
+ "chain": "B",
244
+ "organism": "Homo sapiens",
245
+ "classification": "human"
246
+ },
247
+ "8tzw": {
248
+ "chain": "C",
249
+ "organism": "Homo sapiens",
250
+ "classification": "human"
251
+ },
252
+ "6elu": {
253
+ "chain": "A",
254
+ "organism": "Trypanosoma brucei rhodesiense",
255
+ "classification": "other"
256
+ },
257
+ "6nyq": {
258
+ "chain": "C",
259
+ "organism": "Mus musculus",
260
+ "classification": "other"
261
+ },
262
+ "9jbq": {
263
+ "chain": "C",
264
+ "organism": "Pseudomonas aeruginosa",
265
+ "classification": "bacterial"
266
+ },
267
+ "7wn8": {
268
+ "chain": "C",
269
+ "organism": "Homo sapiens",
270
+ "classification": "human"
271
+ },
272
+ "7quh": {
273
+ "chain": "A",
274
+ "organism": "Homo sapiens",
275
+ "classification": "human"
276
+ },
277
+ "7o9s": {
278
+ "chain": "A",
279
+ "organism": "Hantaan orthohantavirus",
280
+ "classification": "viral"
281
+ },
282
+ "8pg0": {
283
+ "chain": "A",
284
+ "organism": "Homo sapiens",
285
+ "classification": "human"
286
+ },
287
+ "1g9n": {
288
+ "chain": "G",
289
+ "organism": "Human immunodeficiency virus 1",
290
+ "classification": "viral"
291
+ },
292
+ "5bk2": {
293
+ "chain": "A",
294
+ "organism": "Escherichia coli",
295
+ "classification": "bacterial"
296
+ },
297
+ "7uvf": {
298
+ "chain": "B",
299
+ "organism": "Homo sapiens",
300
+ "classification": "human"
301
+ },
302
+ "6mei": {
303
+ "chain": "C",
304
+ "organism": "Hepacivirus C",
305
+ "classification": "viral"
306
+ },
307
+ "5w3l": {
308
+ "chain": "B",
309
+ "organism": "Human rhinovirus 14",
310
+ "classification": "viral"
311
+ },
312
+ "8tco": {
313
+ "chain": "C",
314
+ "organism": "Human betaherpesvirus 5",
315
+ "classification": "viral"
316
+ },
317
+ "3u9p": {
318
+ "chain": "C",
319
+ "organism": "Mus musculus",
320
+ "classification": "other"
321
+ },
322
+ "6wio": {
323
+ "chain": "C",
324
+ "organism": "Homo sapiens",
325
+ "classification": "human"
326
+ },
327
+ "8vzn": {
328
+ "chain": "A",
329
+ "organism": "Mus musculus",
330
+ "classification": "other"
331
+ },
332
+ "5bv7": {
333
+ "chain": "A",
334
+ "organism": "Homo sapiens",
335
+ "classification": "human"
336
+ },
337
+ "7jkt": {
338
+ "chain": "G",
339
+ "organism": "Human immunodeficiency virus 1",
340
+ "classification": "viral"
341
+ },
342
+ "7x28": {
343
+ "chain": "F",
344
+ "organism": "Middle East respiratory syndrome-related coronavirus",
345
+ "classification": "viral"
346
+ },
347
+ "6xxv": {
348
+ "chain": "C",
349
+ "organism": "Homo sapiens",
350
+ "classification": "human"
351
+ },
352
+ "3ld8": {
353
+ "chain": "A",
354
+ "organism": "Homo sapiens",
355
+ "classification": "human"
356
+ },
357
+ "4lst": {
358
+ "chain": "G",
359
+ "organism": "Human immunodeficiency virus 1",
360
+ "classification": "viral"
361
+ },
362
+ "4d3c": {
363
+ "chain": "A",
364
+ "organism": "HOMO SAPIENS",
365
+ "classification": "human"
366
+ },
367
+ "8a44": {
368
+ "chain": "A",
369
+ "organism": "Plasmodium vivax",
370
+ "classification": "other"
371
+ },
372
+ "4hwb": {
373
+ "chain": "A",
374
+ "organism": "Homo sapiens",
375
+ "classification": "human"
376
+ },
377
+ "3v6o": {
378
+ "chain": "B",
379
+ "organism": "Homo sapiens",
380
+ "classification": "human"
381
+ },
382
+ "6ogx": {
383
+ "chain": "G",
384
+ "organism": "Homo sapiens",
385
+ "classification": "human"
386
+ },
387
+ "2xra": {
388
+ "chain": "A",
389
+ "organism": "SYNTHETIC CONSTRUCT",
390
+ "classification": "other"
391
+ },
392
+ "6q20": {
393
+ "chain": "A",
394
+ "organism": "Influenza A virus (strain A/Japan/305/1957 H2N2)",
395
+ "classification": "viral"
396
+ },
397
+ "4jqi": {
398
+ "chain": "A",
399
+ "organism": "Rattus norvegicus",
400
+ "classification": "other"
401
+ },
402
+ "5ush": {
403
+ "chain": "X",
404
+ "organism": "Vaccinia virus",
405
+ "classification": "viral"
406
+ },
407
+ "3hae": {
408
+ "chain": "K",
409
+ "organism": "Homo sapiens",
410
+ "classification": "human"
411
+ },
412
+ "2hmi": {
413
+ "chain": "B",
414
+ "organism": "Human immunodeficiency virus 1",
415
+ "classification": "viral"
416
+ },
417
+ "3vg9": {
418
+ "chain": "A",
419
+ "organism": "Homo sapiens",
420
+ "classification": "human"
421
+ },
422
+ "6h2y": {
423
+ "chain": "D",
424
+ "organism": "Neisseria meningitidis",
425
+ "classification": "other"
426
+ },
427
+ "8u03": {
428
+ "chain": "C",
429
+ "organism": "Homo sapiens",
430
+ "classification": "human"
431
+ },
432
+ "6bkc": {
433
+ "chain": "E",
434
+ "organism": "Recombinant Hepatitis C virus HK6a/JFH-1",
435
+ "classification": "viral"
436
+ },
437
+ "6z3q": {
438
+ "chain": "C",
439
+ "organism": "Enterovirus A71",
440
+ "classification": "viral"
441
+ },
442
+ "8dn6": {
443
+ "chain": "A",
444
+ "organism": "Arabidopsis thaliana",
445
+ "classification": "other"
446
+ },
447
+ "4qci": {
448
+ "chain": "D",
449
+ "organism": "Homo sapiens",
450
+ "classification": "human"
451
+ },
452
+ "3d85": {
453
+ "chain": "C",
454
+ "organism": "Homo sapiens",
455
+ "classification": "human"
456
+ },
457
+ "3hi6": {
458
+ "chain": "A",
459
+ "organism": "Homo sapiens",
460
+ "classification": "human"
461
+ },
462
+ "8c7h": {
463
+ "chain": "D",
464
+ "organism": "Homo sapiens",
465
+ "classification": "human"
466
+ },
467
+ "7mrz": {
468
+ "chain": "C",
469
+ "organism": "Homo sapiens",
470
+ "classification": "human"
471
+ },
472
+ "7zqt": {
473
+ "chain": "E",
474
+ "organism": "Helicobacter pylori",
475
+ "classification": "other"
476
+ },
477
+ "7ued": {
478
+ "chain": "M",
479
+ "organism": "Homo sapiens",
480
+ "classification": "human"
481
+ },
482
+ "6wgl": {
483
+ "chain": "C",
484
+ "organism": "Homo sapiens",
485
+ "classification": "human"
486
+ },
487
+ "1nsn": {
488
+ "chain": "S",
489
+ "organism": "Staphylococcus aureus",
490
+ "classification": "bacterial"
491
+ },
492
+ "8y31": {
493
+ "chain": "E",
494
+ "organism": "Homo sapiens",
495
+ "classification": "human"
496
+ },
497
+ "6pe8": {
498
+ "chain": "U",
499
+ "organism": "Homo sapiens",
500
+ "classification": "human"
501
+ },
502
+ "1xiw": {
503
+ "chain": "B",
504
+ "organism": "Homo sapiens",
505
+ "classification": "human"
506
+ },
507
+ "7wg3": {
508
+ "chain": "K",
509
+ "organism": "Bos taurus",
510
+ "classification": "other"
511
+ },
512
+ "8dn7": {
513
+ "chain": "C",
514
+ "organism": "Pisum sativum",
515
+ "classification": "other"
516
+ },
517
+ "2hfg": {
518
+ "chain": "R",
519
+ "organism": "Homo sapiens",
520
+ "classification": "human"
521
+ },
522
+ "5xez": {
523
+ "chain": "A",
524
+ "organism": "Homo sapiens",
525
+ "classification": "human"
526
+ },
527
+ "8v91": {
528
+ "chain": "B",
529
+ "organism": "Homo sapiens",
530
+ "classification": "human"
531
+ },
532
+ "2j4w": {
533
+ "chain": "D",
534
+ "organism": "PLASMODIUM VIVAX",
535
+ "classification": "other"
536
+ },
537
+ "8e8x": {
538
+ "chain": "1",
539
+ "organism": "Human poliovirus 3 strain Sabin",
540
+ "classification": "viral"
541
+ },
542
+ "1kb5": {
543
+ "chain": "A",
544
+ "organism": "Mus musculus",
545
+ "classification": "other"
546
+ },
547
+ "6gv4": {
548
+ "chain": "B",
549
+ "organism": "Human parechovirus 3",
550
+ "classification": "viral"
551
+ },
552
+ "8sxp": {
553
+ "chain": "C",
554
+ "organism": "Ophiophagus hannah",
555
+ "classification": "other"
556
+ },
557
+ "6ztr": {
558
+ "chain": "I",
559
+ "organism": "Homo sapiens",
560
+ "classification": "human"
561
+ },
562
+ "6cw3": {
563
+ "chain": "F",
564
+ "organism": "Saccharomyces cerevisiae S288c",
565
+ "classification": "fungal"
566
+ },
567
+ "8tq9": {
568
+ "chain": "B",
569
+ "organism": "Mus musculus",
570
+ "classification": "other"
571
+ },
572
+ "6wfw": {
573
+ "chain": "G",
574
+ "organism": "Streptococcus sp. group G",
575
+ "classification": "bacterial"
576
+ },
577
+ "8ee0": {
578
+ "chain": "A",
579
+ "organism": "Saccharopolyspora erythraea",
580
+ "classification": "other"
581
+ },
582
+ "3lev": {
583
+ "chain": "A",
584
+ "organism": "Thermus aquaticus",
585
+ "classification": "other"
586
+ },
587
+ "6y9b": {
588
+ "chain": "C",
589
+ "organism": "Homo sapiens",
590
+ "classification": "human"
591
+ },
592
+ "7v23": {
593
+ "chain": "A",
594
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
595
+ "classification": "viral"
596
+ },
597
+ "8bk2": {
598
+ "chain": "B",
599
+ "organism": "Neisseria meningitidis serogroup B",
600
+ "classification": "other"
601
+ },
602
+ "5h35": {
603
+ "chain": "C",
604
+ "organism": "Sulfolobus solfataricus",
605
+ "classification": "other"
606
+ },
607
+ "5ldn": {
608
+ "chain": "A",
609
+ "organism": "Human adenovirus C serotype 5",
610
+ "classification": "viral"
611
+ },
612
+ "1ors": {
613
+ "chain": "C",
614
+ "organism": "Aeropyrum pernix",
615
+ "classification": "other"
616
+ },
617
+ "6fgb": {
618
+ "chain": "A",
619
+ "organism": "Homo sapiens",
620
+ "classification": "human"
621
+ },
622
+ "7tyv": {
623
+ "chain": "B",
624
+ "organism": "Lassa virus",
625
+ "classification": "viral"
626
+ },
627
+ "3qwo": {
628
+ "chain": "C",
629
+ "organism": "Staphylococcus aureus",
630
+ "classification": "bacterial"
631
+ },
632
+ "7n8q": {
633
+ "chain": "A",
634
+ "organism": "Human immunodeficiency virus 1",
635
+ "classification": "viral"
636
+ },
637
+ "4ye4": {
638
+ "chain": "G",
639
+ "organism": "Human immunodeficiency virus",
640
+ "classification": "viral"
641
+ },
642
+ "3uc0": {
643
+ "chain": "B",
644
+ "organism": "Dengue virus 4",
645
+ "classification": "viral"
646
+ },
647
+ "6mlk": {
648
+ "chain": "A",
649
+ "organism": "Saccharopolyspora erythraea",
650
+ "classification": "other"
651
+ },
652
+ "6hga": {
653
+ "chain": "B",
654
+ "organism": "Homo sapiens",
655
+ "classification": "human"
656
+ },
657
+ "1fns": {
658
+ "chain": "A",
659
+ "organism": "Homo sapiens",
660
+ "classification": "human"
661
+ },
662
+ "7n0u": {
663
+ "chain": "C",
664
+ "organism": "Betula pendula",
665
+ "classification": "other"
666
+ },
667
+ "8db4": {
668
+ "chain": "E",
669
+ "organism": "Arachis hypogaea",
670
+ "classification": "other"
671
+ },
672
+ "6id4": {
673
+ "chain": "E",
674
+ "organism": "Homo sapiens",
675
+ "classification": "human"
676
+ },
677
+ "6svl": {
678
+ "chain": "N",
679
+ "organism": "Homo sapiens",
680
+ "classification": "human"
681
+ },
682
+ "4wfg": {
683
+ "chain": "B",
684
+ "organism": "Homo sapiens",
685
+ "classification": "human"
686
+ },
687
+ "7shu": {
688
+ "chain": "A",
689
+ "organism": "Homo sapiens",
690
+ "classification": "human"
691
+ },
692
+ "8k3c": {
693
+ "chain": "B",
694
+ "organism": "Henipavirus nipahense",
695
+ "classification": "viral"
696
+ },
697
+ "5e94": {
698
+ "chain": "G",
699
+ "organism": "Homo sapiens",
700
+ "classification": "human"
701
+ },
702
+ "8byu": {
703
+ "chain": "A",
704
+ "organism": "Homo sapiens",
705
+ "classification": "human"
706
+ },
707
+ "4cad": {
708
+ "chain": "I",
709
+ "organism": "METHANOCOCCUS MARIPALUDIS",
710
+ "classification": "other"
711
+ },
712
+ "4o9h": {
713
+ "chain": "A",
714
+ "organism": "Homo sapiens",
715
+ "classification": "human"
716
+ },
717
+ "5utz": {
718
+ "chain": "A",
719
+ "organism": "Homo sapiens",
720
+ "classification": "human"
721
+ },
722
+ "5eu7": {
723
+ "chain": "B",
724
+ "organism": "Human immunodeficiency virus 1",
725
+ "classification": "viral"
726
+ },
727
+ "8e8r": {
728
+ "chain": "2",
729
+ "organism": "Human poliovirus 3 strain Sabin",
730
+ "classification": "viral"
731
+ },
732
+ "8uky": {
733
+ "chain": "D",
734
+ "organism": "Homo sapiens",
735
+ "classification": "human"
736
+ },
737
+ "9dh2": {
738
+ "chain": "T",
739
+ "organism": "Homo sapiens",
740
+ "classification": "human"
741
+ },
742
+ "7x8q": {
743
+ "chain": "D",
744
+ "organism": "Homo sapiens",
745
+ "classification": "human"
746
+ },
747
+ "8yxi": {
748
+ "chain": "A",
749
+ "organism": "Severe fever with thrombocytopenia syndrome virus",
750
+ "classification": "viral"
751
+ },
752
+ "5mo9": {
753
+ "chain": "X",
754
+ "organism": "Homo sapiens",
755
+ "classification": "human"
756
+ },
757
+ "3skj": {
758
+ "chain": "F",
759
+ "organism": "Homo sapiens",
760
+ "classification": "human"
761
+ },
762
+ "3grw": {
763
+ "chain": "A",
764
+ "organism": "Homo sapiens",
765
+ "classification": "human"
766
+ },
767
+ "7xw6": {
768
+ "chain": "R",
769
+ "organism": "Homo sapiens",
770
+ "classification": "human"
771
+ },
772
+ "6wzk": {
773
+ "chain": "E",
774
+ "organism": "Macaca fascicularis",
775
+ "classification": "other"
776
+ },
777
+ "8u2c": {
778
+ "chain": "A",
779
+ "organism": "Homo sapiens",
780
+ "classification": "human"
781
+ },
782
+ "6ks0": {
783
+ "chain": "A",
784
+ "organism": "Homo sapiens",
785
+ "classification": "human"
786
+ },
787
+ "7yms": {
788
+ "chain": "A",
789
+ "organism": "Coxsackievirus A16",
790
+ "classification": "viral"
791
+ },
792
+ "8j8r": {
793
+ "chain": "B",
794
+ "organism": "Bos taurus",
795
+ "classification": "other"
796
+ },
797
+ "6ba5": {
798
+ "chain": "N",
799
+ "organism": "Homo sapiens",
800
+ "classification": "human"
801
+ },
802
+ "7daa": {
803
+ "chain": "A",
804
+ "organism": "Homo sapiens",
805
+ "classification": "human"
806
+ },
807
+ "1dee": {
808
+ "chain": "H",
809
+ "organism": "Staphylococcus aureus",
810
+ "classification": "bacterial"
811
+ },
812
+ "7ox3": {
813
+ "chain": "C",
814
+ "organism": "Homo sapiens",
815
+ "classification": "human"
816
+ },
817
+ "6was": {
818
+ "chain": "G",
819
+ "organism": "Homo sapiens",
820
+ "classification": "human"
821
+ },
822
+ "8tq8": {
823
+ "chain": "C",
824
+ "organism": "Mus musculus",
825
+ "classification": "other"
826
+ },
827
+ "4yxk": {
828
+ "chain": "A",
829
+ "organism": "Cervus elaphus nelsoni",
830
+ "classification": "other"
831
+ },
832
+ "6u36": {
833
+ "chain": "B",
834
+ "organism": "Homo sapiens",
835
+ "classification": "human"
836
+ },
837
+ "2qqn": {
838
+ "chain": "A",
839
+ "organism": "Homo sapiens",
840
+ "classification": "human"
841
+ },
842
+ "3cvh": {
843
+ "chain": "M",
844
+ "organism": "Mus musculus",
845
+ "classification": "other"
846
+ },
847
+ "4jpk": {
848
+ "chain": "A",
849
+ "organism": "Human immunodeficiency virus 1",
850
+ "classification": "viral"
851
+ },
852
+ "1v7m": {
853
+ "chain": "V",
854
+ "organism": "Homo sapiens",
855
+ "classification": "human"
856
+ },
857
+ "2ybr": {
858
+ "chain": "I",
859
+ "organism": "CENTRUROIDES NOXIUS HOFFMANN",
860
+ "classification": "other"
861
+ },
862
+ "4ht1": {
863
+ "chain": "T",
864
+ "organism": "Homo sapiens",
865
+ "classification": "human"
866
+ },
867
+ "4bz2": {
868
+ "chain": "A",
869
+ "organism": "DENGUE VIRUS 4",
870
+ "classification": "viral"
871
+ },
872
+ "8hhy": {
873
+ "chain": "A",
874
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
875
+ "classification": "viral"
876
+ },
877
+ "5epm": {
878
+ "chain": "C",
879
+ "organism": "Ceratogyrus marshalli",
880
+ "classification": "other"
881
+ },
882
+ "7lxx": {
883
+ "chain": "A",
884
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
885
+ "classification": "viral"
886
+ },
887
+ "2zch": {
888
+ "chain": "P",
889
+ "organism": "Homo sapiens",
890
+ "classification": "human"
891
+ },
892
+ "6nms": {
893
+ "chain": "C",
894
+ "organism": "Homo sapiens",
895
+ "classification": "human"
896
+ },
897
+ "6vep": {
898
+ "chain": "Q",
899
+ "organism": "Homo sapiens",
900
+ "classification": "human"
901
+ },
902
+ "8tv3": {
903
+ "chain": "A",
904
+ "organism": "Borreliella burgdorferi B31",
905
+ "classification": "other"
906
+ },
907
+ "6m3b": {
908
+ "chain": "A",
909
+ "organism": "Homo sapiens",
910
+ "classification": "human"
911
+ },
912
+ "7zoz": {
913
+ "chain": "A",
914
+ "organism": "Homo sapiens",
915
+ "classification": "human"
916
+ },
917
+ "4rwy": {
918
+ "chain": "A",
919
+ "organism": "Human immunodeficiency virus 1",
920
+ "classification": "viral"
921
+ },
922
+ "3u4e": {
923
+ "chain": "J",
924
+ "organism": "Human immunodeficiency virus 1",
925
+ "classification": "viral"
926
+ },
927
+ "4liq": {
928
+ "chain": "E",
929
+ "organism": "Homo sapiens",
930
+ "classification": "human"
931
+ },
932
+ "7rew": {
933
+ "chain": "I",
934
+ "organism": "Macaca fascicularis",
935
+ "classification": "other"
936
+ },
937
+ "8tlm": {
938
+ "chain": "C",
939
+ "organism": "Homo sapiens",
940
+ "classification": "human"
941
+ },
942
+ "8vsj": {
943
+ "chain": "A",
944
+ "organism": "Homo sapiens",
945
+ "classification": "human"
946
+ },
947
+ "6ks1": {
948
+ "chain": "A",
949
+ "organism": "Homo sapiens",
950
+ "classification": "human"
951
+ },
952
+ "4leo": {
953
+ "chain": "C",
954
+ "organism": "Homo sapiens",
955
+ "classification": "human"
956
+ },
957
+ "7s13": {
958
+ "chain": "C",
959
+ "organism": "Mus musculus",
960
+ "classification": "other"
961
+ },
962
+ "6pi7": {
963
+ "chain": "A",
964
+ "organism": "Homo sapiens",
965
+ "classification": "human"
966
+ },
967
+ "1kyo": {
968
+ "chain": "P",
969
+ "organism": "Saccharomyces cerevisiae",
970
+ "classification": "fungal"
971
+ },
972
+ "8fdo": {
973
+ "chain": "C",
974
+ "organism": "Escherichia coli",
975
+ "classification": "bacterial"
976
+ },
977
+ "6u6u": {
978
+ "chain": "R",
979
+ "organism": "Homo sapiens",
980
+ "classification": "human"
981
+ },
982
+ "8wre": {
983
+ "chain": "A",
984
+ "organism": "Homo sapiens",
985
+ "classification": "human"
986
+ },
987
+ "7s7i": {
988
+ "chain": "B",
989
+ "organism": "Homo sapiens",
990
+ "classification": "human"
991
+ },
992
+ "7lf8": {
993
+ "chain": "A",
994
+ "organism": "Homo sapiens",
995
+ "classification": "human"
996
+ },
997
+ "6phb": {
998
+ "chain": "I",
999
+ "organism": "Plasmodium falciparum",
1000
+ "classification": "other"
1001
+ },
1002
+ "8fgx": {
1003
+ "chain": "C",
1004
+ "organism": "Homo sapiens",
1005
+ "classification": "human"
1006
+ },
1007
+ "3gbm": {
1008
+ "chain": "A",
1009
+ "organism": "Influenza A virus (A/Viet Nam/1203/2004(H5N1))",
1010
+ "classification": "viral"
1011
+ },
1012
+ "2j5l": {
1013
+ "chain": "A",
1014
+ "organism": "PLASMODIUM FALCIPARUM",
1015
+ "classification": "other"
1016
+ },
1017
+ "7ce2": {
1018
+ "chain": "A",
1019
+ "organism": "Clostridium tetani",
1020
+ "classification": "other"
1021
+ },
1022
+ "6bf4": {
1023
+ "chain": "A",
1024
+ "organism": "Human immunodeficiency virus 1",
1025
+ "classification": "viral"
1026
+ },
1027
+ "2xqy": {
1028
+ "chain": "E",
1029
+ "organism": "SUID HERPESVIRUS",
1030
+ "classification": "viral"
1031
+ },
1032
+ "7q0i": {
1033
+ "chain": "D",
1034
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1035
+ "classification": "viral"
1036
+ },
1037
+ "6mug": {
1038
+ "chain": "G",
1039
+ "organism": "Human immunodeficiency virus 1",
1040
+ "classification": "viral"
1041
+ },
1042
+ "4yue": {
1043
+ "chain": "C",
1044
+ "organism": "Mus musculus",
1045
+ "classification": "other"
1046
+ },
1047
+ "4qww": {
1048
+ "chain": "B",
1049
+ "organism": "Bungarus fasciatus",
1050
+ "classification": "other"
1051
+ },
1052
+ "6bfq": {
1053
+ "chain": "G",
1054
+ "organism": "Homo sapiens",
1055
+ "classification": "human"
1056
+ },
1057
+ "8too": {
1058
+ "chain": "I",
1059
+ "organism": "Epstein-Barr virus",
1060
+ "classification": "viral"
1061
+ },
1062
+ "7dm2": {
1063
+ "chain": "A",
1064
+ "organism": "Mycobacterium tuberculosis H37Rv",
1065
+ "classification": "bacterial"
1066
+ },
1067
+ "8iuy": {
1068
+ "chain": "A",
1069
+ "organism": "H7N9 subtype",
1070
+ "classification": "other"
1071
+ },
1072
+ "6wtu": {
1073
+ "chain": "A",
1074
+ "organism": "Plasmodium vivax (strain Salvador I)",
1075
+ "classification": "other"
1076
+ },
1077
+ "8u1c": {
1078
+ "chain": "A",
1079
+ "organism": "Influenza B virus (B/Iowa/06/2017)",
1080
+ "classification": "viral"
1081
+ },
1082
+ "7a0x": {
1083
+ "chain": "B",
1084
+ "organism": "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)",
1085
+ "classification": "bacterial"
1086
+ },
1087
+ "7txt": {
1088
+ "chain": "S",
1089
+ "organism": "Homo sapiens",
1090
+ "classification": "human"
1091
+ },
1092
+ "3liz": {
1093
+ "chain": "A",
1094
+ "organism": "Blattella germanica",
1095
+ "classification": "other"
1096
+ },
1097
+ "6ppg": {
1098
+ "chain": "G",
1099
+ "organism": "Homo sapiens",
1100
+ "classification": "human"
1101
+ },
1102
+ "6nnf": {
1103
+ "chain": "G",
1104
+ "organism": "Human immunodeficiency virus 1",
1105
+ "classification": "viral"
1106
+ },
1107
+ "4ma7": {
1108
+ "chain": "A",
1109
+ "organism": "Mus musculus",
1110
+ "classification": "other"
1111
+ },
1112
+ "8djg": {
1113
+ "chain": "F",
1114
+ "organism": "Homo sapiens",
1115
+ "classification": "human"
1116
+ },
1117
+ "7kd6": {
1118
+ "chain": "W",
1119
+ "organism": "Homo sapiens",
1120
+ "classification": "human"
1121
+ },
1122
+ "4m1g": {
1123
+ "chain": "B",
1124
+ "organism": "Vaccinia virus",
1125
+ "classification": "viral"
1126
+ },
1127
+ "8sic": {
1128
+ "chain": "G",
1129
+ "organism": "Human herpesvirus 4",
1130
+ "classification": "viral"
1131
+ },
1132
+ "5tud": {
1133
+ "chain": "A",
1134
+ "organism": "Homo sapiens",
1135
+ "classification": "human"
1136
+ },
1137
+ "6apb": {
1138
+ "chain": "C",
1139
+ "organism": "Human respiratory syncytial virus",
1140
+ "classification": "viral"
1141
+ },
1142
+ "4a6y": {
1143
+ "chain": "A",
1144
+ "organism": "MUS MUSCULUS",
1145
+ "classification": "other"
1146
+ },
1147
+ "1oaz": {
1148
+ "chain": "A",
1149
+ "organism": "ESCHERICHIA COLI",
1150
+ "classification": "bacterial"
1151
+ },
1152
+ "5vic": {
1153
+ "chain": "E",
1154
+ "organism": "Dengue virus type 1 (strain Nauru/West Pac/1974)",
1155
+ "classification": "viral"
1156
+ },
1157
+ "6k65": {
1158
+ "chain": "A",
1159
+ "organism": "Staphylococcus aureus (strain NCTC 8325)",
1160
+ "classification": "bacterial"
1161
+ },
1162
+ "8r1d": {
1163
+ "chain": "C",
1164
+ "organism": "Homo sapiens",
1165
+ "classification": "human"
1166
+ },
1167
+ "1lk3": {
1168
+ "chain": "B",
1169
+ "organism": "Homo sapiens",
1170
+ "classification": "human"
1171
+ },
1172
+ "6meh": {
1173
+ "chain": "C",
1174
+ "organism": "Hepacivirus C",
1175
+ "classification": "viral"
1176
+ },
1177
+ "7e7x": {
1178
+ "chain": "A",
1179
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1180
+ "classification": "viral"
1181
+ },
1182
+ "6wzl": {
1183
+ "chain": "E",
1184
+ "organism": "Macaca fascicularis",
1185
+ "classification": "other"
1186
+ },
1187
+ "6j14": {
1188
+ "chain": "G",
1189
+ "organism": "Homo sapiens",
1190
+ "classification": "human"
1191
+ },
1192
+ "4hc1": {
1193
+ "chain": "A",
1194
+ "organism": "Homo sapiens",
1195
+ "classification": "human"
1196
+ },
1197
+ "1yy9": {
1198
+ "chain": "A",
1199
+ "organism": "Homo sapiens",
1200
+ "classification": "human"
1201
+ },
1202
+ "8v2e": {
1203
+ "chain": "C",
1204
+ "organism": "synthetic construct",
1205
+ "classification": "other"
1206
+ },
1207
+ "6wh9": {
1208
+ "chain": "D",
1209
+ "organism": "Saccharopolyspora erythraea",
1210
+ "classification": "other"
1211
+ },
1212
+ "5hbv": {
1213
+ "chain": "B",
1214
+ "organism": "Mus musculus",
1215
+ "classification": "other"
1216
+ },
1217
+ "5wt9": {
1218
+ "chain": "G",
1219
+ "organism": "Homo sapiens",
1220
+ "classification": "human"
1221
+ },
1222
+ "5fcu": {
1223
+ "chain": "G",
1224
+ "organism": "Human immunodeficiency virus 1",
1225
+ "classification": "viral"
1226
+ },
1227
+ "6dkj": {
1228
+ "chain": "D",
1229
+ "organism": "Homo sapiens",
1230
+ "classification": "human"
1231
+ },
1232
+ "6a3w": {
1233
+ "chain": "L",
1234
+ "organism": "Homo sapiens",
1235
+ "classification": "human"
1236
+ },
1237
+ "4xp9": {
1238
+ "chain": "C",
1239
+ "organism": "Drosophila melanogaster",
1240
+ "classification": "other"
1241
+ },
1242
+ "4ogy": {
1243
+ "chain": "A",
1244
+ "organism": "Homo sapiens",
1245
+ "classification": "human"
1246
+ },
1247
+ "7k7h": {
1248
+ "chain": "A",
1249
+ "organism": "Salmonella enterica subsp. enterica serovar Typhi str. CT18",
1250
+ "classification": "bacterial"
1251
+ },
1252
+ "7rp2": {
1253
+ "chain": "A",
1254
+ "organism": "Homo sapiens",
1255
+ "classification": "human"
1256
+ },
1257
+ "3q3g": {
1258
+ "chain": "I",
1259
+ "organism": "Homo sapiens",
1260
+ "classification": "human"
1261
+ },
1262
+ "6mto": {
1263
+ "chain": "T",
1264
+ "organism": "Human immunodeficiency virus 1",
1265
+ "classification": "viral"
1266
+ },
1267
+ "5w4l": {
1268
+ "chain": "G",
1269
+ "organism": "Human immunodeficiency virus 1",
1270
+ "classification": "viral"
1271
+ },
1272
+ "7bsc": {
1273
+ "chain": "A",
1274
+ "organism": "Dengue virus 2",
1275
+ "classification": "viral"
1276
+ },
1277
+ "5vpl": {
1278
+ "chain": "A",
1279
+ "organism": "Dermatophagoides farinae",
1280
+ "classification": "other"
1281
+ },
1282
+ "4zso": {
1283
+ "chain": "E",
1284
+ "organism": "Homo sapiens",
1285
+ "classification": "human"
1286
+ },
1287
+ "5e8d": {
1288
+ "chain": "A",
1289
+ "organism": "Homo sapiens",
1290
+ "classification": "human"
1291
+ },
1292
+ "6umg": {
1293
+ "chain": "C",
1294
+ "organism": "Homo sapiens",
1295
+ "classification": "human"
1296
+ },
1297
+ "2yss": {
1298
+ "chain": "C",
1299
+ "organism": "Gallus gallus",
1300
+ "classification": "other"
1301
+ },
1302
+ "6xsw": {
1303
+ "chain": "X",
1304
+ "organism": "Homo sapiens",
1305
+ "classification": "human"
1306
+ },
1307
+ "6kz0": {
1308
+ "chain": "J",
1309
+ "organism": "Human rhinovirus 14",
1310
+ "classification": "viral"
1311
+ },
1312
+ "6wzj": {
1313
+ "chain": "E",
1314
+ "organism": "Macaca fascicularis",
1315
+ "classification": "other"
1316
+ },
1317
+ "4fqj": {
1318
+ "chain": "A",
1319
+ "organism": "Influenza B virus",
1320
+ "classification": "viral"
1321
+ },
1322
+ "6uym": {
1323
+ "chain": "E",
1324
+ "organism": "Hepatitis C virus (isolate H)",
1325
+ "classification": "viral"
1326
+ },
1327
+ "2xqb": {
1328
+ "chain": "A",
1329
+ "organism": "Homo sapiens",
1330
+ "classification": "human"
1331
+ },
1332
+ "8oxv": {
1333
+ "chain": "A",
1334
+ "organism": "Homo sapiens",
1335
+ "classification": "human"
1336
+ },
1337
+ "5b3j": {
1338
+ "chain": "C",
1339
+ "organism": "Rattus norvegicus",
1340
+ "classification": "other"
1341
+ },
1342
+ "7ox1": {
1343
+ "chain": "X",
1344
+ "organism": "Homo sapiens",
1345
+ "classification": "human"
1346
+ },
1347
+ "9dwe": {
1348
+ "chain": "A",
1349
+ "organism": "Influenza A virus",
1350
+ "classification": "viral"
1351
+ },
1352
+ "3lhp": {
1353
+ "chain": "S",
1354
+ "organism": "Artificial gene",
1355
+ "classification": "other"
1356
+ },
1357
+ "8cz5": {
1358
+ "chain": "A",
1359
+ "organism": "Lagovirus",
1360
+ "classification": "viral"
1361
+ },
1362
+ "7mjk": {
1363
+ "chain": "C",
1364
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1365
+ "classification": "viral"
1366
+ },
1367
+ "8txp": {
1368
+ "chain": "A",
1369
+ "organism": "Influenza A virus",
1370
+ "classification": "viral"
1371
+ },
1372
+ "6al5": {
1373
+ "chain": "A",
1374
+ "organism": "Homo sapiens",
1375
+ "classification": "human"
1376
+ },
1377
+ "8sgi": {
1378
+ "chain": "A",
1379
+ "organism": "Homo sapiens",
1380
+ "classification": "human"
1381
+ },
1382
+ "3l95": {
1383
+ "chain": "X",
1384
+ "organism": "Homo sapiens",
1385
+ "classification": "human"
1386
+ },
1387
+ "3tje": {
1388
+ "chain": "F",
1389
+ "organism": "Homo sapiens",
1390
+ "classification": "human"
1391
+ },
1392
+ "3hmx": {
1393
+ "chain": "A",
1394
+ "organism": "Homo sapiens",
1395
+ "classification": "human"
1396
+ },
1397
+ "7l0l": {
1398
+ "chain": "E",
1399
+ "organism": "Influenza A virus (A/Canada/720/2005(H2N2))",
1400
+ "classification": "viral"
1401
+ },
1402
+ "8aci": {
1403
+ "chain": "A",
1404
+ "organism": "Homo sapiens",
1405
+ "classification": "human"
1406
+ },
1407
+ "1nfd": {
1408
+ "chain": "D",
1409
+ "organism": "Mus musculus",
1410
+ "classification": "other"
1411
+ },
1412
+ "3vi4": {
1413
+ "chain": "D",
1414
+ "organism": "Homo sapiens",
1415
+ "classification": "human"
1416
+ },
1417
+ "3wih": {
1418
+ "chain": "A",
1419
+ "organism": "Homo sapiens",
1420
+ "classification": "human"
1421
+ },
1422
+ "7so5": {
1423
+ "chain": "A",
1424
+ "organism": "Clostridioides difficile R20291",
1425
+ "classification": "other"
1426
+ },
1427
+ "3r1g": {
1428
+ "chain": "B",
1429
+ "organism": "Homo sapiens",
1430
+ "classification": "human"
1431
+ },
1432
+ "1bgx": {
1433
+ "chain": "T",
1434
+ "organism": "Thermus aquaticus",
1435
+ "classification": "other"
1436
+ },
1437
+ "9c44": {
1438
+ "chain": "C",
1439
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1440
+ "classification": "viral"
1441
+ },
1442
+ "6ewb": {
1443
+ "chain": "C",
1444
+ "organism": "Norovirus Hu/GII.4/Sydney/NSW0514/2012/AU",
1445
+ "classification": "viral"
1446
+ },
1447
+ "5nh3": {
1448
+ "chain": "B",
1449
+ "organism": "Homo sapiens",
1450
+ "classification": "human"
1451
+ },
1452
+ "4yqx": {
1453
+ "chain": "M",
1454
+ "organism": "Mus musculus",
1455
+ "classification": "other"
1456
+ },
1457
+ "9dx6": {
1458
+ "chain": "A",
1459
+ "organism": "Plasmodium vivax",
1460
+ "classification": "other"
1461
+ },
1462
+ "5kn5": {
1463
+ "chain": "C",
1464
+ "organism": "Homo sapiens",
1465
+ "classification": "human"
1466
+ },
1467
+ "3ru8": {
1468
+ "chain": "X",
1469
+ "organism": "Human immunodeficiency virus",
1470
+ "classification": "viral"
1471
+ },
1472
+ "7lxw": {
1473
+ "chain": "A",
1474
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1475
+ "classification": "viral"
1476
+ },
1477
+ "3gbn": {
1478
+ "chain": "A",
1479
+ "organism": "Influenza A virus (A/Brevig Mission/1/1918(H1N1))",
1480
+ "classification": "viral"
1481
+ },
1482
+ "7rt9": {
1483
+ "chain": "Y",
1484
+ "organism": "Homo sapiens",
1485
+ "classification": "human"
1486
+ },
1487
+ "6yio": {
1488
+ "chain": "B",
1489
+ "organism": "Homo sapiens",
1490
+ "classification": "human"
1491
+ },
1492
+ "4ywg": {
1493
+ "chain": "Q",
1494
+ "organism": "Human immunodeficiency virus 1",
1495
+ "classification": "viral"
1496
+ },
1497
+ "3s35": {
1498
+ "chain": "X",
1499
+ "organism": "Homo sapiens",
1500
+ "classification": "human"
1501
+ },
1502
+ "6uyd": {
1503
+ "chain": "F",
1504
+ "organism": "Hepatitis C virus (isolate H)",
1505
+ "classification": "viral"
1506
+ },
1507
+ "5u8r": {
1508
+ "chain": "A",
1509
+ "organism": "Homo sapiens",
1510
+ "classification": "human"
1511
+ },
1512
+ "3kr3": {
1513
+ "chain": "D",
1514
+ "organism": "Homo sapiens",
1515
+ "classification": "human"
1516
+ },
1517
+ "6a77": {
1518
+ "chain": "A",
1519
+ "organism": "Homo sapiens",
1520
+ "classification": "human"
1521
+ },
1522
+ "7zxk": {
1523
+ "chain": "C",
1524
+ "organism": "Homo sapiens",
1525
+ "classification": "human"
1526
+ },
1527
+ "6ion": {
1528
+ "chain": "A",
1529
+ "organism": "Homo sapiens",
1530
+ "classification": "human"
1531
+ },
1532
+ "6wit": {
1533
+ "chain": "I",
1534
+ "organism": "Homo sapiens",
1535
+ "classification": "human"
1536
+ },
1537
+ "7joo": {
1538
+ "chain": "C",
1539
+ "organism": "Homo sapiens",
1540
+ "classification": "human"
1541
+ },
1542
+ "4tsa": {
1543
+ "chain": "A",
1544
+ "organism": "Gallus gallus",
1545
+ "classification": "other"
1546
+ },
1547
+ "1ob1": {
1548
+ "chain": "F",
1549
+ "organism": "PLASMODIUM FALCIPARUM",
1550
+ "classification": "other"
1551
+ },
1552
+ "6wzm": {
1553
+ "chain": "F",
1554
+ "organism": "Homo sapiens",
1555
+ "classification": "human"
1556
+ },
1557
+ "1fj1": {
1558
+ "chain": "E",
1559
+ "organism": "Borrelia burgdorferi",
1560
+ "classification": "other"
1561
+ },
1562
+ "7ttx": {
1563
+ "chain": "A",
1564
+ "organism": "Bat coronavirus RaTG13",
1565
+ "classification": "viral"
1566
+ },
1567
+ "6wmw": {
1568
+ "chain": "B",
1569
+ "organism": "Homo sapiens",
1570
+ "classification": "human"
1571
+ },
1572
+ "4ot1": {
1573
+ "chain": "A",
1574
+ "organism": "Human Cytomegalovirus",
1575
+ "classification": "viral"
1576
+ },
1577
+ "7qu2": {
1578
+ "chain": "C",
1579
+ "organism": "Argentinian mammarenavirus",
1580
+ "classification": "viral"
1581
+ },
1582
+ "7sgm": {
1583
+ "chain": "B",
1584
+ "organism": "Homo sapiens",
1585
+ "classification": "human"
1586
+ },
1587
+ "7phu": {
1588
+ "chain": "A",
1589
+ "organism": "Plasmodium falciparum (isolate 3D7)",
1590
+ "classification": "other"
1591
+ },
1592
+ "6sni": {
1593
+ "chain": "X",
1594
+ "organism": "Saccharomyces cerevisiae",
1595
+ "classification": "fungal"
1596
+ },
1597
+ "6aod": {
1598
+ "chain": "C",
1599
+ "organism": "Homo sapiens",
1600
+ "classification": "human"
1601
+ },
1602
+ "8e8y": {
1603
+ "chain": "1",
1604
+ "organism": "Human poliovirus 2 strain Sabin",
1605
+ "classification": "viral"
1606
+ },
1607
+ "1e6j": {
1608
+ "chain": "P",
1609
+ "organism": "HIV-1 M\\:B_HXB2R",
1610
+ "classification": "other"
1611
+ },
1612
+ "8gat": {
1613
+ "chain": "A",
1614
+ "organism": "Homo sapiens",
1615
+ "classification": "human"
1616
+ },
1617
+ "5u3m": {
1618
+ "chain": "A",
1619
+ "organism": "Human immunodeficiency virus 1",
1620
+ "classification": "viral"
1621
+ },
1622
+ "7ki6": {
1623
+ "chain": "A",
1624
+ "organism": "Hendra henipavirus",
1625
+ "classification": "viral"
1626
+ },
1627
+ "2uzi": {
1628
+ "chain": "R",
1629
+ "organism": "HOMO SAPIENS",
1630
+ "classification": "human"
1631
+ },
1632
+ "4wv1": {
1633
+ "chain": "F",
1634
+ "organism": "Homo sapiens",
1635
+ "classification": "human"
1636
+ },
1637
+ "8caf": {
1638
+ "chain": "G",
1639
+ "organism": "Homo sapiens",
1640
+ "classification": "human"
1641
+ },
1642
+ "7ly0": {
1643
+ "chain": "A",
1644
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1645
+ "classification": "viral"
1646
+ },
1647
+ "5dhv": {
1648
+ "chain": "M",
1649
+ "organism": "Human immunodeficiency virus 1",
1650
+ "classification": "viral"
1651
+ },
1652
+ "5yoy": {
1653
+ "chain": "A",
1654
+ "organism": "Homo sapiens",
1655
+ "classification": "human"
1656
+ },
1657
+ "4g6f": {
1658
+ "chain": "F",
1659
+ "organism": "Human immunodeficiency virus 1",
1660
+ "classification": "viral"
1661
+ },
1662
+ "7xco": {
1663
+ "chain": "C",
1664
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1665
+ "classification": "viral"
1666
+ },
1667
+ "8vyn": {
1668
+ "chain": "C",
1669
+ "organism": "Human betaherpesvirus 5",
1670
+ "classification": "viral"
1671
+ },
1672
+ "8jkf": {
1673
+ "chain": "A",
1674
+ "organism": "Zika virus",
1675
+ "classification": "viral"
1676
+ },
1677
+ "6iek": {
1678
+ "chain": "D",
1679
+ "organism": "Rift valley fever virus",
1680
+ "classification": "viral"
1681
+ },
1682
+ "6wbv": {
1683
+ "chain": "A",
1684
+ "organism": "Homo sapiens",
1685
+ "classification": "human"
1686
+ },
1687
+ "6hx4": {
1688
+ "chain": "A",
1689
+ "organism": "Homo sapiens",
1690
+ "classification": "human"
1691
+ },
1692
+ "7rxl": {
1693
+ "chain": "F",
1694
+ "organism": "Plasmodium falciparum (isolate 3D7)",
1695
+ "classification": "other"
1696
+ },
1697
+ "5fb8": {
1698
+ "chain": "C",
1699
+ "organism": "Homo sapiens",
1700
+ "classification": "human"
1701
+ },
1702
+ "5l0q": {
1703
+ "chain": "A",
1704
+ "organism": "Bos taurus",
1705
+ "classification": "other"
1706
+ },
1707
+ "7a3o": {
1708
+ "chain": "A",
1709
+ "organism": "Dengue virus 1",
1710
+ "classification": "viral"
1711
+ },
1712
+ "6vn1": {
1713
+ "chain": "C",
1714
+ "organism": "Human alphaherpesvirus 3",
1715
+ "classification": "viral"
1716
+ },
1717
+ "8ol9": {
1718
+ "chain": "H",
1719
+ "organism": "Homo sapiens",
1720
+ "classification": "human"
1721
+ },
1722
+ "3mj9": {
1723
+ "chain": "A",
1724
+ "organism": "Mus musculus",
1725
+ "classification": "other"
1726
+ },
1727
+ "6lz9": {
1728
+ "chain": "B",
1729
+ "organism": "Homo sapiens",
1730
+ "classification": "human"
1731
+ },
1732
+ "5d93": {
1733
+ "chain": "A",
1734
+ "organism": "Mus musculus",
1735
+ "classification": "other"
1736
+ },
1737
+ "3i50": {
1738
+ "chain": "E",
1739
+ "organism": "West Nile virus",
1740
+ "classification": "viral"
1741
+ },
1742
+ "6blh": {
1743
+ "chain": "G",
1744
+ "organism": "Human respiratory syncytial virus A (strain rsb6256)",
1745
+ "classification": "viral"
1746
+ },
1747
+ "8jel": {
1748
+ "chain": "J",
1749
+ "organism": "Homo sapiens",
1750
+ "classification": "human"
1751
+ },
1752
+ "5te4": {
1753
+ "chain": "G",
1754
+ "organism": "Human immunodeficiency virus 1",
1755
+ "classification": "viral"
1756
+ },
1757
+ "7m3n": {
1758
+ "chain": "A",
1759
+ "organism": "Canine parvovirus type 2",
1760
+ "classification": "viral"
1761
+ },
1762
+ "6o3b": {
1763
+ "chain": "C",
1764
+ "organism": "Homo sapiens",
1765
+ "classification": "human"
1766
+ },
1767
+ "5lcv": {
1768
+ "chain": "A",
1769
+ "organism": "Zika virus",
1770
+ "classification": "viral"
1771
+ },
1772
+ "8ath": {
1773
+ "chain": "A",
1774
+ "organism": "Homo sapiens",
1775
+ "classification": "human"
1776
+ },
1777
+ "5nmv": {
1778
+ "chain": "K",
1779
+ "organism": "Homo sapiens",
1780
+ "classification": "human"
1781
+ },
1782
+ "8hrx": {
1783
+ "chain": "A",
1784
+ "organism": "Homo sapiens",
1785
+ "classification": "human"
1786
+ },
1787
+ "7uvi": {
1788
+ "chain": "C",
1789
+ "organism": "Plasmodium falciparum",
1790
+ "classification": "other"
1791
+ },
1792
+ "9fjk": {
1793
+ "chain": "B",
1794
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1795
+ "classification": "viral"
1796
+ },
1797
+ "6lxj": {
1798
+ "chain": "D",
1799
+ "organism": "Influenza A virus (A/Anhui/1-BALF_RG44/2013(H7N9))",
1800
+ "classification": "viral"
1801
+ },
1802
+ "7ttm": {
1803
+ "chain": "A",
1804
+ "organism": "Bat SARS-like coronavirus RsSHC014",
1805
+ "classification": "viral"
1806
+ },
1807
+ "2aep": {
1808
+ "chain": "A",
1809
+ "organism": "Influenza A virus",
1810
+ "classification": "viral"
1811
+ },
1812
+ "1sy6": {
1813
+ "chain": "A",
1814
+ "organism": "Homo sapiens",
1815
+ "classification": "human"
1816
+ },
1817
+ "8udz": {
1818
+ "chain": "B",
1819
+ "organism": "Homo sapiens",
1820
+ "classification": "human"
1821
+ },
1822
+ "1xf5": {
1823
+ "chain": "L",
1824
+ "organism": "Finegoldia magna",
1825
+ "classification": "other"
1826
+ },
1827
+ "1egj": {
1828
+ "chain": "A",
1829
+ "organism": "Homo sapiens",
1830
+ "classification": "human"
1831
+ },
1832
+ "6xcj": {
1833
+ "chain": "G",
1834
+ "organism": "Human immunodeficiency virus 1",
1835
+ "classification": "viral"
1836
+ },
1837
+ "6ml8": {
1838
+ "chain": "A",
1839
+ "organism": "Influenza A virus",
1840
+ "classification": "viral"
1841
+ },
1842
+ "5jq6": {
1843
+ "chain": "A",
1844
+ "organism": "Staphylococcus aureus",
1845
+ "classification": "bacterial"
1846
+ },
1847
+ "4i2x": {
1848
+ "chain": "E",
1849
+ "organism": "Homo sapiens",
1850
+ "classification": "human"
1851
+ },
1852
+ "6wo4": {
1853
+ "chain": "E",
1854
+ "organism": "Recombinant Hepatitis C virus HK6a/JFH-1",
1855
+ "classification": "viral"
1856
+ },
1857
+ "4np4": {
1858
+ "chain": "A",
1859
+ "organism": "Clostridium difficile",
1860
+ "classification": "other"
1861
+ },
1862
+ "4ps4": {
1863
+ "chain": "A",
1864
+ "organism": "Homo sapiens",
1865
+ "classification": "human"
1866
+ },
1867
+ "3idx": {
1868
+ "chain": "G",
1869
+ "organism": "Human immunodeficiency virus 1",
1870
+ "classification": "viral"
1871
+ },
1872
+ "6iuv": {
1873
+ "chain": "A",
1874
+ "organism": "Influenza A virus (A/Hong Kong/482/97(H5N1))",
1875
+ "classification": "viral"
1876
+ },
1877
+ "8h07": {
1878
+ "chain": "B",
1879
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
1880
+ "classification": "viral"
1881
+ },
1882
+ "3gi9": {
1883
+ "chain": "C",
1884
+ "organism": "Methanocaldococcus jannaschii",
1885
+ "classification": "other"
1886
+ },
1887
+ "6bgt": {
1888
+ "chain": "C",
1889
+ "organism": "Homo sapiens",
1890
+ "classification": "human"
1891
+ },
1892
+ "8t9z": {
1893
+ "chain": "A",
1894
+ "organism": "Human metapneumovirus",
1895
+ "classification": "viral"
1896
+ },
1897
+ "2vxs": {
1898
+ "chain": "D",
1899
+ "organism": "HOMO SAPIENS",
1900
+ "classification": "human"
1901
+ },
1902
+ "2vxt": {
1903
+ "chain": "I",
1904
+ "organism": "HOMO SAPIENS",
1905
+ "classification": "human"
1906
+ },
1907
+ "5u3d": {
1908
+ "chain": "E",
1909
+ "organism": "Finegoldia magna",
1910
+ "classification": "other"
1911
+ },
1912
+ "6phc": {
1913
+ "chain": "I",
1914
+ "organism": "Plasmodium falciparum",
1915
+ "classification": "other"
1916
+ },
1917
+ "1fbi": {
1918
+ "chain": "Y",
1919
+ "organism": "Numida meleagris",
1920
+ "classification": "other"
1921
+ },
1922
+ "6adb": {
1923
+ "chain": "A",
1924
+ "organism": "Escherichia coli (strain K12)",
1925
+ "classification": "bacterial"
1926
+ },
1927
+ "4plj": {
1928
+ "chain": "A",
1929
+ "organism": "Hepatitis E virus",
1930
+ "classification": "viral"
1931
+ },
1932
+ "1rjl": {
1933
+ "chain": "C",
1934
+ "organism": "Borrelia burgdorferi",
1935
+ "classification": "other"
1936
+ },
1937
+ "5vyf": {
1938
+ "chain": "F",
1939
+ "organism": "Felis catus",
1940
+ "classification": "other"
1941
+ },
1942
+ "4jzj": {
1943
+ "chain": "D",
1944
+ "organism": "Homo sapiens",
1945
+ "classification": "human"
1946
+ },
1947
+ "5xj4": {
1948
+ "chain": "A",
1949
+ "organism": "Homo sapiens",
1950
+ "classification": "human"
1951
+ },
1952
+ "8ruu": {
1953
+ "chain": "Y",
1954
+ "organism": "Homo sapiens",
1955
+ "classification": "human"
1956
+ },
1957
+ "6v4p": {
1958
+ "chain": "B",
1959
+ "organism": "Homo sapiens",
1960
+ "classification": "human"
1961
+ },
1962
+ "4rdq": {
1963
+ "chain": "C",
1964
+ "organism": "Gallus gallus",
1965
+ "classification": "other"
1966
+ },
1967
+ "4jlr": {
1968
+ "chain": "C",
1969
+ "organism": "Unknown",
1970
+ "classification": "other"
1971
+ },
1972
+ "8tqa": {
1973
+ "chain": "A",
1974
+ "organism": "Mus musculus",
1975
+ "classification": "other"
1976
+ },
1977
+ "8dcn": {
1978
+ "chain": "F",
1979
+ "organism": "Clostridioides difficile",
1980
+ "classification": "other"
1981
+ },
1982
+ "6v4n": {
1983
+ "chain": "W",
1984
+ "organism": "Influenza B virus",
1985
+ "classification": "viral"
1986
+ },
1987
+ "6vzi": {
1988
+ "chain": "G",
1989
+ "organism": "Human immunodeficiency virus 1",
1990
+ "classification": "viral"
1991
+ },
1992
+ "5if0": {
1993
+ "chain": "I",
1994
+ "organism": "Homo sapiens",
1995
+ "classification": "human"
1996
+ },
1997
+ "5k9k": {
1998
+ "chain": "F",
1999
+ "organism": "Influenza A virus (strain A/Hong Kong/1/1968 H3N2)",
2000
+ "classification": "viral"
2001
+ },
2002
+ "5veb": {
2003
+ "chain": "Y",
2004
+ "organism": "Homo sapiens",
2005
+ "classification": "human"
2006
+ },
2007
+ "1tqb": {
2008
+ "chain": "A",
2009
+ "organism": "Ovis aries",
2010
+ "classification": "other"
2011
+ },
2012
+ "3nh7": {
2013
+ "chain": "C",
2014
+ "organism": "Homo sapiens",
2015
+ "classification": "human"
2016
+ },
2017
+ "4lvo": {
2018
+ "chain": "A",
2019
+ "organism": "Plasmodium falciparum",
2020
+ "classification": "other"
2021
+ },
2022
+ "8r8d": {
2023
+ "chain": "B",
2024
+ "organism": "Homo sapiens",
2025
+ "classification": "human"
2026
+ },
2027
+ "4rgo": {
2028
+ "chain": "S",
2029
+ "organism": "Staphylococcus aureus",
2030
+ "classification": "bacterial"
2031
+ },
2032
+ "7r58": {
2033
+ "chain": "A",
2034
+ "organism": "Homo sapiens",
2035
+ "classification": "human"
2036
+ },
2037
+ "4ala": {
2038
+ "chain": "C",
2039
+ "organism": "DENGUE VIRUS 3",
2040
+ "classification": "viral"
2041
+ },
2042
+ "4f37": {
2043
+ "chain": "A",
2044
+ "organism": "Escherichia coli",
2045
+ "classification": "bacterial"
2046
+ },
2047
+ "7yru": {
2048
+ "chain": "A",
2049
+ "organism": "Homo sapiens",
2050
+ "classification": "human"
2051
+ },
2052
+ "4aei": {
2053
+ "chain": "C",
2054
+ "organism": "ANDROCTONUS AUSTRALIS HECTOR",
2055
+ "classification": "other"
2056
+ },
2057
+ "6u2f": {
2058
+ "chain": "A",
2059
+ "organism": "Homo sapiens",
2060
+ "classification": "human"
2061
+ },
2062
+ "5w5x": {
2063
+ "chain": "A",
2064
+ "organism": "Homo sapiens",
2065
+ "classification": "human"
2066
+ },
2067
+ "6q0l": {
2068
+ "chain": "A",
2069
+ "organism": "Influenza A virus (A/Beijing/262/1995(H1N1))",
2070
+ "classification": "viral"
2071
+ },
2072
+ "5tq0": {
2073
+ "chain": "A",
2074
+ "organism": "Xenopus laevis",
2075
+ "classification": "other"
2076
+ },
2077
+ "3jwd": {
2078
+ "chain": "A",
2079
+ "organism": "Human immunodeficiency virus 1",
2080
+ "classification": "viral"
2081
+ },
2082
+ "6oor": {
2083
+ "chain": "A",
2084
+ "organism": "Mus musculus",
2085
+ "classification": "other"
2086
+ },
2087
+ "8g8d": {
2088
+ "chain": "P",
2089
+ "organism": "Human immunodeficiency virus 1",
2090
+ "classification": "viral"
2091
+ },
2092
+ "7uot": {
2093
+ "chain": "C",
2094
+ "organism": "Lassa virus",
2095
+ "classification": "viral"
2096
+ },
2097
+ "7lsg": {
2098
+ "chain": "C",
2099
+ "organism": "Tick-borne encephalitis virus",
2100
+ "classification": "viral"
2101
+ },
2102
+ "8sak": {
2103
+ "chain": "B",
2104
+ "organism": "unclassified Merbecovirus",
2105
+ "classification": "viral"
2106
+ },
2107
+ "6uvo": {
2108
+ "chain": "D",
2109
+ "organism": "Human respiratory syncytial virus A (strain A2)",
2110
+ "classification": "viral"
2111
+ },
2112
+ "7uij": {
2113
+ "chain": "D",
2114
+ "organism": "Borreliella burgdorferi B31",
2115
+ "classification": "other"
2116
+ },
2117
+ "7lse": {
2118
+ "chain": "E",
2119
+ "organism": "Tick-borne encephalitis virus Far Eastern subtype",
2120
+ "classification": "viral"
2121
+ },
2122
+ "6cw2": {
2123
+ "chain": "C",
2124
+ "organism": "Saccharomyces cerevisiae",
2125
+ "classification": "fungal"
2126
+ },
2127
+ "4j6r": {
2128
+ "chain": "G",
2129
+ "organism": "HUMAN IMMUNODEFICIENCY VIRUS 1",
2130
+ "classification": "viral"
2131
+ },
2132
+ "7n3c": {
2133
+ "chain": "C",
2134
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2135
+ "classification": "viral"
2136
+ },
2137
+ "3wkm": {
2138
+ "chain": "B",
2139
+ "organism": "Aquifex aeolicus",
2140
+ "classification": "other"
2141
+ },
2142
+ "6s5a": {
2143
+ "chain": "A",
2144
+ "organism": "Homo sapiens",
2145
+ "classification": "human"
2146
+ },
2147
+ "5ikc": {
2148
+ "chain": "N",
2149
+ "organism": "Homo sapiens",
2150
+ "classification": "human"
2151
+ },
2152
+ "6s3d": {
2153
+ "chain": "M",
2154
+ "organism": "synthetic construct",
2155
+ "classification": "other"
2156
+ },
2157
+ "7ahu": {
2158
+ "chain": "C",
2159
+ "organism": "Homo sapiens",
2160
+ "classification": "human"
2161
+ },
2162
+ "8c7m": {
2163
+ "chain": "B",
2164
+ "organism": "Homo sapiens",
2165
+ "classification": "human"
2166
+ },
2167
+ "7xnf": {
2168
+ "chain": "A",
2169
+ "organism": "Pangolin coronavirus",
2170
+ "classification": "viral"
2171
+ },
2172
+ "8y0r": {
2173
+ "chain": "2",
2174
+ "organism": "Foot-and-mouth disease virus A",
2175
+ "classification": "viral"
2176
+ },
2177
+ "8y6i": {
2178
+ "chain": "A",
2179
+ "organism": "Homo sapiens",
2180
+ "classification": "human"
2181
+ },
2182
+ "3t2n": {
2183
+ "chain": "A",
2184
+ "organism": "Homo sapiens",
2185
+ "classification": "human"
2186
+ },
2187
+ "4zff": {
2188
+ "chain": "D",
2189
+ "organism": "Homo sapiens",
2190
+ "classification": "human"
2191
+ },
2192
+ "4m62": {
2193
+ "chain": "T",
2194
+ "organism": "synthetic construct",
2195
+ "classification": "other"
2196
+ },
2197
+ "4ag4": {
2198
+ "chain": "A",
2199
+ "organism": "HOMO SAPIENS",
2200
+ "classification": "human"
2201
+ },
2202
+ "5ukr": {
2203
+ "chain": "G",
2204
+ "organism": "Human immunodeficiency virus 1",
2205
+ "classification": "viral"
2206
+ },
2207
+ "8d1t": {
2208
+ "chain": "A",
2209
+ "organism": "Homo sapiens",
2210
+ "classification": "human"
2211
+ },
2212
+ "4nzr": {
2213
+ "chain": "M",
2214
+ "organism": "Mycoplasma genitalium",
2215
+ "classification": "other"
2216
+ },
2217
+ "6xlq": {
2218
+ "chain": "A",
2219
+ "organism": "Homo sapiens",
2220
+ "classification": "human"
2221
+ },
2222
+ "8da1": {
2223
+ "chain": "I",
2224
+ "organism": "Bungarus multicinctus",
2225
+ "classification": "other"
2226
+ },
2227
+ "7u5b": {
2228
+ "chain": "I",
2229
+ "organism": "Homo sapiens",
2230
+ "classification": "human"
2231
+ },
2232
+ "3q1s": {
2233
+ "chain": "I",
2234
+ "organism": "Homo sapiens",
2235
+ "classification": "human"
2236
+ },
2237
+ "6by3": {
2238
+ "chain": "C",
2239
+ "organism": "Streptomyces coelicolor",
2240
+ "classification": "other"
2241
+ },
2242
+ "6lyn": {
2243
+ "chain": "C",
2244
+ "organism": "Homo sapiens",
2245
+ "classification": "human"
2246
+ },
2247
+ "5occ": {
2248
+ "chain": "A",
2249
+ "organism": "Homo sapiens",
2250
+ "classification": "human"
2251
+ },
2252
+ "8jxs": {
2253
+ "chain": "A",
2254
+ "organism": "Homo sapiens",
2255
+ "classification": "human"
2256
+ },
2257
+ "4lmq": {
2258
+ "chain": "D",
2259
+ "organism": "Homo sapiens",
2260
+ "classification": "human"
2261
+ },
2262
+ "5w5z": {
2263
+ "chain": "A",
2264
+ "organism": "Homo sapiens",
2265
+ "classification": "human"
2266
+ },
2267
+ "7k9j": {
2268
+ "chain": "C",
2269
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2270
+ "classification": "viral"
2271
+ },
2272
+ "5k59": {
2273
+ "chain": "B",
2274
+ "organism": "Staphylococcus aureus (strain USA300)",
2275
+ "classification": "bacterial"
2276
+ },
2277
+ "8kdm": {
2278
+ "chain": "B",
2279
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2280
+ "classification": "viral"
2281
+ },
2282
+ "7lr4": {
2283
+ "chain": "C",
2284
+ "organism": "Plasmodium berghei",
2285
+ "classification": "other"
2286
+ },
2287
+ "7lf7": {
2288
+ "chain": "M",
2289
+ "organism": "Homo sapiens",
2290
+ "classification": "human"
2291
+ },
2292
+ "8w86": {
2293
+ "chain": "D",
2294
+ "organism": "Homo sapiens",
2295
+ "classification": "human"
2296
+ },
2297
+ "7wvg": {
2298
+ "chain": "B",
2299
+ "organism": "Influenza A virus",
2300
+ "classification": "viral"
2301
+ },
2302
+ "6wix": {
2303
+ "chain": "G",
2304
+ "organism": "Human immunodeficiency virus 1",
2305
+ "classification": "viral"
2306
+ },
2307
+ "7xrz": {
2308
+ "chain": "Y",
2309
+ "organism": "Escherichia coli",
2310
+ "classification": "bacterial"
2311
+ },
2312
+ "1jrh": {
2313
+ "chain": "I",
2314
+ "organism": "Homo sapiens",
2315
+ "classification": "human"
2316
+ },
2317
+ "3ks0": {
2318
+ "chain": "A",
2319
+ "organism": "Saccharomyces cerevisiae",
2320
+ "classification": "fungal"
2321
+ },
2322
+ "7x29": {
2323
+ "chain": "C",
2324
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2325
+ "classification": "viral"
2326
+ },
2327
+ "5x2n": {
2328
+ "chain": "C",
2329
+ "organism": "Oryzias latipes",
2330
+ "classification": "other"
2331
+ },
2332
+ "7lkf": {
2333
+ "chain": "A",
2334
+ "organism": "Gallus gallus",
2335
+ "classification": "other"
2336
+ },
2337
+ "6mi2": {
2338
+ "chain": "C",
2339
+ "organism": "Homo sapiens",
2340
+ "classification": "human"
2341
+ },
2342
+ "5dur": {
2343
+ "chain": "A",
2344
+ "organism": "Influenza A virus (A/Anhui/1/2005(H5N1))",
2345
+ "classification": "viral"
2346
+ },
2347
+ "7kyl": {
2348
+ "chain": "Z",
2349
+ "organism": "Powassan virus",
2350
+ "classification": "viral"
2351
+ },
2352
+ "7tuy": {
2353
+ "chain": "R",
2354
+ "organism": "Homo sapiens",
2355
+ "classification": "human"
2356
+ },
2357
+ "4qhu": {
2358
+ "chain": "C",
2359
+ "organism": "Homo sapiens",
2360
+ "classification": "human"
2361
+ },
2362
+ "7e72": {
2363
+ "chain": "E",
2364
+ "organism": "Homo sapiens",
2365
+ "classification": "human"
2366
+ },
2367
+ "4g7v": {
2368
+ "chain": "S",
2369
+ "organism": "Ciona intestinalis",
2370
+ "classification": "other"
2371
+ },
2372
+ "8f38": {
2373
+ "chain": "C",
2374
+ "organism": "synthetic construct",
2375
+ "classification": "other"
2376
+ },
2377
+ "8hpk": {
2378
+ "chain": "A",
2379
+ "organism": "Oxalobacter formigenes",
2380
+ "classification": "other"
2381
+ },
2382
+ "6rlo": {
2383
+ "chain": "J",
2384
+ "organism": "Homo sapiens",
2385
+ "classification": "human"
2386
+ },
2387
+ "4dw2": {
2388
+ "chain": "U",
2389
+ "organism": "Homo sapiens",
2390
+ "classification": "human"
2391
+ },
2392
+ "6uj9": {
2393
+ "chain": "A",
2394
+ "organism": "Homo sapiens",
2395
+ "classification": "human"
2396
+ },
2397
+ "8g3q": {
2398
+ "chain": "G",
2399
+ "organism": "Influenza A virus",
2400
+ "classification": "viral"
2401
+ },
2402
+ "4kxz": {
2403
+ "chain": "D",
2404
+ "organism": "Homo sapiens",
2405
+ "classification": "human"
2406
+ },
2407
+ "8v5q": {
2408
+ "chain": "G",
2409
+ "organism": "Human alphaherpesvirus 3",
2410
+ "classification": "viral"
2411
+ },
2412
+ "6z7z": {
2413
+ "chain": "F",
2414
+ "organism": "Sus scrofa",
2415
+ "classification": "other"
2416
+ },
2417
+ "8vgn": {
2418
+ "chain": "I",
2419
+ "organism": "Homo sapiens",
2420
+ "classification": "human"
2421
+ },
2422
+ "1ztx": {
2423
+ "chain": "E",
2424
+ "organism": "West Nile virus",
2425
+ "classification": "viral"
2426
+ },
2427
+ "6jbt": {
2428
+ "chain": "F",
2429
+ "organism": "Homo sapiens",
2430
+ "classification": "human"
2431
+ },
2432
+ "2r56": {
2433
+ "chain": "A",
2434
+ "organism": "Bos taurus",
2435
+ "classification": "other"
2436
+ },
2437
+ "1iqd": {
2438
+ "chain": "C",
2439
+ "organism": "Homo sapiens",
2440
+ "classification": "human"
2441
+ },
2442
+ "2q8a": {
2443
+ "chain": "A",
2444
+ "organism": "Plasmodium falciparum",
2445
+ "classification": "other"
2446
+ },
2447
+ "6pis": {
2448
+ "chain": "B",
2449
+ "organism": "Mus musculus",
2450
+ "classification": "other"
2451
+ },
2452
+ "5cbe": {
2453
+ "chain": "F",
2454
+ "organism": "Homo sapiens",
2455
+ "classification": "human"
2456
+ },
2457
+ "4xwo": {
2458
+ "chain": "A",
2459
+ "organism": "Saccharomyces cerevisiae (ATCC 204508 / S288c)",
2460
+ "classification": "fungal"
2461
+ },
2462
+ "7upb": {
2463
+ "chain": "D",
2464
+ "organism": "Nipah henipavirus",
2465
+ "classification": "viral"
2466
+ },
2467
+ "6uyf": {
2468
+ "chain": "E",
2469
+ "organism": "Recombinant Hepatitis C virus HK6a/JFH-1",
2470
+ "classification": "viral"
2471
+ },
2472
+ "8tbq": {
2473
+ "chain": "P",
2474
+ "organism": "Homo sapiens",
2475
+ "classification": "human"
2476
+ },
2477
+ "7ox4": {
2478
+ "chain": "C",
2479
+ "organism": "Mus musculus",
2480
+ "classification": "other"
2481
+ },
2482
+ "6j5d": {
2483
+ "chain": "A",
2484
+ "organism": "Louping ill virus",
2485
+ "classification": "viral"
2486
+ },
2487
+ "4dtg": {
2488
+ "chain": "K",
2489
+ "organism": "Homo sapiens",
2490
+ "classification": "human"
2491
+ },
2492
+ "6cmi": {
2493
+ "chain": "B",
2494
+ "organism": "Hendra virus",
2495
+ "classification": "viral"
2496
+ },
2497
+ "3efd": {
2498
+ "chain": "K",
2499
+ "organism": "Escherichia coli",
2500
+ "classification": "bacterial"
2501
+ },
2502
+ "4k3j": {
2503
+ "chain": "B",
2504
+ "organism": "Homo sapiens",
2505
+ "classification": "human"
2506
+ },
2507
+ "8rp8": {
2508
+ "chain": "D",
2509
+ "organism": "Homo sapiens",
2510
+ "classification": "human"
2511
+ },
2512
+ "7a3q": {
2513
+ "chain": "A",
2514
+ "organism": "Dengue virus 4",
2515
+ "classification": "viral"
2516
+ },
2517
+ "1uac": {
2518
+ "chain": "Y",
2519
+ "organism": "Meleagris gallopavo",
2520
+ "classification": "other"
2521
+ },
2522
+ "7usl": {
2523
+ "chain": "C",
2524
+ "organism": "Bordetella pertussis",
2525
+ "classification": "other"
2526
+ },
2527
+ "6kyz": {
2528
+ "chain": "A",
2529
+ "organism": "Human rhinovirus 14",
2530
+ "classification": "viral"
2531
+ },
2532
+ "3ehb": {
2533
+ "chain": "B",
2534
+ "organism": "Paracoccus denitrificans",
2535
+ "classification": "other"
2536
+ },
2537
+ "4u6v": {
2538
+ "chain": "B",
2539
+ "organism": "Staphylococcus aureus subsp. aureus TCH60",
2540
+ "classification": "bacterial"
2541
+ },
2542
+ "5i9q": {
2543
+ "chain": "A",
2544
+ "organism": "Human immunodeficiency virus 1",
2545
+ "classification": "viral"
2546
+ },
2547
+ "1wej": {
2548
+ "chain": "F",
2549
+ "organism": "Equus caballus",
2550
+ "classification": "other"
2551
+ },
2552
+ "8xi6": {
2553
+ "chain": "C",
2554
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2555
+ "classification": "viral"
2556
+ },
2557
+ "5te7": {
2558
+ "chain": "G",
2559
+ "organism": "Human immunodeficiency virus 1",
2560
+ "classification": "viral"
2561
+ },
2562
+ "7nx3": {
2563
+ "chain": "F",
2564
+ "organism": "Homo sapiens",
2565
+ "classification": "human"
2566
+ },
2567
+ "7q6c": {
2568
+ "chain": "A",
2569
+ "organism": "Homo sapiens",
2570
+ "classification": "human"
2571
+ },
2572
+ "4xmp": {
2573
+ "chain": "G",
2574
+ "organism": "Human immunodeficiency virus 1",
2575
+ "classification": "viral"
2576
+ },
2577
+ "4dkf": {
2578
+ "chain": "A",
2579
+ "organism": "Homo sapiens",
2580
+ "classification": "human"
2581
+ },
2582
+ "7jtg": {
2583
+ "chain": "E",
2584
+ "organism": "Recombinant Hepatitis C virus HK6a/JFH-1",
2585
+ "classification": "viral"
2586
+ },
2587
+ "5d8j": {
2588
+ "chain": "A",
2589
+ "organism": "Mus musculus",
2590
+ "classification": "other"
2591
+ },
2592
+ "7xy8": {
2593
+ "chain": "A",
2594
+ "organism": "Homo sapiens",
2595
+ "classification": "human"
2596
+ },
2597
+ "5ob5": {
2598
+ "chain": "A",
2599
+ "organism": "Homo sapiens",
2600
+ "classification": "human"
2601
+ },
2602
+ "6iap": {
2603
+ "chain": "A",
2604
+ "organism": "Homo sapiens",
2605
+ "classification": "human"
2606
+ },
2607
+ "6vvu": {
2608
+ "chain": "B",
2609
+ "organism": "Homo sapiens",
2610
+ "classification": "human"
2611
+ },
2612
+ "8vvm": {
2613
+ "chain": "I",
2614
+ "organism": "Homo sapiens",
2615
+ "classification": "human"
2616
+ },
2617
+ "6ktr": {
2618
+ "chain": "C",
2619
+ "organism": "Homo sapiens",
2620
+ "classification": "human"
2621
+ },
2622
+ "6iw2": {
2623
+ "chain": "D",
2624
+ "organism": "Yellow fever virus (strain 17D vaccine)",
2625
+ "classification": "viral"
2626
+ },
2627
+ "6otc": {
2628
+ "chain": "A",
2629
+ "organism": "Lake Victoria marburgvirus (strain Popp-67)",
2630
+ "classification": "viral"
2631
+ },
2632
+ "5otj": {
2633
+ "chain": "D",
2634
+ "organism": "Phleum pratense",
2635
+ "classification": "other"
2636
+ },
2637
+ "6mej": {
2638
+ "chain": "C",
2639
+ "organism": "Hepacivirus C",
2640
+ "classification": "viral"
2641
+ },
2642
+ "3l5w": {
2643
+ "chain": "J",
2644
+ "organism": "Homo sapiens",
2645
+ "classification": "human"
2646
+ },
2647
+ "4okv": {
2648
+ "chain": "F",
2649
+ "organism": "Anopheles stephensi",
2650
+ "classification": "other"
2651
+ },
2652
+ "6p9h": {
2653
+ "chain": "A",
2654
+ "organism": "Staphylococcus aureus",
2655
+ "classification": "bacterial"
2656
+ },
2657
+ "6hig": {
2658
+ "chain": "B",
2659
+ "organism": "Homo sapiens",
2660
+ "classification": "human"
2661
+ },
2662
+ "8zd5": {
2663
+ "chain": "D",
2664
+ "organism": "Canis lupus familiaris",
2665
+ "classification": "other"
2666
+ },
2667
+ "8ulj": {
2668
+ "chain": "B",
2669
+ "organism": "Respiratory syncytial virus A2",
2670
+ "classification": "viral"
2671
+ },
2672
+ "5th9": {
2673
+ "chain": "B",
2674
+ "organism": "Homo sapiens",
2675
+ "classification": "human"
2676
+ },
2677
+ "3pnw": {
2678
+ "chain": "R",
2679
+ "organism": "Homo sapiens",
2680
+ "classification": "human"
2681
+ },
2682
+ "5vjo": {
2683
+ "chain": "F",
2684
+ "organism": "Anas platyrhynchos",
2685
+ "classification": "other"
2686
+ },
2687
+ "8j80": {
2688
+ "chain": "A",
2689
+ "organism": "Homo sapiens",
2690
+ "classification": "human"
2691
+ },
2692
+ "5l6y": {
2693
+ "chain": "C",
2694
+ "organism": "Homo sapiens",
2695
+ "classification": "human"
2696
+ },
2697
+ "6cxy": {
2698
+ "chain": "C",
2699
+ "organism": "Homo sapiens",
2700
+ "classification": "human"
2701
+ },
2702
+ "1jhl": {
2703
+ "chain": "A",
2704
+ "organism": "Phasianus colchicus",
2705
+ "classification": "other"
2706
+ },
2707
+ "1nl0": {
2708
+ "chain": "G",
2709
+ "organism": "Unknown",
2710
+ "classification": "other"
2711
+ },
2712
+ "3l5x": {
2713
+ "chain": "A",
2714
+ "organism": "Homo sapiens",
2715
+ "classification": "human"
2716
+ },
2717
+ "8vvk": {
2718
+ "chain": "B",
2719
+ "organism": "Crimean-Congo hemorrhagic fever virus",
2720
+ "classification": "viral"
2721
+ },
2722
+ "7tpd": {
2723
+ "chain": "A",
2724
+ "organism": "Homo sapiens",
2725
+ "classification": "human"
2726
+ },
2727
+ "6j15": {
2728
+ "chain": "D",
2729
+ "organism": "Homo sapiens",
2730
+ "classification": "human"
2731
+ },
2732
+ "7zli": {
2733
+ "chain": "A",
2734
+ "organism": "Caenorhabditis elegans",
2735
+ "classification": "other"
2736
+ },
2737
+ "2h9g": {
2738
+ "chain": "R",
2739
+ "organism": "Homo sapiens",
2740
+ "classification": "human"
2741
+ },
2742
+ "3lh2": {
2743
+ "chain": "V",
2744
+ "organism": "ARTIFICIAL GENE",
2745
+ "classification": "other"
2746
+ },
2747
+ "8dao": {
2748
+ "chain": "F",
2749
+ "organism": "Homo sapiens",
2750
+ "classification": "human"
2751
+ },
2752
+ "4i3r": {
2753
+ "chain": "G",
2754
+ "organism": "Human Immunodeficiency Virus",
2755
+ "classification": "viral"
2756
+ },
2757
+ "7vgr": {
2758
+ "chain": "A",
2759
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2760
+ "classification": "viral"
2761
+ },
2762
+ "3thm": {
2763
+ "chain": "F",
2764
+ "organism": "Homo sapiens",
2765
+ "classification": "human"
2766
+ },
2767
+ "8vgm": {
2768
+ "chain": "A",
2769
+ "organism": "Aliarcobacter butzleri RM4018",
2770
+ "classification": "other"
2771
+ },
2772
+ "8dfh": {
2773
+ "chain": "A",
2774
+ "organism": "Plasmodium falciparum 3D7",
2775
+ "classification": "other"
2776
+ },
2777
+ "7wtf": {
2778
+ "chain": "D",
2779
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2780
+ "classification": "viral"
2781
+ },
2782
+ "8y6h": {
2783
+ "chain": "A",
2784
+ "organism": "Homo sapiens",
2785
+ "classification": "human"
2786
+ },
2787
+ "5mhr": {
2788
+ "chain": "D",
2789
+ "organism": "Reovirus sp.",
2790
+ "classification": "viral"
2791
+ },
2792
+ "8h3n": {
2793
+ "chain": "C",
2794
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2795
+ "classification": "viral"
2796
+ },
2797
+ "7ujd": {
2798
+ "chain": "A",
2799
+ "organism": "Homo sapiens",
2800
+ "classification": "human"
2801
+ },
2802
+ "8d9y": {
2803
+ "chain": "K",
2804
+ "organism": "Oxyuranus scutellatus scutellatus",
2805
+ "classification": "other"
2806
+ },
2807
+ "1uj3": {
2808
+ "chain": "C",
2809
+ "organism": "Homo sapiens",
2810
+ "classification": "human"
2811
+ },
2812
+ "3nfp": {
2813
+ "chain": "I",
2814
+ "organism": "Homo sapiens",
2815
+ "classification": "human"
2816
+ },
2817
+ "6o9i": {
2818
+ "chain": "C",
2819
+ "organism": "Mus musculus",
2820
+ "classification": "other"
2821
+ },
2822
+ "4k94": {
2823
+ "chain": "C",
2824
+ "organism": "Homo sapiens",
2825
+ "classification": "human"
2826
+ },
2827
+ "7bbj": {
2828
+ "chain": "B",
2829
+ "organism": "Homo sapiens",
2830
+ "classification": "human"
2831
+ },
2832
+ "7mlh": {
2833
+ "chain": "F",
2834
+ "organism": "Dermatophagoides pteronyssinus",
2835
+ "classification": "other"
2836
+ },
2837
+ "7o52": {
2838
+ "chain": "U",
2839
+ "organism": "Homo sapiens",
2840
+ "classification": "human"
2841
+ },
2842
+ "6umx": {
2843
+ "chain": "A",
2844
+ "organism": "Homo sapiens",
2845
+ "classification": "human"
2846
+ },
2847
+ "7rah": {
2848
+ "chain": "E",
2849
+ "organism": "Bordetella pertussis",
2850
+ "classification": "other"
2851
+ },
2852
+ "5e8e": {
2853
+ "chain": "H",
2854
+ "organism": "Homo sapiens",
2855
+ "classification": "human"
2856
+ },
2857
+ "4edw": {
2858
+ "chain": "V",
2859
+ "organism": "Homo sapiens",
2860
+ "classification": "human"
2861
+ },
2862
+ "7t0l": {
2863
+ "chain": "D",
2864
+ "organism": "Homo sapiens",
2865
+ "classification": "human"
2866
+ },
2867
+ "8y6a": {
2868
+ "chain": "B",
2869
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
2870
+ "classification": "viral"
2871
+ },
2872
+ "4irz": {
2873
+ "chain": "A",
2874
+ "organism": "Oryctolagus cuniculus",
2875
+ "classification": "other"
2876
+ },
2877
+ "4lu5": {
2878
+ "chain": "A",
2879
+ "organism": "Vaccinia virus",
2880
+ "classification": "viral"
2881
+ },
2882
+ "6sv2": {
2883
+ "chain": "A",
2884
+ "organism": "Homo sapiens",
2885
+ "classification": "human"
2886
+ },
2887
+ "8tui": {
2888
+ "chain": "A",
2889
+ "organism": "Homo sapiens",
2890
+ "classification": "human"
2891
+ },
2892
+ "8dyx": {
2893
+ "chain": "I",
2894
+ "organism": "Plasmodium falciparum",
2895
+ "classification": "other"
2896
+ },
2897
+ "8w83": {
2898
+ "chain": "L",
2899
+ "organism": "Homo sapiens",
2900
+ "classification": "human"
2901
+ },
2902
+ "4rrp": {
2903
+ "chain": "R",
2904
+ "organism": "Saccharomyces cerevisiae",
2905
+ "classification": "fungal"
2906
+ },
2907
+ "5gzn": {
2908
+ "chain": "E",
2909
+ "organism": "Zika virus",
2910
+ "classification": "viral"
2911
+ },
2912
+ "5tl5": {
2913
+ "chain": "A",
2914
+ "organism": "Homo sapiens",
2915
+ "classification": "human"
2916
+ },
2917
+ "4ypg": {
2918
+ "chain": "D",
2919
+ "organism": "Homo sapiens",
2920
+ "classification": "human"
2921
+ },
2922
+ "5kw9": {
2923
+ "chain": "A",
2924
+ "organism": "Norwalk virus",
2925
+ "classification": "viral"
2926
+ },
2927
+ "2q8b": {
2928
+ "chain": "A",
2929
+ "organism": "Plasmodium falciparum",
2930
+ "classification": "other"
2931
+ },
2932
+ "8a1e": {
2933
+ "chain": "A",
2934
+ "organism": "Rabies virus strain Pasteur vaccin",
2935
+ "classification": "viral"
2936
+ },
2937
+ "4d9q": {
2938
+ "chain": "A",
2939
+ "organism": "Macaca mulatta",
2940
+ "classification": "other"
2941
+ },
2942
+ "6wo5": {
2943
+ "chain": "F",
2944
+ "organism": "Hepatitis C virus (isolate H)",
2945
+ "classification": "viral"
2946
+ },
2947
+ "2nyy": {
2948
+ "chain": "A",
2949
+ "organism": "Clostridium botulinum",
2950
+ "classification": "other"
2951
+ },
2952
+ "7lfa": {
2953
+ "chain": "C",
2954
+ "organism": "Homo sapiens",
2955
+ "classification": "human"
2956
+ },
2957
+ "6cyf": {
2958
+ "chain": "Q",
2959
+ "organism": "Pseudomonas aeruginosa",
2960
+ "classification": "bacterial"
2961
+ },
2962
+ "4zfg": {
2963
+ "chain": "A",
2964
+ "organism": "Homo sapiens",
2965
+ "classification": "human"
2966
+ },
2967
+ "8ts0": {
2968
+ "chain": "A",
2969
+ "organism": "Homo sapiens",
2970
+ "classification": "human"
2971
+ },
2972
+ "6n6b": {
2973
+ "chain": "A",
2974
+ "organism": "Influenza A virus (A/Minnesota/11/2010(H3N2))",
2975
+ "classification": "viral"
2976
+ },
2977
+ "6lxi": {
2978
+ "chain": "B",
2979
+ "organism": "Influenza A virus (strain A/Brevig Mission/1/1918 H1N1)",
2980
+ "classification": "viral"
2981
+ },
2982
+ "7phw": {
2983
+ "chain": "D",
2984
+ "organism": "Plasmodium falciparum (isolate 3D7)",
2985
+ "classification": "other"
2986
+ },
2987
+ "5vcn": {
2988
+ "chain": "A",
2989
+ "organism": "Dermatophagoides pteronyssinus",
2990
+ "classification": "other"
2991
+ },
2992
+ "5n7w": {
2993
+ "chain": "X",
2994
+ "organism": "Homo sapiens",
2995
+ "classification": "human"
2996
+ },
2997
+ "4uta": {
2998
+ "chain": "B",
2999
+ "organism": "DENGUE VIRUS 2",
3000
+ "classification": "viral"
3001
+ },
3002
+ "3zkn": {
3003
+ "chain": "B",
3004
+ "organism": "HOMO SAPIENS",
3005
+ "classification": "human"
3006
+ },
3007
+ "3w9e": {
3008
+ "chain": "C",
3009
+ "organism": "Human herpesvirus 2",
3010
+ "classification": "viral"
3011
+ },
3012
+ "8vdl": {
3013
+ "chain": "C",
3014
+ "organism": "Plasmodium falciparum HB3",
3015
+ "classification": "other"
3016
+ },
3017
+ "3u2s": {
3018
+ "chain": "G",
3019
+ "organism": "Human immunodeficiency virus 1",
3020
+ "classification": "viral"
3021
+ },
3022
+ "7z2m": {
3023
+ "chain": "G",
3024
+ "organism": "Homo sapiens",
3025
+ "classification": "human"
3026
+ },
3027
+ "5vkd": {
3028
+ "chain": "A",
3029
+ "organism": "Bundibugyo ebolavirus",
3030
+ "classification": "viral"
3031
+ },
3032
+ "6xqw": {
3033
+ "chain": "E",
3034
+ "organism": "Plasmodium falciparum",
3035
+ "classification": "other"
3036
+ },
3037
+ "6ck9": {
3038
+ "chain": "G",
3039
+ "organism": "Human immunodeficiency virus 1",
3040
+ "classification": "viral"
3041
+ },
3042
+ "4xvu": {
3043
+ "chain": "H",
3044
+ "organism": "Saccharomyces cerevisiae (ATCC 204508 / S288c)",
3045
+ "classification": "fungal"
3046
+ },
3047
+ "4oii": {
3048
+ "chain": "B",
3049
+ "organism": "West Nile virus",
3050
+ "classification": "viral"
3051
+ },
3052
+ "8gkl": {
3053
+ "chain": "E",
3054
+ "organism": "Homo sapiens",
3055
+ "classification": "human"
3056
+ },
3057
+ "6i8s": {
3058
+ "chain": "D",
3059
+ "organism": "Homo sapiens",
3060
+ "classification": "human"
3061
+ },
3062
+ "7unb": {
3063
+ "chain": "R",
3064
+ "organism": "Plasmodium falciparum",
3065
+ "classification": "other"
3066
+ },
3067
+ "8dg9": {
3068
+ "chain": "C",
3069
+ "organism": "Respiratory syncytial virus A2",
3070
+ "classification": "viral"
3071
+ },
3072
+ "7zjl": {
3073
+ "chain": "A",
3074
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
3075
+ "classification": "viral"
3076
+ },
3077
+ "7x8p": {
3078
+ "chain": "D",
3079
+ "organism": "Homo sapiens",
3080
+ "classification": "human"
3081
+ },
3082
+ "5wk3": {
3083
+ "chain": "C",
3084
+ "organism": "Homo sapiens",
3085
+ "classification": "human"
3086
+ },
3087
+ "6vtw": {
3088
+ "chain": "A",
3089
+ "organism": "Human respiratory syncytial virus A2",
3090
+ "classification": "viral"
3091
+ },
3092
+ "8tp7": {
3093
+ "chain": "C",
3094
+ "organism": "Influenza A virus (A/Singapore/1/1957(H2N2))",
3095
+ "classification": "viral"
3096
+ },
3097
+ "6u9s": {
3098
+ "chain": "C",
3099
+ "organism": "Homo sapiens",
3100
+ "classification": "human"
3101
+ },
3102
+ "8slb": {
3103
+ "chain": "A",
3104
+ "organism": "Thermotoga maritima MSB8",
3105
+ "classification": "other"
3106
+ },
3107
+ "2adf": {
3108
+ "chain": "A",
3109
+ "organism": "Homo sapiens",
3110
+ "classification": "human"
3111
+ },
3112
+ "2vxq": {
3113
+ "chain": "A",
3114
+ "organism": "PHLEUM PRATENSE",
3115
+ "classification": "other"
3116
+ },
3117
+ "4jr9": {
3118
+ "chain": "A",
3119
+ "organism": "Escherichia coli",
3120
+ "classification": "bacterial"
3121
+ },
3122
+ "7kpj": {
3123
+ "chain": "E",
3124
+ "organism": "Ruminococcus gnavus",
3125
+ "classification": "other"
3126
+ },
3127
+ "8jnk": {
3128
+ "chain": "C",
3129
+ "organism": "Homo sapiens",
3130
+ "classification": "human"
3131
+ },
3132
+ "7qu1": {
3133
+ "chain": "C",
3134
+ "organism": "Machupo mammarenavirus",
3135
+ "classification": "viral"
3136
+ },
3137
+ "8urf": {
3138
+ "chain": "A",
3139
+ "organism": "Homo sapiens",
3140
+ "classification": "human"
3141
+ },
3142
+ "6u8c": {
3143
+ "chain": "B",
3144
+ "organism": "Streptococcus sp. 'group G'",
3145
+ "classification": "bacterial"
3146
+ },
3147
+ "6tyb": {
3148
+ "chain": "G",
3149
+ "organism": "Simian immunodeficiency virus",
3150
+ "classification": "viral"
3151
+ },
3152
+ "7q6e": {
3153
+ "chain": "A",
3154
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
3155
+ "classification": "viral"
3156
+ },
3157
+ "5o1r": {
3158
+ "chain": "A",
3159
+ "organism": "Neisseria meningitidis",
3160
+ "classification": "other"
3161
+ },
3162
+ "7ket": {
3163
+ "chain": "C",
3164
+ "organism": "Neisseria meningitidis",
3165
+ "classification": "other"
3166
+ },
3167
+ "8t03": {
3168
+ "chain": "B",
3169
+ "organism": "Mus musculus",
3170
+ "classification": "other"
3171
+ },
3172
+ "8da0": {
3173
+ "chain": "E",
3174
+ "organism": "Dendroaspis polylepis polylepis",
3175
+ "classification": "other"
3176
+ },
3177
+ "7kq7": {
3178
+ "chain": "B",
3179
+ "organism": "Homo sapiens",
3180
+ "classification": "human"
3181
+ },
3182
+ "2jel": {
3183
+ "chain": "P",
3184
+ "organism": "Escherichia coli",
3185
+ "classification": "bacterial"
3186
+ },
3187
+ "8tfl": {
3188
+ "chain": "B",
3189
+ "organism": "Ricinus communis",
3190
+ "classification": "other"
3191
+ },
3192
+ "5w2b": {
3193
+ "chain": "A",
3194
+ "organism": "Reston ebolavirus",
3195
+ "classification": "viral"
3196
+ },
3197
+ "6s8j": {
3198
+ "chain": "E",
3199
+ "organism": "Ebola virus",
3200
+ "classification": "viral"
3201
+ },
3202
+ "3b9k": {
3203
+ "chain": "F",
3204
+ "organism": "Mus musculus",
3205
+ "classification": "other"
3206
+ },
3207
+ "4khx": {
3208
+ "chain": "A",
3209
+ "organism": "Human Immunodeficiency Virus 1",
3210
+ "classification": "viral"
3211
+ },
3212
+ "8tfn": {
3213
+ "chain": "B",
3214
+ "organism": "Homo sapiens",
3215
+ "classification": "human"
3216
+ },
3217
+ "8d7e": {
3218
+ "chain": "C",
3219
+ "organism": "Homo sapiens",
3220
+ "classification": "human"
3221
+ },
3222
+ "9gwt": {
3223
+ "chain": "P",
3224
+ "organism": "Homo sapiens",
3225
+ "classification": "human"
3226
+ },
3227
+ "6n81": {
3228
+ "chain": "A",
3229
+ "organism": "Norovirus Hu/GII.4/Farmington Hills/2004/USA",
3230
+ "classification": "viral"
3231
+ },
3232
+ "4mwf": {
3233
+ "chain": "D",
3234
+ "organism": "Hepatitis C virus (isolate H)",
3235
+ "classification": "viral"
3236
+ },
3237
+ "7chz": {
3238
+ "chain": "I",
3239
+ "organism": "Homo sapiens",
3240
+ "classification": "human"
3241
+ },
3242
+ "3mxw": {
3243
+ "chain": "A",
3244
+ "organism": "Homo sapiens",
3245
+ "classification": "human"
3246
+ },
3247
+ "2ypv": {
3248
+ "chain": "A",
3249
+ "organism": "NEISSERIA MENINGITIDIS MC58",
3250
+ "classification": "other"
3251
+ },
3252
+ "8ahn": {
3253
+ "chain": "A",
3254
+ "organism": "Sin Nombre orthohantavirus",
3255
+ "classification": "viral"
3256
+ },
3257
+ "5b71": {
3258
+ "chain": "E",
3259
+ "organism": "Homo sapiens",
3260
+ "classification": "human"
3261
+ },
3262
+ "7c61": {
3263
+ "chain": "A",
3264
+ "organism": "Homo sapiens",
3265
+ "classification": "human"
3266
+ },
3267
+ "7uvs": {
3268
+ "chain": "C",
3269
+ "organism": "Plasmodium falciparum",
3270
+ "classification": "other"
3271
+ },
3272
+ "8f60": {
3273
+ "chain": "C",
3274
+ "organism": "Homo sapiens",
3275
+ "classification": "human"
3276
+ },
3277
+ "1yjd": {
3278
+ "chain": "C",
3279
+ "organism": "Homo sapiens",
3280
+ "classification": "human"
3281
+ },
3282
+ "6m58": {
3283
+ "chain": "A",
3284
+ "organism": "Homo sapiens",
3285
+ "classification": "human"
3286
+ },
3287
+ "6osv": {
3288
+ "chain": "K",
3289
+ "organism": "Homo sapiens",
3290
+ "classification": "human"
3291
+ },
3292
+ "8jlw": {
3293
+ "chain": "A",
3294
+ "organism": "Crimean-Congo hemorrhagic fever orthonairovirus",
3295
+ "classification": "viral"
3296
+ },
3297
+ "1cl7": {
3298
+ "chain": "I",
3299
+ "organism": "Mus musculus",
3300
+ "classification": "other"
3301
+ },
3302
+ "3rkd": {
3303
+ "chain": "A",
3304
+ "organism": "Hepatitis E virus",
3305
+ "classification": "viral"
3306
+ },
3307
+ "5wi9": {
3308
+ "chain": "A",
3309
+ "organism": "Homo sapiens",
3310
+ "classification": "human"
3311
+ },
3312
+ "8y3u": {
3313
+ "chain": "K",
3314
+ "organism": "Ebola virus",
3315
+ "classification": "viral"
3316
+ },
3317
+ "6mvl": {
3318
+ "chain": "A",
3319
+ "organism": "Homo sapiens",
3320
+ "classification": "human"
3321
+ },
3322
+ "7l7r": {
3323
+ "chain": "G",
3324
+ "organism": "Crimean-Congo hemorrhagic fever virus (strain Nigeria/IbAr10200/1970)",
3325
+ "classification": "viral"
3326
+ },
3327
+ "8jlx": {
3328
+ "chain": "A",
3329
+ "organism": "Crimean-Congo hemorrhagic fever orthonairovirus",
3330
+ "classification": "viral"
3331
+ },
3332
+ "8b7h": {
3333
+ "chain": "A",
3334
+ "organism": "Homo sapiens",
3335
+ "classification": "human"
3336
+ },
3337
+ "2qqk": {
3338
+ "chain": "A",
3339
+ "organism": "Homo sapiens",
3340
+ "classification": "human"
3341
+ },
3342
+ "5d72": {
3343
+ "chain": "B",
3344
+ "organism": "Homo sapiens",
3345
+ "classification": "human"
3346
+ },
3347
+ "8tv1": {
3348
+ "chain": "C",
3349
+ "organism": "Homo sapiens",
3350
+ "classification": "human"
3351
+ },
3352
+ "5ggv": {
3353
+ "chain": "Y",
3354
+ "organism": "Homo sapiens",
3355
+ "classification": "human"
3356
+ },
3357
+ "5y9j": {
3358
+ "chain": "A",
3359
+ "organism": "Homo sapiens",
3360
+ "classification": "human"
3361
+ },
3362
+ "6c9u": {
3363
+ "chain": "A",
3364
+ "organism": "Saccharopolyspora erythraea",
3365
+ "classification": "other"
3366
+ },
3367
+ "8ut3": {
3368
+ "chain": "C",
3369
+ "organism": "Influenza A virus",
3370
+ "classification": "viral"
3371
+ },
3372
+ "5wux": {
3373
+ "chain": "E",
3374
+ "organism": "Homo sapiens",
3375
+ "classification": "human"
3376
+ },
3377
+ "6uyn": {
3378
+ "chain": "A",
3379
+ "organism": "Influenza A virus",
3380
+ "classification": "viral"
3381
+ },
3382
+ "4ij3": {
3383
+ "chain": "A",
3384
+ "organism": "Homo sapiens",
3385
+ "classification": "human"
3386
+ },
3387
+ "8a99": {
3388
+ "chain": "C",
3389
+ "organism": "Severe acute respiratory syndrome coronavirus 2",
3390
+ "classification": "viral"
3391
+ },
3392
+ "6ddm": {
3393
+ "chain": "C",
3394
+ "organism": "Homo sapiens",
3395
+ "classification": "human"
3396
+ },
3397
+ "4ffv": {
3398
+ "chain": "B",
3399
+ "organism": "Rattus norvegicus",
3400
+ "classification": "other"
3401
+ },
3402
+ "2bdn": {
3403
+ "chain": "A",
3404
+ "organism": "Unknown",
3405
+ "classification": "other"
3406
+ },
3407
+ "6h3t": {
3408
+ "chain": "B",
3409
+ "organism": "Bovine Schmallenberg virus",
3410
+ "classification": "viral"
3411
+ },
3412
+ "6x3x": {
3413
+ "chain": "D",
3414
+ "organism": "Homo sapiens",
3415
+ "classification": "human"
3416
+ },
3417
+ "7yk4": {
3418
+ "chain": "B",
3419
+ "organism": "Homo sapiens",
3420
+ "classification": "human"
3421
+ },
3422
+ "6qb6": {
3423
+ "chain": "A",
3424
+ "organism": "Homo sapiens",
3425
+ "classification": "human"
3426
+ },
3427
+ "2wuc": {
3428
+ "chain": "A",
3429
+ "organism": "HOMO SAPIENS",
3430
+ "classification": "human"
3431
+ },
3432
+ "6gku": {
3433
+ "chain": "A",
3434
+ "organism": "Homo sapiens",
3435
+ "classification": "human"
3436
+ },
3437
+ "5eii": {
3438
+ "chain": "I",
3439
+ "organism": "Saccharomyces cerevisiae",
3440
+ "classification": "fungal"
3441
+ },
3442
+ "6k7o": {
3443
+ "chain": "P",
3444
+ "organism": "Homo sapiens",
3445
+ "classification": "human"
3446
+ },
3447
+ "8e8s": {
3448
+ "chain": "2",
3449
+ "organism": "Poliovirus 2",
3450
+ "classification": "viral"
3451
+ },
3452
+ "2j88": {
3453
+ "chain": "A",
3454
+ "organism": "APIS MELLIFERA",
3455
+ "classification": "other"
3456
+ },
3457
+ "4cni": {
3458
+ "chain": "D",
3459
+ "organism": "HOMO SAPIENS",
3460
+ "classification": "human"
3461
+ },
3462
+ "7cj2": {
3463
+ "chain": "A",
3464
+ "organism": "Homo sapiens",
3465
+ "classification": "human"
3466
+ },
3467
+ "3u30": {
3468
+ "chain": "A",
3469
+ "organism": "Homo sapiens",
3470
+ "classification": "human"
3471
+ },
3472
+ "8qya": {
3473
+ "chain": "A",
3474
+ "organism": "Homo sapiens",
3475
+ "classification": "human"
3476
+ },
3477
+ "2arj": {
3478
+ "chain": "R",
3479
+ "organism": "Mus musculus",
3480
+ "classification": "other"
3481
+ },
3482
+ "7df1": {
3483
+ "chain": "D",
3484
+ "organism": "Homo sapiens",
3485
+ "classification": "human"
3486
+ },
3487
+ "7dc8": {
3488
+ "chain": "C",
3489
+ "organism": "Homo sapiens",
3490
+ "classification": "human"
3491
+ },
3492
+ "7sem": {
3493
+ "chain": "F",
3494
+ "organism": "Human metapneumovirus",
3495
+ "classification": "viral"
3496
+ },
3497
+ "5f3b": {
3498
+ "chain": "C",
3499
+ "organism": "Homo sapiens",
3500
+ "classification": "human"
3501
+ },
3502
+ "8w84": {
3503
+ "chain": "C",
3504
+ "organism": "Homo sapiens",
3505
+ "classification": "human"
3506
+ },
3507
+ "8f5i": {
3508
+ "chain": "A",
3509
+ "organism": "Rhodopseudomonas palustris",
3510
+ "classification": "bacterial"
3511
+ },
3512
+ "5xxy": {
3513
+ "chain": "A",
3514
+ "organism": "Homo sapiens",
3515
+ "classification": "human"
3516
+ },
3517
+ "6e63": {
3518
+ "chain": "A",
3519
+ "organism": "Plasmodium falciparum",
3520
+ "classification": "other"
3521
+ },
3522
+ "3nps": {
3523
+ "chain": "A",
3524
+ "organism": "Homo sapiens",
3525
+ "classification": "human"
3526
+ },
3527
+ "8yx9": {
3528
+ "chain": "J",
3529
+ "organism": "Homo sapiens",
3530
+ "classification": "human"
3531
+ },
3532
+ "7vux": {
3533
+ "chain": "A",
3534
+ "organism": "Homo sapiens",
3535
+ "classification": "human"
3536
+ },
3537
+ "5hys": {
3538
+ "chain": "G",
3539
+ "organism": "Homo sapiens",
3540
+ "classification": "human"
3541
+ },
3542
+ "9ima": {
3543
+ "chain": "A",
3544
+ "organism": "Homo sapiens",
3545
+ "classification": "human"
3546
+ },
3547
+ "6dfj": {
3548
+ "chain": "E",
3549
+ "organism": "Dengue virus 1",
3550
+ "classification": "viral"
3551
+ },
3552
+ "6flb": {
3553
+ "chain": "G",
3554
+ "organism": "Dengue virus 2",
3555
+ "classification": "viral"
3556
+ },
3557
+ "4f15": {
3558
+ "chain": "J",
3559
+ "organism": "Influenza A virus",
3560
+ "classification": "viral"
3561
+ }
3562
+ }
models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/ReCEP/20250626_110438/best_mcc_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1358ed202985dce98a889affa264ff3065bfccc942d7b0043e6f3b0700eb8ea0
3
+ size 19059594
models/ReCEP/20250626_110438/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "in_dim": 2560,
4
+ "rsa": true,
5
+ "dihedral": true,
6
+ "node_dims": [
7
+ 512,
8
+ 256,
9
+ 256
10
+ ],
11
+ "edge_dim": 32,
12
+ "dropout": 0.4,
13
+ "activation": "gelu",
14
+ "residual": true,
15
+ "attention": true,
16
+ "normalize": true,
17
+ "coords_agg": "mean",
18
+ "ffn": true,
19
+ "batch_norm": true,
20
+ "concat": true,
21
+ "addition": false,
22
+ "pooling": "attention",
23
+ "fusion_type": "concat",
24
+ "node_gate": false,
25
+ "node_norm": true,
26
+ "node_layers": 2,
27
+ "out_dropout": 0.2,
28
+ "use_egnn": true,
29
+ "encoder": "esmc"
30
+ },
31
+ "training_config": {
32
+ "num_epoch": 120,
33
+ "batch_size": 64,
34
+ "lr": 5e-05,
35
+ "weight_decay": 1e-05,
36
+ "patience": 15,
37
+ "threshold": 0.5,
38
+ "mixed_precision": false,
39
+ "device_id": 0
40
+ },
41
+ "data_config": {
42
+ "radii": [
43
+ 16,
44
+ 18,
45
+ 20
46
+ ],
47
+ "zero_ratio": 0.3,
48
+ "undersample": 0.5,
49
+ "seed": 42
50
+ },
51
+ "loss_config": {
52
+ "region_loss_type": "mse",
53
+ "reg_weight": 10.0,
54
+ "cls_type": "bce",
55
+ "gamma_high_cls": 2.0,
56
+ "regression_type": "smooth_l1",
57
+ "node_loss_type": "focal",
58
+ "alpha": 2.0,
59
+ "gamma": 2.0,
60
+ "pos_weight": 8.0,
61
+ "node_loss_weight": 0.5,
62
+ "region_weight": 1.0,
63
+ "consistency_weight": 0.3,
64
+ "consistency_type": "mse",
65
+ "label_smoothing": 0.1,
66
+ "gradnorm": true,
67
+ "gradnorm_alpha": 2.0,
68
+ "gradnorm_update_freq": 10
69
+ }
70
+ }
requirements.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt - For Hugging Face Spaces
2
+ numpy==1.26.4
3
+ scipy==1.13.0
4
+ pandas==2.1.4
5
+ scikit-learn==1.5.2
6
+
7
+ torch==2.5.0
8
+ torchvision==0.20.0
9
+ torchaudio==2.5.0
10
+
11
+ torch-geometric==2.6.1
12
+ --find-links https://data.pyg.org/whl/torch-2.5.0+cpu.html
13
+ torch-scatter
14
+ torch-sparse
15
+ torch-cluster
16
+ torch-spline-conv
17
+
18
+ torchtext==0.18.0
19
+ torchmetrics==1.6.0
20
+ torch-optimi==0.2.1
21
+ torch-optimizer==0.3.0
22
+
23
+ esm==3.1.3
24
+
25
+ biopython==1.85
26
+ biotite==0.41.2
27
+
28
+ h5py==3.9.0
29
+ py3Dmol==2.4.2
30
+ matplotlib==3.9.3
31
+ seaborn==0.13.2
32
+ tqdm==4.67.1
33
+ prettytable==3.16.0
34
+
35
+ gradio==4.44.1
36
+ fastapi==0.104.1
37
+ uvicorn==0.24.0
38
+ python-multipart>=0.0.9
39
+ jinja2==3.1.2
40
+ aiofiles==23.2.1
41
+
42
+ cloudpathlib==0.16.0
43
+ requests==2.31.0
44
+ pillow==10.1.0
45
+
46
+ networkx>=2.8
47
+
48
+ notebook==7.3.2
49
+ ipykernel==6.29.5
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/bce.egg-info/PKG-INFO ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: bce
3
+ Version: 0.1
src/bce.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ setup.py
3
+ src/bce/__init__.py
4
+ src/bce/check_surf.py
5
+ src/bce/extract_embeddings.py
6
+ src/bce/loss.py
7
+ src/bce/prediction_tools.py
8
+ src/bce.egg-info/PKG-INFO
9
+ src/bce.egg-info/SOURCES.txt
10
+ src/bce.egg-info/dependency_links.txt
11
+ src/bce.egg-info/top_level.txt
12
+ src/bce/antigen/__init__.py
13
+ src/bce/antigen/antigen.py
14
+ src/bce/antigen/pc.py
15
+ src/bce/data/__init__.py
16
+ src/bce/data/data.py
17
+ src/bce/data/data_.py
18
+ src/bce/data/raw_data_generation.py
19
+ src/bce/data/utils.py
20
+ src/bce/data/utils_.py
21
+ src/bce/model/EGNN.py
22
+ src/bce/model/ReGEP.py
23
+ src/bce/model/__init__.py
24
+ src/bce/model/activation.py
25
+ src/bce/model/baseline.py
26
+ src/bce/model/dihedral.py
27
+ src/bce/model/pooling.py
28
+ src/bce/model/scheduler.py
29
+ src/bce/utils/__init__.py
30
+ src/bce/utils/check_struct.py
31
+ src/bce/utils/constants.py
32
+ src/bce/utils/data_tools.py
33
+ src/bce/utils/dssp.py
34
+ src/bce/utils/egnn_trainer.py
35
+ src/bce/utils/evaluatror.py
36
+ src/bce/utils/loading.py
37
+ src/bce/utils/metrics.py
38
+ src/bce/utils/results.py
39
+ src/bce/utils/sequence.py
40
+ src/bce/utils/str_data_tools.py
41
+ src/bce/utils/tools.py
42
+ src/bce/utils/trainer.py
43
+ src/bce/utils/training_tools.py
src/bce.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
src/bce.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ bce
src/bce/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/bce/__init__.py ADDED
File without changes
src/bce/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
src/bce/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (157 Bytes). View file
 
src/bce/__pycache__/loss.cpython-310.pyc ADDED
Binary file (17.9 kB). View file
 
src/bce/__pycache__/loss.cpython-39.pyc ADDED
Binary file (17.8 kB). View file
 
src/bce/antigen/__init__.py ADDED
File without changes
src/bce/antigen/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (175 Bytes). View file
 
src/bce/antigen/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (165 Bytes). View file
 
src/bce/antigen/__pycache__/antigen.cpython-310.pyc ADDED
Binary file (58.2 kB). View file
 
src/bce/antigen/__pycache__/antigen.cpython-39.pyc ADDED
Binary file (58.1 kB). View file
 
src/bce/antigen/__pycache__/pc.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
src/bce/antigen/__pycache__/protein_chain.cpython-310.pyc ADDED
Binary file (51.9 kB). View file
 
src/bce/antigen/antigen.py ADDED
The diff for this file is too large to render. See raw diff
 
src/bce/antigen/pc.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Amino acid conversion mappings
2
+ AMINO_ACID_1TO3 = {
3
+ 'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
4
+ 'Q': 'GLN', 'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
5
+ 'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
6
+ 'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
7
+ }
8
+
9
+ AMINO_ACID_3TO1 = {
10
+ 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
11
+ 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
12
+ 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
13
+ 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
14
+ }
15
+
16
+ MAX_ASA = {
17
+ 'ALA': 129.0,
18
+ 'ARG': 274.0,
19
+ 'ASN': 195.0,
20
+ 'ASP': 193.0,
21
+ 'CYS': 167.0,
22
+ 'GLN': 223.0,
23
+ 'GLU': 225.0,
24
+ 'GLY': 104.0,
25
+ 'HIS': 224.0,
26
+ 'ILE': 197.0,
27
+ 'LEU': 201.0,
28
+ 'LYS': 236.0,
29
+ 'MET': 224.0,
30
+ 'PHE': 240.0,
31
+ 'PRO': 159.0,
32
+ 'SER': 155.0,
33
+ 'THR': 172.0,
34
+ 'TRP': 285.0,
35
+ 'TYR': 263.0,
36
+ 'VAL': 174.0,
37
+ }
src/bce/data/__init__.py ADDED
File without changes
src/bce/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (164 Bytes). View file
 
src/bce/data/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (162 Bytes). View file
 
src/bce/data/__pycache__/data.cpython-310.pyc ADDED
Binary file (32.4 kB). View file
 
src/bce/data/__pycache__/data.cpython-39.pyc ADDED
Binary file (32.6 kB). View file
 
src/bce/data/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (11.4 kB). View file
 
src/bce/data/__pycache__/dataset_egnn.cpython-310.pyc ADDED
Binary file (15 kB). View file
 
src/bce/data/__pycache__/dataset_final.cpython-310.pyc ADDED
Binary file (8.69 kB). View file
 
src/bce/data/__pycache__/graph_utils.cpython-310.pyc ADDED
Binary file (1.79 kB). View file
 
src/bce/data/__pycache__/utils.cpython-310.pyc ADDED
Binary file (9.3 kB). View file
 
src/bce/data/__pycache__/utils.cpython-39.pyc ADDED
Binary file (9.26 kB). View file
 
src/bce/data/data.py ADDED
@@ -0,0 +1,1262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import h5py
3
+ import torch
4
+ import numpy as np
5
+ import json
6
+ import random
7
+ from pathlib import Path
8
+ from multiprocessing import Pool
9
+
10
+ from typing import List, Tuple, Dict, Optional, Union
11
+ from torch_geometric.data import Data, Dataset, Batch
12
+ from torch_geometric.loader import DataLoader
13
+ from tqdm import tqdm
14
+ import pickle
15
+
16
+ from ..utils.loading import load_epitopes_csv, load_data_split
17
+ from ..utils.constants import BASE_DIR
18
+ from .utils import create_graph_data, create_graph_data_full
19
+
20
+
21
def apply_undersample(data_list: List, undersample_param: Union[int, float], seed: int = 42, verbose: bool = True):
    """
    Apply undersampling to a data list.

    The sample is drawn with a private ``random.Random(seed)`` generator, so
    the call is reproducible without reseeding (and thereby disturbing) the
    process-wide ``random`` module state used by the rest of the program.

    Args:
        data_list: List of data samples
        undersample_param: If None, no sampling is done. If int, sample that
            many samples (capped at ``len(data_list)``); if float in (0, 1],
            sample that fraction of the data (rounded down).
        seed: Random seed for reproducibility
        verbose: Whether to print sampling information

    Returns:
        A new list holding the sampled items, or ``data_list`` itself when no
        reduction is required.

    Raises:
        ValueError: If ``undersample_param`` is a float outside (0, 1], a
            non-positive int, or neither int, float nor None.
    """
    if undersample_param is None:
        return data_list

    original_size = len(data_list)

    if isinstance(undersample_param, float):
        # Sample a fraction of the data
        if not (0 < undersample_param <= 1.0):
            raise ValueError(f"Float undersample must be between 0 and 1, got {undersample_param}")
        target_size = int(original_size * undersample_param)
    elif isinstance(undersample_param, int):
        # Sample a specific number of samples
        if undersample_param <= 0:
            raise ValueError(f"Int undersample must be positive, got {undersample_param}")
        target_size = min(undersample_param, original_size)
    else:
        raise ValueError(f"Undersample must be int, float, or None, got {type(undersample_param)}")

    if target_size >= original_size:
        if verbose:
            print(f"No undersampling applied: requested {target_size}, available {original_size}")
        return data_list

    # A dedicated generator yields the same draw as seeding the global one,
    # but leaves the global RNG state untouched.
    sampled_data = random.Random(seed).sample(data_list, target_size)

    if verbose:
        print(f"Applied undersampling: {original_size} -> {target_size} samples")

    return sampled_data
65
+
66
class AntigenDataset(Dataset):
    """
    Dataset for antigen chains.

    Each data point is the graph returned by ``create_graph_data_full`` for one
    (pdb_id, chain_id) pair of the requested split; ``radius`` is forwarded to
    that helper as the edge-distance cutoff.

    Caching: the HDF5 cache always stores every graph that could be built for
    the split. ``undersample`` is applied in memory afterwards, so the cache
    written by one run can be reused with any other ``undersample`` value.
    Caches written by earlier versions of this class may already be truncated;
    pass ``force_rebuild=True`` once to regenerate them.
    """

    def __init__(
        self,
        data_split: str = "train",
        radius: float = 18,
        threshold: float = 0.25,
        num_posenc: int = 16,
        num_rbf: int = 16,
        undersample: Union[int, float, None] = None,
        cache_dir: Optional[str] = None,
        force_rebuild: bool = False,
        verbose: bool = True,
        seed: int = 42,
        encoder: str = "esmc"
    ):
        """
        Initialize the antigen dataset.

        Args:
            data_split: Data split name ('train', 'val', 'test')
            radius: Distance threshold for edge creation (Å)
            threshold: SASA threshold for surface residues (not used in full protein)
            num_posenc: Number of positional encoding features
            num_rbf: Number of RBF features
            undersample: Undersample parameter (int for count, float for ratio)
            cache_dir: Directory to cache processed data
            force_rebuild: Whether to force rebuild the dataset
            verbose: Whether to print progress information
            seed: Random seed for reproducibility
            encoder: Encoder type ('esmc' or 'esm2')
        """
        self.data_split = data_split
        self.radius = radius
        self.threshold = threshold
        self.num_posenc = num_posenc
        self.num_rbf = num_rbf
        self.undersample = undersample
        self.verbose = verbose
        self.seed = seed
        self.encoder = encoder

        # Set cache directory
        if cache_dir is None:
            cache_dir = Path(f"{BASE_DIR}/data/full_region_cache/antigen_r{radius}")
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Cache file for this configuration
        self.cache_file = self.cache_dir / f"{data_split}_antigen_dataset.h5"

        # Load data splits and epitope information
        self.antigens = load_data_split(data_split, verbose=verbose)
        _, _, self.epitope_dict = load_epitopes_csv()

        # Initialize data list
        self.data_list = []

        # Load from cache when possible; an unreadable or mismatching cache
        # falls through to a rebuild instead of yielding an empty dataset.
        cache_ok = False
        if self.cache_file.exists() and not force_rebuild:
            if verbose:
                print(f"Loading cached antigen dataset from {self.cache_file}")
            cache_ok = self._load_cache()

        if not cache_ok:
            if verbose:
                print(f"Building antigen dataset for {data_split} split...")
            self._build_dataset()
            if self.data_list:
                self._save_cache()
            elif verbose:
                # Never replace an existing cache file with an empty one.
                print("No protein graphs were built; cache not written")

        # Undersample only after the complete split has been cached.
        self._apply_undersample()

        super().__init__()

    @staticmethod
    def _decode_attr(value) -> str:
        """Return an HDF5 attribute as ``str`` whether it was read as bytes or text."""
        return value.decode('utf-8') if isinstance(value, bytes) else str(value)

    def _apply_undersample(self):
        """Reduce ``self.data_list`` in memory according to ``self.undersample``."""
        if self.undersample is not None:
            self.data_list = apply_undersample(
                self.data_list,
                self.undersample,
                seed=self.seed,
                verbose=self.verbose
            )

    def _load_protein_data(self, pdb_id: str, chain_id: str) -> Optional[Dict]:
        """
        Load precomputed protein data from files.

        Args:
            pdb_id: PDB ID
            chain_id: Chain ID

        Returns:
            Dictionary with keys 'embeddings', 'backbone_atoms', 'rsa_values'
            and 'epitope_indices', or None if a required file is missing or
            loading fails
        """
        try:
            protein_key = f"{pdb_id}_{chain_id}"

            # Load embeddings
            embedding_file = Path(BASE_DIR) / "data" / "embeddings" / self.encoder / f"{protein_key}.h5"
            if not embedding_file.exists():
                if self.verbose:
                    print(f"Embedding file not found: {embedding_file}")
                return None

            with h5py.File(embedding_file, "r") as h5f:
                embeddings = h5f["embedding"][:]

            # Load backbone atoms
            coords_file = Path(BASE_DIR) / "data" / "coords" / f"{protein_key}.npy"
            if not coords_file.exists():
                if self.verbose:
                    print(f"Coords file not found: {coords_file}")
                return None
            backbone_atoms = np.load(coords_file)

            # Load RSA values
            rsa_file = Path(BASE_DIR) / "data" / "rsa" / f"{protein_key}.npy"
            if not rsa_file.exists():
                if self.verbose:
                    print(f"RSA file not found: {rsa_file}")
                return None
            rsa_values = np.load(rsa_file)

            # Positions whose binary label equals 1 are the epitope residues;
            # a protein missing from the dict gets no epitope residues.
            binary_labels = self.epitope_dict.get(protein_key, [])
            epitope_indices = [
                idx for idx, is_epitope in enumerate(binary_labels) if is_epitope == 1
            ]

            return {
                'embeddings': embeddings,
                'backbone_atoms': backbone_atoms,
                'rsa_values': rsa_values,
                'epitope_indices': epitope_indices,
            }

        except Exception as e:
            if self.verbose:
                print(f"Error loading protein data for {pdb_id}_{chain_id}: {str(e)}")
            return None

    def _build_dataset(self):
        """
        Build the complete dataset from precomputed data files.

        Fills ``self.data_list`` with one graph per antigen that could be
        loaded and converted. Undersampling is deliberately not done here so
        that the cache written afterwards contains the full split.
        """
        failed_proteins = []

        for pdb_id, chain_id in tqdm(self.antigens, desc=f"Processing {self.data_split} antigens",
                                     disable=not self.verbose):
            try:
                # Load precomputed data
                protein_data = self._load_protein_data(pdb_id, chain_id)
                if protein_data is None:
                    failed_proteins.append(f"{pdb_id}_{chain_id}")
                    continue

                # Create graph data for the full protein
                graph_data = create_graph_data_full(
                    embeddings=protein_data['embeddings'],
                    backbone_atoms=protein_data['backbone_atoms'],
                    rsa_values=protein_data['rsa_values'],
                    epitope_indices=protein_data['epitope_indices'],
                    pdb_id=pdb_id,
                    chain_id=chain_id,
                    num_rbf=self.num_rbf,
                    num_posenc=self.num_posenc,
                    radius=self.radius,
                    verbose=self.verbose
                )

                if graph_data is not None:
                    self.data_list.append(graph_data)
                else:
                    failed_proteins.append(f"{pdb_id}_{chain_id}")

            except Exception as e:
                failed_proteins.append(f"{pdb_id}_{chain_id}")
                if self.verbose:
                    print(f"Error processing {pdb_id}_{chain_id}: {str(e)}")

        if failed_proteins and self.verbose:
            print(f"Failed to process {len(failed_proteins)} proteins: {failed_proteins[:5]}...")

        if self.verbose:
            print(f"Successfully created {len(self.data_list)} protein graphs")

    def _save_cache(self):
        """
        Save processed dataset to cache (best effort).

        A failure is reported but not raised. The partially written file is
        removed so that a later run does not mistake it for a valid cache.
        """
        try:
            self._save_cache_hdf5()
            if self.verbose:
                print(f"Dataset cached to {self.cache_file}")
        except Exception as e:
            if self.verbose:
                print(f"Failed to save cache: {str(e)}")
            try:
                if self.cache_file.exists():
                    self.cache_file.unlink()
            except OSError:
                pass

    def _load_cache(self) -> bool:
        """
        Load processed dataset from cache.

        Returns:
            True when the cache was read successfully, False otherwise (in
            which case ``self.data_list`` is left empty).
        """
        try:
            self._load_cache_hdf5()
        except Exception as e:
            if self.verbose:
                print(f"Failed to load cache: {str(e)}")
            self.data_list = []
            return False

        if self.verbose:
            print(f"Loaded {len(self.data_list)} samples from cache")
        return True

    def _save_cache_hdf5(self):
        """Save dataset using HDF5 format, one group ``protein_{i}`` per graph."""
        gzip_opts = dict(compression='gzip', compression_opts=6)

        with h5py.File(self.cache_file, 'w') as f:
            # Save metadata
            f.attrs['num_samples'] = len(self.data_list)
            f.attrs['radius'] = self.radius
            f.attrs['threshold'] = self.threshold
            f.attrs['data_split'] = self.data_split
            f.attrs['encoder'] = self.encoder
            f.attrs['dataset_type'] = 'antigen_full'

            # Save each protein as a separate group
            for i, data in enumerate(tqdm(self.data_list, desc="Saving dataset...", disable=not self.verbose)):
                group = f.create_group(f'protein_{i}')

                # Save tensors as datasets with compression
                for name in ('x', 'pos', 'rsa', 'edge_index', 'edge_attr', 'y_node'):
                    group.create_dataset(name, data=getattr(data, name).numpy(), **gzip_opts)

                # Save scalar and list attributes
                group.attrs['pdb_id'] = data.pdb_id.encode('utf-8')
                group.attrs['chain_id'] = data.chain_id.encode('utf-8')
                group.attrs['num_nodes'] = data.num_nodes
                group.attrs['num_epitopes'] = data.num_epitopes
                group.attrs['epitope_ratio'] = data.epitope_ratio
                group.attrs['radius'] = data.radius

                # Save epitope indices
                group.create_dataset('epitope_indices', data=np.array(data.epitope_indices), **gzip_opts)

    def _load_cache_hdf5(self):
        """
        Load the complete dataset from the HDF5 cache into ``self.data_list``.

        Raises:
            ValueError: If the cache records an encoder different from
                ``self.encoder``. The cache path does not depend on the
                encoder, so without this check embeddings from another
                encoder would be returned silently.
        """
        self.data_list = []

        with h5py.File(self.cache_file, 'r') as f:
            cached_encoder = f.attrs.get('encoder')
            if cached_encoder is not None:
                cached_encoder = self._decode_attr(cached_encoder)
                if cached_encoder != self.encoder:
                    raise ValueError(
                        f"Cache was built with encoder '{cached_encoder}', "
                        f"but '{self.encoder}' was requested"
                    )

            total_samples = f.attrs['num_samples']

            for i in tqdm(range(total_samples), desc="Loading dataset...", disable=not self.verbose):
                group = f[f'protein_{i}']
                attrs = dict(group.attrs)

                data = Data(
                    x=torch.tensor(group['x'][:]),
                    pos=torch.tensor(group['pos'][:]),
                    rsa=torch.tensor(group['rsa'][:]),
                    edge_index=torch.tensor(group['edge_index'][:]),
                    edge_attr=torch.tensor(group['edge_attr'][:]),
                    y_node=torch.tensor(group['y_node'][:]),
                    epitope_indices=group['epitope_indices'][:].tolist(),
                    pdb_id=self._decode_attr(attrs['pdb_id']),
                    chain_id=self._decode_attr(attrs['chain_id']),
                    num_nodes=int(attrs['num_nodes']),
                    num_epitopes=int(attrs['num_epitopes']),
                    epitope_ratio=float(attrs['epitope_ratio']),
                    radius=float(attrs['radius'])
                )
                self.data_list.append(data)

    def len(self) -> int:
        """Return the number of samples in the dataset."""
        return len(self.data_list)

    def get(self, idx: int) -> Data:
        """Get a sample by index."""
        return self.data_list[idx]

    def get_stats(self) -> Dict:
        """
        Get dataset statistics.

        Returns:
            Dictionary of per-protein and overall node, edge and epitope
            statistics, or an empty dict when the dataset holds no samples.
        """
        if not self.data_list:
            return {}

        # Collect statistics
        num_nodes_list = [data.num_nodes for data in self.data_list]
        num_edges_list = [data.edge_index.shape[1] for data in self.data_list]
        num_epitopes_list = [data.num_epitopes for data in self.data_list]
        epitope_ratio_list = [data.epitope_ratio for data in self.data_list]

        # Overall statistics
        total_nodes = sum(num_nodes_list)
        total_edges = sum(num_edges_list)
        total_epitopes = sum(num_epitopes_list)

        return {
            'num_proteins': len(self.data_list),
            'avg_nodes_per_protein': np.mean(num_nodes_list),
            'std_nodes_per_protein': np.std(num_nodes_list),
            'min_nodes_per_protein': np.min(num_nodes_list),
            'max_nodes_per_protein': np.max(num_nodes_list),
            'avg_edges_per_protein': np.mean(num_edges_list),
            'std_edges_per_protein': np.std(num_edges_list),
            'total_nodes': total_nodes,
            'total_edges': total_edges,
            'total_epitopes': total_epitopes,
            'avg_epitopes_per_protein': np.mean(num_epitopes_list),
            'avg_epitope_ratio': np.mean(epitope_ratio_list),
            'overall_epitope_ratio': total_epitopes / total_nodes if total_nodes > 0 else 0,
        }

    def print_stats(self):
        """Print dataset statistics."""
        stats = self.get_stats()
        if not stats:
            print("No statistics available (empty dataset)")
            return

        print(f"\n=== {self.data_split.upper()} Antigen Dataset Statistics ===")
        print(f"Number of proteins: {stats['num_proteins']:,}")
        print(f"Average nodes per protein: {stats['avg_nodes_per_protein']:.1f} ± {stats['std_nodes_per_protein']:.1f}")
        print(f"Nodes per protein range: [{stats['min_nodes_per_protein']}, {stats['max_nodes_per_protein']}]")
        print(f"Average edges per protein: {stats['avg_edges_per_protein']:.1f} ± {stats['std_edges_per_protein']:.1f}")
        print(f"Total nodes: {stats['total_nodes']:,}")
        print(f"Total edges: {stats['total_edges']:,}")
        print(f"Total epitope nodes: {stats['total_epitopes']:,}")
        print(f"Average epitopes per protein: {stats['avg_epitopes_per_protein']:.1f}")
        print(f"Average epitope ratio per protein: {stats['avg_epitope_ratio']:.3f}")
        print(f"Overall epitope ratio: {stats['overall_epitope_ratio']:.3f}")
        print("=" * 50)
417
+
418
+
419
+ class SphereGraphDataset(Dataset):
420
+ """
421
+ Optimized graph dataset for training ReGEP model using spherical regions from antigen chains.
422
+ Each graph represents a spherical region centered on a surface residue.
423
+
424
+ Optimizations:
425
+ - Only uses HDF5 format for caching
426
+ - Builds complete dataset without zero_ratio filtering
427
+ - Applies zero_ratio and undersample during loading
428
+ - Faster caching with optimized HDF5 structure
429
+ """
430
+
431
def __init__(
    self,
    data_split: str = "train",
    radius: int = 18,
    threshold: float = 0.25,
    num_posenc: int = 16,
    num_rbf: int = 16,
    zero_ratio: float = 0.1,
    undersample: Union[int, float, None] = None,
    cache_dir: Optional[str] = None,
    force_rebuild: bool = False,
    verbose: bool = True,
    seed: int = 42,
    use_embeddings2: bool = False
):
    """
    Initialize the spherical graph dataset.

    The complete dataset is built and cached once; ``zero_ratio``,
    ``undersample`` and ``use_embeddings2`` are applied by the cache loader.
    A freshly built dataset is therefore read back through the loader so
    that the first run returns the same samples as later cached runs.

    Args:
        data_split: Data split name ('train', 'val', 'test')
        radius: Radius for spherical regions
        threshold: SASA threshold for surface residues
        num_posenc: Number of positional encoding features
        num_rbf: Number of RBF features
        zero_ratio: Ratio to downsample graphs with recall=0 (0.3 means keep 30%)
        undersample: Undersample parameter (int for count, float for ratio)
        cache_dir: Directory to cache processed data
        force_rebuild: Whether to force rebuild the dataset
        verbose: Whether to print progress information
        seed: Random seed for reproducibility
        use_embeddings2: If True, the cache loader uses the cached
            'embeddings2' array (when present) as node features instead of
            the primary embeddings
    """
    self.data_split = data_split
    self.radius = radius
    self.threshold = threshold
    self.num_posenc = num_posenc
    self.num_rbf = num_rbf
    self.zero_ratio = zero_ratio
    self.undersample = undersample
    self.verbose = verbose
    self.seed = seed
    self.use_embeddings2 = use_embeddings2

    # Set cache directory to large disk
    if cache_dir is None:
        cache_dir = Path(f"{BASE_DIR}/data/region_cache/sphere_r{radius}")
    self.cache_dir = Path(cache_dir)
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Cache file for this configuration (only HDF5)
    self.cache_file = self.cache_dir / f"{data_split}_dataset_complete.h5"

    # Load data splits
    self.antigens = load_data_split(data_split, verbose=verbose)

    # Initialize data list
    self.data_list = []

    # Load or build dataset
    if self.cache_file.exists() and not force_rebuild:
        if verbose:
            print(f"Loading cached dataset with radius {self.radius} from {self.cache_file}")
        self._load_cache()
    else:
        if verbose:
            print(f"Building complete dataset with radius {self.radius} for {data_split} split...")
        self._build_dataset()
        self._save_cache()

        # The build keeps every region. Read the cache back so the filters
        # that only exist in the loader are applied on the first run as well.
        complete_data = self.data_list
        try:
            self._load_cache_hdf5()
        except Exception as e:
            # Saving is best effort, so the cache may be missing or broken;
            # keep the freshly built samples rather than ending up empty.
            if verbose:
                print(f"Could not reload freshly built cache ({str(e)}); "
                      f"using the unfiltered in-memory dataset")
            self.data_list = complete_data

    super().__init__()
500
+
501
def _load_protein_data(self, pdb_id: str, chain_id: str) -> Optional[Dict]:
    """
    Load precomputed protein data from files.

    Args:
        pdb_id: PDB ID
        chain_id: Chain ID

    Returns:
        Dictionary with keys 'embeddings', 'backbone_atoms', 'rsa_values',
        'coverage_dict', 'epitope_indices' and 'embeddings2' ('embeddings2'
        is None when no ESM2 file exists), or None if a required file or the
        requested radius is missing or loading fails.
        'coverage_dict' maps each centre index to a tuple
        (covered_indices, covered_epitope_indices, precision, recall).
    """
    try:
        protein_key = f"{pdb_id}_{chain_id}"

        # Load embeddings
        embedding_file = Path(BASE_DIR) / "data" / "embeddings" / 'esmc' / f"{protein_key}.h5"
        if not embedding_file.exists():
            if self.verbose:
                print(f"Embedding file not found: {embedding_file}")
            return None

        with h5py.File(embedding_file, "r") as h5f:
            embeddings = h5f["embedding"][:]

        # Load other embeddings if available (optional, missing is not an error)
        esm2_file = Path(BASE_DIR) / "data" / "embeddings" / "esm2" / f"{protein_key}.h5"
        if not esm2_file.exists():
            if self.verbose:
                print(f"ESM2 file not found: {esm2_file}")
            embeddings2 = None
        else:
            with h5py.File(esm2_file, "r") as h5f:
                embeddings2 = h5f["embedding"][:]

        # Load backbone atoms
        coords_file = Path(BASE_DIR) / "data" / "coords" / f"{protein_key}.npy"
        if not coords_file.exists():
            if self.verbose:
                print(f"Coords file not found: {coords_file}")
            return None
        backbone_atoms = np.load(coords_file)

        # Load RSA values
        rsa_file = Path(BASE_DIR) / "data" / "rsa" / f"{protein_key}.npy"
        if not rsa_file.exists():
            if self.verbose:
                print(f"RSA file not found: {rsa_file}")
            return None
        rsa_values = np.load(rsa_file)

        # Load surface coverage data
        sphere_file = Path(BASE_DIR) / "data" / "antigen_sphere" / f"{protein_key}.h5"
        radius_key = f"r{self.radius}"

        if not sphere_file.exists():
            if self.verbose:
                print(f"Sphere file not found: {sphere_file}")
            return None

        coverage_dict = {}
        with h5py.File(sphere_file, "r") as h5f:
            if radius_key not in h5f:
                if self.verbose:
                    print(f"Radius {self.radius} not found in {sphere_file}")
                return None

            radius_group = h5f[radius_key]
            for center_idx_str in radius_group.keys():
                center_group = radius_group[center_idx_str]
                coverage_dict[int(center_idx_str)] = (
                    center_group['covered_indices'][:].tolist(),
                    center_group['covered_epitope_indices'][:].tolist(),
                    float(center_group.attrs['precision']),
                    float(center_group.attrs['recall']),
                )

        # Load epitope data once per dataset instance; the labels do not
        # depend on the protein, so re-reading them for every antigen is
        # loop-invariant work.
        epitopes = getattr(self, '_epitope_dict', None)
        if epitopes is None:
            _, _, epitopes = load_epitopes_csv()
            self._epitope_dict = epitopes
        binary_labels = epitopes.get(protein_key, [])

        # Positions whose binary label equals 1 are the epitope residues
        epitope_indices = [
            idx for idx, is_epitope in enumerate(binary_labels) if is_epitope == 1
        ]

        return {
            'embeddings': embeddings,
            'backbone_atoms': backbone_atoms,
            'rsa_values': rsa_values,
            'coverage_dict': coverage_dict,
            'epitope_indices': epitope_indices,
            'embeddings2': embeddings2
        }

    except Exception as e:
        if self.verbose:
            print(f"Error loading protein data for {pdb_id}_{chain_id}: {str(e)}")
        return None
600
+
601
def _build_dataset(self):
    """
    Build the complete dataset from precomputed data files (no zero_ratio filtering).

    Appends one graph to ``self.data_list`` for every spherical region of
    every antigen that covers at least two residues and for which
    ``create_graph_data`` returns a graph. Antigens whose data cannot be
    loaded or whose processing raises are counted as failed in the summary.
    """
    failed_proteins = []

    for pdb_id, chain_id in tqdm(self.antigens, desc=f"Processing {self.data_split} antigens",
                                 disable=not self.verbose):
        try:
            # Load precomputed data directly
            protein_data = self._load_protein_data(pdb_id, chain_id)
            if protein_data is None:
                # Count load failures too, otherwise the summary below
                # under-reports how many antigens are missing.
                failed_proteins.append(f"{pdb_id}_{chain_id}")
                if self.verbose:
                    print(f"Failed to load data for {pdb_id}_{chain_id}")
                continue

            coverage_dict = protein_data['coverage_dict']
            if not coverage_dict:
                if self.verbose:
                    print(f"No surface regions found for {pdb_id}_{chain_id}")
                continue

            # Process each spherical region (no zero_ratio filtering here)
            for center_idx, (covered_indices, covered_epitope_indices, precision, recall) in coverage_dict.items():
                if len(covered_indices) < 2:  # Skip regions with too few residues
                    continue

                # Create graph data for this region (include all data)
                graph_data = create_graph_data(
                    center_idx=center_idx,
                    covered_indices=covered_indices,
                    covered_epitope_indices=covered_epitope_indices,
                    embeddings=protein_data['embeddings'],
                    embeddings2=protein_data['embeddings2'],
                    backbone_atoms=protein_data['backbone_atoms'],
                    rsa_values=protein_data['rsa_values'],
                    epitope_indices=protein_data['epitope_indices'],
                    recall=recall,
                    precision=precision,
                    pdb_id=pdb_id,
                    chain_id=chain_id,
                    num_rbf=self.num_rbf,
                    num_posenc=self.num_posenc,
                    verbose=self.verbose
                )

                if graph_data is not None:
                    self.data_list.append(graph_data)

        except Exception as e:
            failed_proteins.append(f"{pdb_id}_{chain_id}")
            if self.verbose:
                print(f"Error processing {pdb_id}_{chain_id}: {str(e)}")

    if failed_proteins and self.verbose:
        print(f"Failed to process {len(failed_proteins)} proteins: {failed_proteins[:5]}...")

    if self.verbose:
        print(f"Successfully created {len(self.data_list)} graph samples (complete dataset)")
664
+
665
def _save_cache(self):
    """
    Save processed dataset to cache (best effort).

    A failure is reported but not raised. The partially written file is
    removed, because an existing but incomplete cache file would otherwise
    be picked up by the next run and fail to load.
    """
    try:
        self._save_cache_hdf5()
        if self.verbose:
            print(f"Dataset cached to {self.cache_file}")
    except Exception as e:
        if self.verbose:
            print(f"Failed to save cache: {str(e)}")
        try:
            if self.cache_file.exists():
                self.cache_file.unlink()
        except OSError:
            # Nothing more can be done here; the loader reports the problem.
            pass
674
+
675
def _load_cache(self):
    """
    Read the dataset back from the cache file.

    Any exception raised by the HDF5 loader is reported (when verbose) and
    leaves ``self.data_list`` empty instead of propagating.
    """
    try:
        self._load_cache_hdf5()
    except Exception as e:
        if self.verbose:
            print(f"Failed to load cache: {str(e)}")
        self.data_list = []
    else:
        if self.verbose:
            print(f"Loaded {len(self.data_list)} samples from cache")
685
+
686
def _save_cache_hdf5(self):
    """
    Save the dataset to ``self.cache_file`` in HDF5 format.

    File attributes hold the metadata; each sample is written to its own
    group ``graph_{i}``. Samples are written one at a time, so the zero
    placeholder used for a missing 'embeddings2' exists for one sample only
    instead of being allocated for the whole dataset up front.
    """
    gzip_opts = dict(compression='gzip', compression_opts=6)

    with h5py.File(self.cache_file, 'w') as f:
        # Save metadata
        f.attrs['num_samples'] = len(self.data_list)
        f.attrs['radius'] = self.radius
        f.attrs['threshold'] = self.threshold
        f.attrs['data_split'] = self.data_split
        f.attrs['complete_dataset'] = True  # Mark as complete dataset

        num_samples = len(self.data_list)
        if num_samples == 0:
            return

        progress_bar = tqdm(enumerate(self.data_list), total=num_samples, desc="Saving dataset...", disable=not self.verbose)

        for i, data in progress_bar:
            group = f.create_group(f'graph_{i}')

            # Save tensors as datasets with compression
            for name in ('x', 'pos', 'rsa', 'edge_index', 'edge_attr', 'y', 'y_node'):
                group.create_dataset(name, data=getattr(data, name).numpy(), **gzip_opts)

            # Handle embeddings2 safely - it could be None or numpy array
            embeddings2 = getattr(data, 'embeddings2', None)
            if embeddings2 is None:
                # No embeddings2 available, use zeros as placeholder
                embeddings2 = np.zeros((data.num_nodes, 1280), dtype=np.float32)  # ESM2 dim
            elif not isinstance(embeddings2, np.ndarray):
                # It's a torch tensor
                embeddings2 = embeddings2.numpy()
            group.create_dataset('embeddings2', data=embeddings2, **gzip_opts)

            # Save scalar attributes
            group.attrs['center_idx'] = data.center_idx
            group.attrs['precision'] = data.precision
            group.attrs['pdb_id'] = data.pdb_id.encode('utf-8')
            group.attrs['chain_id'] = data.chain_id.encode('utf-8')
            group.attrs['num_nodes'] = data.num_nodes

            # Save list attributes as datasets with compression
            group.create_dataset('covered_indices', data=np.array(data.covered_indices), **gzip_opts)
774
+
775
def _load_cache_hdf5(self):
    """
    Load the cached dataset, applying ``zero_ratio`` and ``undersample``.

    Steps:
        1. Read the first element of ``y`` of every cached graph and split
           the sample indices into zero and non-zero recall.
        2. Keep all non-zero samples plus a seeded random subset of the
           zero-recall samples.
        3. Materialise only the selected graphs.
        4. Apply ``undersample`` to the resulting list.

    NOTE: a ``zero_ratio`` that is not a number within [0, 1] keeps none of
    the zero-recall samples.

    Sampling uses a private ``random.Random(self.seed)`` so the global
    ``random`` state is not reseeded as a side effect.
    """
    self.data_list = []

    def decode_attr(value):
        # h5py may return string attributes as bytes or as str
        return value.decode('utf-8') if isinstance(value, bytes) else str(value)

    with h5py.File(self.cache_file, 'r') as f:
        # PHASE 1: Rapid metadata scan
        zero_recall_indices = []
        non_zero_recall_indices = []
        total_samples = f.attrs['num_samples']

        if self.verbose:
            print(f"Scanning {total_samples} samples for recall values...")

        for i in range(total_samples):
            recall = f[f'graph_{i}/y'][0].item()
            if recall == 0.0:
                zero_recall_indices.append(i)
            else:
                non_zero_recall_indices.append(i)

        # PHASE 2: Apply zero_ratio filtering
        selected_indices = list(non_zero_recall_indices)

        if isinstance(self.zero_ratio, (int, float)) and 0 <= self.zero_ratio <= 1:
            if self.zero_ratio < 1.0 and zero_recall_indices:
                target_count = int(len(zero_recall_indices) * self.zero_ratio)
                selected_zero_indices = random.Random(self.seed).sample(zero_recall_indices, target_count)
                selected_indices.extend(selected_zero_indices)

                if self.verbose:
                    kept = len(selected_zero_indices)
                    total = len(zero_recall_indices)
                    print(f"Zero-recall filtering: kept {kept}/{total} samples (ratio={self.zero_ratio})")
            else:
                selected_indices.extend(zero_recall_indices)

        # PHASE 3: Selective data loading with safe string handling
        if self.verbose:
            print(f"Loading {len(selected_indices)} selected samples...")

        for idx in tqdm(selected_indices, disable=not self.verbose):
            group = f[f'graph_{idx}']
            attrs = dict(group.attrs)

            # Node features: cached embeddings2 when requested and present,
            # otherwise the primary embeddings stored as 'x'
            feature_key = 'embeddings2' if (self.use_embeddings2 and 'embeddings2' in group) else 'x'

            data = Data(
                x=torch.tensor(group[feature_key][:]),
                pos=torch.tensor(group['pos'][:]),
                rsa=torch.tensor(group['rsa'][:]),
                edge_index=torch.tensor(group['edge_index'][:]),
                edge_attr=torch.tensor(group['edge_attr'][:]),
                y=torch.tensor(group['y'][:]),
                y_node=torch.tensor(group['y_node'][:]),
                center_idx=int(attrs['center_idx']),
                covered_indices=group['covered_indices'][:].tolist(),
                precision=float(attrs['precision']),
                pdb_id=decode_attr(attrs['pdb_id']),
                chain_id=decode_attr(attrs['chain_id']),
                num_nodes=int(attrs['num_nodes'])
            )

            self.data_list.append(data)

    # PHASE 4: Apply undersampling
    if self.undersample is not None:
        self.data_list = apply_undersample(
            self.data_list,
            self.undersample,
            seed=self.seed,
            verbose=self.verbose
        )

    if self.verbose:
        print(f"Loaded {len(self.data_list)} samples (optimized loader)")
863
+
864
def len(self) -> int:
    """Report how many graphs are currently held in ``self.data_list``."""
    graph_count = len(self.data_list)
    return graph_count
867
+
868
def get(self, idx: int) -> Data:
    """Return the graph stored at position ``idx`` of ``self.data_list``."""
    graph = self.data_list[idx]
    return graph
871
+
872
def apply_filters(self, zero_ratio: Optional[float] = None, undersample: Union[int, float, None] = None, seed: int = None):
    """
    Update the filter settings and reload the dataset from its cache file.

    Provided for compatibility; passing these settings at construction time
    avoids the extra reload.

    Args:
        zero_ratio: Ratio to downsample graphs with recall=0. ``None`` keeps
            the current setting.
        undersample: Undersample parameter. ``None`` keeps the current setting.
        seed: Random seed for reproducibility. ``None`` keeps the current seed.
    """
    # Fall back to the stored seed when the caller did not supply one.
    effective_seed = self.seed if seed is None else seed

    # Only non-None values overwrite what the instance already holds.
    requested = (
        ('zero_ratio', zero_ratio),
        ('undersample', undersample),
        ('seed', effective_seed),
    )
    for attr_name, new_value in requested:
        if new_value is not None:
            setattr(self, attr_name, new_value)

    # Filtering is done by re-reading the cache; without it nothing can be done.
    if not self.cache_file.exists():
        if self.verbose:
            print("Warning: No cache file found, filters cannot be applied")
        return

    if self.verbose:
        print("Re-applying filters to cached dataset...")
    self._load_cache_hdf5()
901
+
902
def get_stats(self) -> Dict:
    """
    Summarise the graphs in ``self.data_list``.

    Returns:
        An empty dict when no graphs are loaded. Otherwise a dict with graph
        count, node-count mean/std/min/max, total node and epitope counts,
        the epitope ratio, recall and precision mean/std, and the number of
        graphs whose recall is zero.
    """
    graphs = self.data_list
    if not graphs:
        return {}

    # Per-graph quantities
    node_counts = [graph.num_nodes for graph in graphs]
    recalls = [graph.y.item() for graph in graphs]
    precisions = [graph.precision for graph in graphs]

    # Aggregates over all nodes / graphs
    node_total = sum(node_counts)
    epitope_total = sum(graph.y_node.sum().item() for graph in graphs)
    zero_recall_count = sum(1 for graph in graphs if graph.y.item() == 0)

    if node_total > 0:
        epitope_fraction = epitope_total / node_total
    else:
        epitope_fraction = 0

    return {
        'num_graphs': len(graphs),
        'avg_nodes_per_graph': np.mean(node_counts),
        'std_nodes_per_graph': np.std(node_counts),
        'min_nodes_per_graph': np.min(node_counts),
        'max_nodes_per_graph': np.max(node_counts),
        'total_nodes': node_total,
        'total_epitopes': epitope_total,
        'epitope_ratio': epitope_fraction,
        'avg_recall': np.mean(recalls),
        'std_recall': np.std(recalls),
        'avg_precision': np.mean(precisions),
        'std_precision': np.std(precisions),
        'num_zero_recall': zero_recall_count,
    }
934
+
935
def print_stats(self):
    """Print the summary produced by ``get_stats`` as a small text report."""
    stats = self.get_stats()
    if not stats:
        # get_stats() returns an empty dict when no graphs are loaded.
        print("No statistics available (empty dataset)")
        return

    report_lines = [
        f"\n=== {self.data_split.upper()} Dataset Statistics ===",
        f"Number of graphs: {stats['num_graphs']:,}",
        f"Average nodes per graph: {stats['avg_nodes_per_graph']:.1f} ± {stats['std_nodes_per_graph']:.1f}",
        f"Nodes per graph range: [{stats['min_nodes_per_graph']}, {stats['max_nodes_per_graph']}]",
        f"Total nodes: {stats['total_nodes']:,}",
        f"Total epitope nodes: {stats['total_epitopes']:,}",
        f"Epitope ratio: {stats['epitope_ratio']:.3f}",
        f"Average recall: {stats['avg_recall']:.3f} ± {stats['std_recall']:.3f}",
        f"Average precision: {stats['avg_precision']:.3f} ± {stats['std_precision']:.3f}",
        f"Number of graphs with zero recall: {stats['num_zero_recall']:,}",
        "=" * 40,
    ]
    for line in report_lines:
        print(line)
953
+
954
+
955
class MultiRadiusGraphDataset(Dataset):
    """
    Dataset that combines multiple radius datasets for multi-scale training.

    One ``SphereGraphDataset`` is built per radius and their ``data_list``
    contents are concatenated, in radius order, into a single flat list.
    """

    def __init__(
        self,
        data_split: str = "train",
        radii: Optional[List[int]] = None,
        threshold: float = 0.25,
        num_posenc: int = 16,
        num_rbf: int = 16,
        zero_ratio: float = 0.1,
        undersample: Union[int, float, None] = None,
        cache_dir: Optional[str] = None,
        force_rebuild: bool = False,
        verbose: bool = True,
        use_embeddings2: bool = False
    ):
        """
        Initialize multi-radius dataset.

        Args:
            data_split: Data split name
            radii: List of radii to use; ``None`` means ``[16, 18, 20]``
            threshold: SASA threshold for surface residues
            num_posenc: Number of positional encoding features
            num_rbf: Number of RBF features
            zero_ratio: Ratio to downsample graphs with recall=0
            undersample: Undersample parameter (int for count, float for ratio)
            cache_dir: Directory to cache processed data
            force_rebuild: Whether to force rebuild the dataset
            verbose: Whether to print progress information
            use_embeddings2: Forwarded unchanged to every ``SphereGraphDataset``
        """
        self.data_split = data_split
        # Copy so neither a shared default nor the caller's list is aliased
        # into instance state (the list is also handed out by get_stats()).
        self.radii = [16, 18, 20] if radii is None else list(radii)
        self.verbose = verbose

        # Create individual datasets, one per radius
        self.datasets = []
        for radius in self.radii:
            dataset = SphereGraphDataset(
                data_split=data_split,
                radius=radius,
                threshold=threshold,
                num_posenc=num_posenc,
                num_rbf=num_rbf,
                zero_ratio=zero_ratio,
                undersample=undersample,
                cache_dir=cache_dir,
                force_rebuild=force_rebuild,
                verbose=verbose,
                use_embeddings2=use_embeddings2
            )
            self.datasets.append(dataset)

        # Combine all data
        self.data_list = []
        for dataset in self.datasets:
            self.data_list.extend(dataset.data_list)

        if verbose:
            print(f"Combined {len(self.datasets)} datasets with {len(self.data_list)} total samples")

        super().__init__()

    def len(self) -> int:
        """Return the number of graphs across all radii."""
        return len(self.data_list)

    def get(self, idx: int) -> Data:
        """Return the graph at position ``idx`` of the combined list."""
        return self.data_list[idx]

    def apply_filters(self, undersample: Union[int, float, None] = None, seed: int = 42):
        """
        Apply filtering to the loaded multi-radius dataset.

        Args:
            undersample: Undersample parameter (int for count, float for ratio).
                ``None`` leaves the dataset untouched.
            seed: Random seed for reproducibility
        """
        if undersample is not None:
            # Honour the instance verbosity, like the single-radius dataset does.
            self.data_list = apply_undersample(
                self.data_list, undersample, seed=seed, verbose=self.verbose
            )

    def get_stats(self) -> Dict:
        """
        Get combined dataset statistics.

        Returns:
            An empty dict when no graphs are loaded; otherwise graph/node
            counts, the radii in use, epitope totals and ratio, and the
            mean/std of recall and precision. The std and precision entries
            are required by ``print_stats``.
        """
        if not self.data_list:
            return {}

        # Collect per-graph statistics
        num_nodes_list = [data.num_nodes for data in self.data_list]
        recall_list = [data.y.item() for data in self.data_list]
        precision_list = [data.precision for data in self.data_list]

        # Node-level statistics
        total_nodes = sum(num_nodes_list)
        total_epitopes = sum(data.y_node.sum().item() for data in self.data_list)

        stats = {
            'num_graphs': len(self.data_list),
            'num_radii': len(self.radii),
            'radii': self.radii,
            'avg_nodes_per_graph': np.mean(num_nodes_list),
            'std_nodes_per_graph': np.std(num_nodes_list),
            'min_nodes_per_graph': np.min(num_nodes_list),
            'max_nodes_per_graph': np.max(num_nodes_list),
            'total_nodes': total_nodes,
            'total_epitopes': total_epitopes,
            'epitope_ratio': total_epitopes / total_nodes if total_nodes > 0 else 0,
            'avg_recall': np.mean(recall_list),
            'std_recall': np.std(recall_list),
            'avg_precision': np.mean(precision_list),
            'std_precision': np.std(precision_list),
        }

        return stats

    def print_stats(self):
        """Print dataset statistics."""
        stats = self.get_stats()
        if not stats:
            print("No statistics available (empty dataset)")
            return

        print(f"\n=== {self.data_split.upper()} Dataset Statistics ===")
        print(f"Number of graphs: {stats['num_graphs']:,}")
        print(f"Average nodes per graph: {stats['avg_nodes_per_graph']:.1f} ± {stats['std_nodes_per_graph']:.1f}")
        print(f"Nodes per graph range: [{stats['min_nodes_per_graph']}, {stats['max_nodes_per_graph']}]")
        print(f"Total nodes: {stats['total_nodes']:,}")
        print(f"Total epitope nodes: {stats['total_epitopes']:,}")
        print(f"Epitope ratio: {stats['epitope_ratio']:.3f}")
        print(f"Average recall: {stats['avg_recall']:.3f} ± {stats['std_recall']:.3f}")
        print(f"Average precision: {stats['avg_precision']:.3f} ± {stats['std_precision']:.3f}")
        print("=" * 40)
1085
+
1086
+
1087
+
1088
+ # Utility functions for dataset creation and management
1089
def create_datasets(
    radii: List[int] = [16, 18, 20],
    splits: List[str] = ["train", "test"],
    threshold: float = 0.25,
    zero_ratio: float = None,
    undersample: Union[int, float, None] = None,
    cache_dir: Optional[str] = None,
    force_rebuild: bool = False,
    verbose: bool = False,
    seed: int = 42,
    use_embeddings2: bool = False,
) -> Dict[str, SphereGraphDataset]:
    """
    Build one dataset per requested split.

    A single radius yields a ``SphereGraphDataset`` (and prints its stats when
    ``verbose``); any other number of radii yields a
    ``MultiRadiusGraphDataset``. ``seed`` is only forwarded in the
    single-radius case.

    Args:
        radii: List of radii to use
        splits: List of data splits to create
        threshold: SASA threshold for surface residues
        zero_ratio: Ratio to downsample graphs with recall=0
        undersample: Undersample parameter (int for count, float for ratio)
        cache_dir: Directory to cache processed data
        force_rebuild: Whether to force rebuild datasets
        verbose: Whether to print progress information
        seed: Random seed for reproducibility
        use_embeddings2: Forwarded unchanged to the dataset constructors

    Returns:
        Dictionary mapping split names to datasets
    """
    # Options that both dataset flavours receive verbatim.
    shared_options = dict(
        threshold=threshold,
        zero_ratio=zero_ratio,
        undersample=undersample,
        cache_dir=cache_dir,
        force_rebuild=force_rebuild,
        verbose=verbose,
        use_embeddings2=use_embeddings2,
    )

    def build_for(split_name):
        if len(radii) == 1:
            # Single radius dataset
            single = SphereGraphDataset(
                data_split=split_name,
                radius=radii[0],
                seed=seed,
                **shared_options,
            )
            if verbose:
                single.print_stats()
            return single

        # Multi-radius dataset
        return MultiRadiusGraphDataset(
            data_split=split_name,
            radii=radii,
            **shared_options,
        )

    return {split_name: build_for(split_name) for split_name in splits}
1154
+
1155
+
1156
def custom_collate_fn(batch):
    """
    Collate a list of PyG ``Data`` objects for the ReGEP model.

    This is a thin wrapper around ``Batch.from_data_list``; no extra
    processing happens here.

    NOTE(review): the original comments say the model concatenates
    ``[x, rsa, ss]`` internally; no ``ss`` field is handled in this
    function -- confirm against the model code.
    """
    return Batch.from_data_list(batch)
1169
+
1170
+
1171
class ReGEPDataLoader(DataLoader):
    """
    DataLoader for the ReGEP model.

    Identical to the base ``DataLoader`` except that ``collate_fn`` defaults
    to ``custom_collate_fn`` when the caller does not supply one.
    """

    def __init__(self, dataset, batch_size=32, shuffle=True, **kwargs):
        """
        Initialize the loader.

        Args:
            dataset: The dataset to load from
            batch_size: Batch size
            shuffle: Whether to shuffle the data
            **kwargs: Additional arguments for DataLoader; an explicit
                ``collate_fn`` here takes precedence over the default
        """
        # Fill in the project collate function only if none was given.
        kwargs.setdefault('collate_fn', custom_collate_fn)

        super().__init__(dataset=dataset, batch_size=batch_size, shuffle=shuffle, **kwargs)
1197
+
1198
def create_data_loader(
    radii=None,
    batch_size=32,
    zero_ratio=0.1,
    undersample=0.5,
    seed=42,
    verbose=False,
    use_embeddings2=False,
    **kwargs
):
    """
    Create train and test data loaders.

    The training split is filtered with ``zero_ratio`` / ``undersample``; the
    test split is loaded unfiltered (both set to ``None``). The test loader
    uses four times the training batch size and does not shuffle.

    Args:
        radii (list): List of radii for data processing; ``None`` means
            ``[16, 18, 20]``
        batch_size (int): Batch size for training
        zero_ratio (float): Ratio of zero samples for training
        undersample (float): Undersampling ratio for training
        seed (int): Random seed (forwarded for the training split only)
        verbose (bool): Whether to print verbose information
        use_embeddings2 (bool): Forwarded unchanged to the dataset builders
        **kwargs: Additional arguments for both data loaders. A ``collate_fn``
            given here replaces the default ``custom_collate_fn``.

    Returns:
        tuple: (train_loader, test_loader)
    """
    radii = [16, 18, 20] if radii is None else list(radii)

    train_dataset = create_datasets(
        radii=radii,
        splits=["train"],
        threshold=0.25,
        undersample=undersample,
        zero_ratio=zero_ratio,
        cache_dir=None,
        seed=seed,
        verbose=verbose,
        use_embeddings2=use_embeddings2
    )["train"]

    test_dataset = create_datasets(
        radii=radii,
        splits=["test"],
        threshold=0.25,
        undersample=None,
        zero_ratio=None,
        cache_dir=None,
        verbose=verbose,
        use_embeddings2=use_embeddings2
    )["test"]

    # Resolve collate_fn once. Passing it explicitly AND via **kwargs would
    # raise "got multiple values for keyword argument 'collate_fn'".
    loader_kwargs = dict(kwargs)
    loader_kwargs.setdefault('collate_fn', custom_collate_fn)

    train_loader = ReGEPDataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        **loader_kwargs
    )

    test_loader = ReGEPDataLoader(
        test_dataset,
        batch_size=batch_size * 4,
        shuffle=False,
        **loader_kwargs
    )

    return train_loader, test_loader
1262
+
src/bce/data/utils.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simplified graph utilities for SphereGraphDataset.
3
+ Contains only the essential functions needed without external dependencies.
4
+ """
5
+
6
+ import numpy as np
7
+ from pathlib import Path
8
+ from typing import List, Optional, Tuple
9
+
10
+ import torch
11
+ from torch_geometric.data import Data
12
+ from torch_geometric.nn import radius_graph
13
+
14
def parse_binding_site_txt(txt_path: Path) -> Tuple[List[str], List[str], List[torch.Tensor]]:
    """
    Read a fasta-like text file made of three-line records.

    Blank lines are ignored. Each record is: an id line (a leading ``>`` is
    removed if present), a sequence line, and a label line whose characters
    are each converted with ``int`` into a float32 tensor.

    Returns: (rna_ids, rna_seqs, labels)
    """
    with open(txt_path, 'r') as handle:
        stripped_lines = (raw.strip() for raw in handle)
        records = [text for text in stripped_lines if text]

    rna_ids, rna_seqs, labels = [], [], []
    for start in range(0, len(records), 3):
        header = records[start]
        sequence = records[start + 1]
        label_text = records[start + 2]

        label_tensor = torch.tensor([int(ch) for ch in label_text], dtype=torch.float32)

        rna_ids.append(header[1:] if header.startswith('>') else header)
        rna_seqs.append(sequence)
        labels.append(label_tensor)

    return rna_ids, rna_seqs, labels
31
+
32
def create_graph_data_full(
    embeddings: np.ndarray,
    backbone_atoms: np.ndarray,
    rsa_values: np.ndarray,
    epitope_indices: List[int],
    pdb_id: str,
    chain_id: str,
    num_rbf: int = 16,
    num_posenc: int = 16,
    radius: float = 18.0,
    verbose: bool = True
) -> Optional[Data]:
    """
    Build a single PyTorch Geometric graph covering a whole protein chain.

    Nodes are residues; edges come from ``radius_graph`` over the CA
    coordinates (at most 32 neighbours per node, no self-loops). If that
    yields no edges, one self-loop per node is used instead.

    Args:
        embeddings: Full protein embeddings [seq_len, embed_dim]
        backbone_atoms: Full protein backbone atoms [seq_len, 3, 3] (N, CA, C)
        rsa_values: Full protein RSA values [seq_len]
        epitope_indices: List of epitope residue indices; entries outside
            ``[0, seq_len)`` are ignored when building node labels
        pdb_id: PDB ID
        chain_id: Chain ID
        num_rbf: Number of RBF features
        num_posenc: Number of positional encoding features
        radius: Distance threshold for edge creation (default: 18.0 Å)
        verbose: Whether to print debug information

    Returns:
        PyTorch Geometric Data object, or None when the inputs are
        inconsistent/empty/non-finite or any exception occurs
    """

    def report(message):
        # All diagnostics are optional and go to stdout.
        if verbose:
            print(message)

    try:
        # --- input validation -------------------------------------------
        seq_len = len(embeddings)
        lengths_agree = len(backbone_atoms) == seq_len and len(rsa_values) == seq_len
        if not lengths_agree:
            report(f"[WARNING] Dimension mismatch for {pdb_id}_{chain_id}: "
                   f"embeddings={len(embeddings)}, backbone={len(backbone_atoms)}, "
                   f"rsa={len(rsa_values)}")
            return None

        if seq_len == 0:
            report(f"[WARNING] Empty protein {pdb_id}_{chain_id}")
            return None

        # --- node labels (binary epitope classification) ------------------
        node_labels = np.zeros(seq_len, dtype=np.float32)
        if epitope_indices:
            in_range = [idx for idx in epitope_indices if 0 <= idx < seq_len]
            if in_range:
                node_labels[in_range] = 1.0

            if len(in_range) != len(epitope_indices):
                report(f"[WARNING] Some epitope indices out of bounds for {pdb_id}_{chain_id}: "
                       f"filtered {len(epitope_indices)} -> {len(in_range)}")

        # --- CA coordinates -------------------------------------------------
        ca_coords = backbone_atoms[:, 1, :]  # CA is the second backbone atom

        if ca_coords.shape[0] == 0:
            report(f"[WARNING] Empty CA coordinates for {pdb_id}_{chain_id}")
            return None

        if not np.isfinite(ca_coords).all():
            report(f"[WARNING] Invalid CA coordinates (NaN/Inf) for {pdb_id}_{chain_id}")
            return None

        ca_coords_tensor = torch.tensor(ca_coords, dtype=torch.float32)
        if ca_coords_tensor.numel() == 0:
            report(f"[WARNING] Empty CA coordinates tensor for {pdb_id}_{chain_id}")
            return None

        # --- edges -------------------------------------------------------------
        edge_index = radius_graph(ca_coords_tensor, r=radius, loop=False, max_num_neighbors=32)

        if edge_index.shape[1] == 0:
            report(f"[WARNING] No edges found for {pdb_id}_{chain_id} with radius {radius}")
            # Fall back to self-loops so the graph is never edgeless.
            edge_index = torch.stack([torch.arange(seq_len), torch.arange(seq_len)], dim=0)

        edge_features = compute_edge_features(
            ca_coords, edge_index, num_rbf=num_rbf, num_posenc=num_posenc
        )

        # --- protein-level statistics -----------------------------------------
        num_epitopes = int(node_labels.sum())
        epitope_ratio = num_epitopes / seq_len if seq_len > 0 else 0.0

        data = Data(
            x=torch.tensor(embeddings, dtype=torch.float32),        # [seq_len, embed_dim]
            pos=torch.tensor(backbone_atoms, dtype=torch.float32),  # [seq_len, 3, 3]
            rsa=torch.tensor(rsa_values, dtype=torch.float32),      # [seq_len]
            edge_index=edge_index,                                  # [2, n_edges]
            edge_attr=edge_features,                                # [n_edges, edge_dim]
            y_node=torch.tensor(node_labels, dtype=torch.float32),  # [seq_len]
            epitope_indices=epitope_indices,                        # as passed in, unfiltered
            pdb_id=pdb_id,
            chain_id=chain_id,
            num_nodes=seq_len,
            num_epitopes=num_epitopes,
            epitope_ratio=epitope_ratio,
            radius=radius
        )

        report(f"[INFO] Created full protein graph for {pdb_id}_{chain_id}: "
               f"{seq_len} nodes, {edge_index.shape[1]} edges, {num_epitopes} epitopes")

        return data

    except Exception as e:
        # Best-effort builder: any failure is reported and turned into None.
        if verbose:
            print(f"[ERROR] Failed to create full protein graph for {pdb_id}_{chain_id}: {str(e)}")
        return None
163
+
164
def create_graph_data(
    center_idx: int,
    covered_indices: List[int],
    covered_epitope_indices: List[int],
    embeddings: np.ndarray,
    backbone_atoms: np.ndarray,
    rsa_values: np.ndarray,
    epitope_indices: List[int],
    recall: float,
    precision: float,
    pdb_id: str,
    chain_id: str,
    embeddings2: np.ndarray = None,
    num_rbf: int = 16,
    num_posenc: int = 16,
    verbose: bool = True
) -> Optional[Data]:
    """
    Build a fully connected PyTorch Geometric graph for one spherical region.

    Args:
        center_idx: Index of the center residue
        covered_indices: List of residue indices in the region
        covered_epitope_indices: List of epitope residue indices in the region
            (accepted but not read by this function)
        embeddings: Full protein embeddings
        backbone_atoms: Full protein backbone atoms [seq_len, 3, 3]
        rsa_values: Full protein RSA values
        epitope_indices: List of all epitope indices in the protein; used to
            label the covered residues
        recall: Region recall value, stored as the graph-level label ``y``
        precision: Region precision value
        pdb_id: PDB ID
        chain_id: Chain ID
        embeddings2: Optional second embedding matrix, sliced to the region
            and stored as-is (``None`` when not given)
        num_rbf: Number of RBF features
        num_posenc: Number of positional encoding features
        verbose: Whether to print warnings/errors

    Returns:
        PyTorch Geometric Data object or None if creation fails
    """
    try:
        # --- index validation ----------------------------------------------
        if not covered_indices:
            if verbose:
                print(f"[WARNING] Empty covered_indices for center {center_idx}")
            return None

        max_idx = max(covered_indices)
        shortest_input = min(len(embeddings), len(backbone_atoms), len(rsa_values))
        if max_idx >= shortest_input:
            if verbose:
                print(f"[WARNING] Index out of bounds: max_idx={max_idx}, "
                      f"embeddings_len={len(embeddings)}, backbone_len={len(backbone_atoms)}, "
                      f"rsa_len={len(rsa_values)}")
            return None

        # --- slice per-residue inputs down to the region -----------------------
        region_embeddings = embeddings[covered_indices]   # [n_nodes, embed_dim]
        region_backbone = backbone_atoms[covered_indices]  # [n_nodes, 3, 3]
        region_rsa = rsa_values[covered_indices]           # [n_nodes]
        region_embeddings2 = embeddings2[covered_indices] if embeddings2 is not None else None

        # --- node labels: 1.0 where the covered residue is an epitope ----------
        n_nodes = len(covered_indices)
        node_labels = np.zeros(n_nodes, dtype=np.float32)
        node_labels[np.isin(covered_indices, epitope_indices)] = 1.0

        # --- fully connected edges (no self-loops) and their features ----------
        edge_index = torch.tensor(get_edges(n_nodes), dtype=torch.long)
        ca_coords = region_backbone[:, 1, :]  # CA coordinates [n_nodes, 3]
        edge_features = compute_edge_features(
            ca_coords, edge_index, num_rbf=num_rbf, num_posenc=num_posenc
        )

        return Data(
            x=torch.tensor(region_embeddings, dtype=torch.float32),  # [n_nodes, embed_dim]
            pos=torch.tensor(region_backbone, dtype=torch.float32),  # [n_nodes, 3, 3]
            rsa=torch.tensor(region_rsa, dtype=torch.float32),       # [n_nodes]
            edge_index=edge_index,                                   # [2, n_edges]
            edge_attr=edge_features,                                 # [n_edges, edge_dim]
            y=torch.tensor([recall], dtype=torch.float32),           # graph-level label
            y_node=torch.tensor(node_labels, dtype=torch.float32),   # [n_nodes]
            center_idx=center_idx,
            covered_indices=covered_indices,
            precision=precision,
            pdb_id=pdb_id,
            chain_id=chain_id,
            num_nodes=n_nodes,
            embeddings2=region_embeddings2,                          # region slice or None
        )

    except Exception as e:
        # Best-effort builder: any failure is reported and turned into None.
        if verbose:
            print(f"Error creating graph data for {pdb_id}_{chain_id} center {center_idx}: {str(e)}")
        return None
276
+
277
def compute_edge_features(coords: np.ndarray, edge_index: torch.Tensor, num_rbf: int = 16, num_posenc: int = 16) -> torch.Tensor:
    """
    Build per-edge features from RBF-encoded distances and positional encodings.

    Args:
        coords: Node coordinates [n_nodes, 3]
        edge_index: Edge connectivity [2, n_edges]
        num_rbf: Number of RBF features
        num_posenc: Number of positional encoding features

    Returns:
        Edge features [n_edges, num_rbf + num_posenc]: the RBF block first,
        then the positional-encoding block
    """
    points = torch.tensor(coords, dtype=torch.float32)

    # Euclidean length of (source - target) for every edge.
    source_points = points[edge_index[0]]
    target_points = points[edge_index[1]]
    distances = torch.norm(source_points - target_points, dim=-1)  # [n_edges]

    feature_blocks = [
        rbf(distances, D_count=num_rbf),               # [n_edges, num_rbf]
        get_posenc(edge_index, num_posenc=num_posenc),  # [n_edges, num_posenc]
    ]
    return torch.cat(feature_blocks, dim=-1)
307
+
308
+
309
def get_edges(n_nodes):
    """Return ``[rows, cols]`` listing every ordered node pair ``(i, j)`` with ``i != j``."""
    ordered_pairs = [
        (src, dst)
        for src in range(n_nodes)
        for dst in range(n_nodes)
        if src != dst
    ]
    rows = [src for src, _ in ordered_pairs]
    cols = [dst for _, dst in ordered_pairs]
    return [rows, cols]
318
+
319
+
320
def get_posenc(edge_index, num_posenc=16):
    """
    Sinusoidal encoding of the index offset ``edge_index[0] - edge_index[1]``.
    From https://github.com/jingraham/neurips19-graph-protein-design

    Returns a tensor whose last axis holds the cosine terms followed by the
    sine terms, one pair per frequency.
    """
    offsets = (edge_index[0] - edge_index[1]).unsqueeze(-1)

    # Frequencies for every second position, as in the reference code.
    even_positions = torch.arange(0, num_posenc, 2, dtype=torch.float32, device=offsets.device)
    frequency = torch.exp(even_positions * -(np.log(10000.0) / num_posenc))

    angles = offsets * frequency
    cos_part = torch.cos(angles)
    sin_part = torch.sin(angles)
    return torch.cat((cos_part, sin_part), -1)
335
+
336
+
337
def rbf(D, D_min=0., D_max=20., D_count=16):
    """
    Radial Basis Function (RBF) encoding for distances.
    From https://github.com/jingraham/neurips19-graph-protein-design

    Places ``D_count`` Gaussian centres evenly between ``D_min`` and ``D_max``
    (inclusive) with width ``(D_max - D_min) / D_count`` and evaluates
    ``exp(-((D - centre) / width) ** 2)`` for each. For ``D`` of shape
    [...dims] the result has shape [...dims, D_count].
    """
    centers = torch.linspace(D_min, D_max, D_count, device=D.device).view([1, -1])
    width = (D_max - D_min) / D_count

    scaled_offset = (D.unsqueeze(-1) - centers) / width
    return torch.exp(-(scaled_offset ** 2))
src/bce/model/EGNN.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch_scatter import scatter
5
+ from torch_geometric.nn import InstanceNorm
6
+
7
class EGNNLayer(nn.Module):
    """
    EGNN layer with optional feed forward network and per-graph normalization.

    A forward pass (a) builds one message per edge from the two endpoint
    features, the clamped squared distance and optional edge attributes,
    (b) shifts every node along its edge directions by a learned, clamped
    scalar step, and (c) refreshes node features from the sum of the messages
    scattered onto the first row of ``edge_index``.

    Args:
        input_nf: Number of input node features
        output_nf: Number of output node features
        hidden_nf: Number of hidden features
        edges_in_d: Number of input edge features
        act_fn: Activation module (the same instance is reused in every MLP)
        residual: Whether to add the input features to the node update
            (only applied when input and output widths match)
        attention: Whether to gate edge messages with a sigmoid attention score
        normalize: Whether to divide coordinate differences by their
            (clamped) length
        coords_agg: Aggregation method for coordinates (mean, sum, max, min)
        tanh: Whether to append a tanh to the coordinate MLP
        dropout: Dropout rate
        ffn: Whether to use the feed forward block
        batch_norm: Whether to normalize; implemented with ``InstanceNorm``
            driven by the ``batch`` vector, not ``BatchNorm``
    """

    def __init__(self, input_nf, output_nf, hidden_nf,
                 edges_in_d=0, act_fn=nn.SiLU(),
                 residual=True, attention=False, normalize=False,
                 coords_agg='mean', tanh=False, dropout=0.0,
                 ffn=False, batch_norm=True):
        super().__init__()

        # Feature widths.
        self.input_nf = input_nf
        self.output_nf = output_nf
        self.hidden_nf = hidden_nf

        # Behaviour switches.
        self.residual = residual
        self.attention = attention
        self.normalize = normalize
        self.coords_agg = coords_agg
        self.tanh = tanh
        self.dropout = dropout
        self.ffn = ffn
        self.batch_norm = batch_norm

        # Floor for squared distances and guard against division by zero.
        self.epsilon = 1e-8

        # NOTE: sub-modules are created in a fixed order so that seeded
        # initialisation and state_dict key order stay stable.

        # Edge MLP: [h_i, h_j, squared distance, edge attributes] -> message.
        edge_in_width = input_nf * 2 + 1 + edges_in_d
        self.edge_mlp = nn.Sequential(
            nn.Linear(edge_in_width, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            nn.Linear(hidden_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
        )
        if attention:
            self.att_mlp = nn.Sequential(nn.Linear(hidden_nf, 1), nn.Sigmoid())

        # Coord MLP: message -> scalar step size. The output projection has
        # no bias and is initialised with a tiny gain.
        step_head = nn.Linear(hidden_nf, 1, bias=False)
        nn.init.xavier_uniform_(step_head.weight, gain=0.001)
        coord_blocks = [
            nn.Linear(hidden_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            step_head,
        ]
        if tanh:
            coord_blocks.append(nn.Tanh())
        self.coord_mlp = nn.Sequential(*coord_blocks)

        # Node MLP: [h_i, aggregated messages] -> new node features.
        self.node_mlp = nn.Sequential(
            nn.Linear(hidden_nf + input_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            nn.Linear(hidden_nf, output_nf),
        )

        # Per-graph normalization of node features and coordinates.
        if batch_norm:
            self.norm_node = InstanceNorm(output_nf, affine=True)
            self.norm_coord = InstanceNorm(3, affine=True)

        # Optional feed forward block (expand x2, project back).
        if ffn:
            self.ff1 = nn.Linear(output_nf, output_nf * 2)
            self.ff2 = nn.Linear(output_nf * 2, output_nf)
            self.act_ff = act_fn
            self.drop_ff = nn.Dropout(dropout)
            if batch_norm:
                self.norm_ff1 = InstanceNorm(output_nf, affine=True)
                self.norm_ff2 = InstanceNorm(output_nf, affine=True)

    def coord2radial(self, edge_index, coord):
        """Return the clamped squared length and the difference vector per edge.

        The squared distance is clamped to ``[epsilon, 100.0]``. With
        ``normalize`` enabled the difference is divided by the square root of
        that clamped value (detached from autograd), and any non-finite entry
        is replaced by zero.
        """
        src, dst = edge_index
        delta = coord[src] - coord[dst]

        # Clamp to keep extreme values away from the edge MLP.
        sq_dist = torch.clamp(
            delta.pow(2).sum(dim=-1, keepdim=True),
            min=self.epsilon,
            max=100.0,
        )

        if not self.normalize:
            return sq_dist, delta

        scale = sq_dist.sqrt().detach() + self.epsilon
        delta = delta / scale
        delta = torch.where(torch.isfinite(delta), delta, torch.zeros_like(delta))
        return sq_dist, delta

    def _ff_block(self, x):
        """Feed forward block: linear -> activation -> dropout -> linear."""
        hidden = self.ff1(x)
        hidden = self.act_ff(hidden)
        hidden = self.drop_ff(hidden)
        return self.ff2(hidden)

    def forward(self, h, coord, edge_index, batch, edge_attr=None, node_attr=None):
        """Run one message-passing step.

        Args:
            h: Node features.
            coord: Node coordinates (normalised with ``InstanceNorm(3)`` when
                ``batch_norm`` is on).
            edge_index: Pair ``(row, col)`` of endpoint indices; messages and
                coordinate shifts are aggregated onto ``row``.
            batch: Graph assignment per node, forwarded to the norm layers.
            edge_attr: Optional edge features appended to the edge MLP input.
            node_attr: Optional extra node features appended to the node MLP
                input. NOTE(review): ``node_mlp`` is sized for
                ``hidden_nf + input_nf`` inputs only, so a non-empty
                ``node_attr`` looks incompatible; confirm before using.

        Returns:
            ``(h_new, coord, e)``: updated node features, updated coordinates
            and the per-edge messages.
        """
        row, col = edge_index
        radial, coord_diff = self.coord2radial(edge_index, coord)

        # -- edge features --
        edge_inputs = [h[row], h[col], radial]
        if edge_attr is not None:
            edge_inputs.append(edge_attr)
        e = self.edge_mlp(torch.cat(edge_inputs, dim=-1))
        if self.attention:
            e = e * self.att_mlp(e)

        # -- coordinate update --
        # Scalar step per edge [E,1], clamped to prevent explosion.
        step = torch.clamp(self.coord_mlp(e), -1.0, 1.0)
        trans = coord_diff * step  # [E,3]
        trans = torch.where(torch.isfinite(trans), trans, torch.zeros_like(trans))

        coord = coord + scatter(
            trans, row, dim=0, dim_size=coord.size(0), reduce=self.coords_agg
        )
        # Replace any NaN/Inf in the final coordinates with zero.
        coord = torch.where(torch.isfinite(coord), coord, torch.zeros_like(coord))
        if self.batch_norm:
            coord = self.norm_coord(coord, batch)

        # -- node update --
        node_inputs = [h, scatter(e, row, dim=0, dim_size=h.size(0), reduce='sum')]
        if node_attr is not None:
            node_inputs.append(node_attr)
        h_new = self.node_mlp(torch.cat(node_inputs, dim=-1))
        if self.batch_norm:
            h_new = self.norm_node(h_new, batch)
        if self.residual and h_new.shape[-1] == h.shape[-1]:
            h_new = h + h_new

        # -- optional FFN --
        if self.ffn:
            if self.batch_norm:
                h_new = self.norm_ff1(h_new, batch)
            h_new = h_new + self._ff_block(h_new)
            if self.batch_norm:
                h_new = self.norm_ff2(h_new, batch)

        return h_new, coord, e
161
+
162
class EGNNLayer2(nn.Module):
    """
    EGNN layer with optional feed forward network and per-graph normalization.

    NOTE: this class is currently a line-for-line duplicate of ``EGNNLayer``
    defined earlier in this file; consider consolidating the two.

    Args:
        input_nf: Number of input node features
        output_nf: Number of output node features
        hidden_nf: Number of hidden features
        edges_in_d: Number of input edge features
        act_fn: Activation module (the same instance is reused in every MLP)
        residual: Whether to add the input features to the node update
            (only applied when input and output widths match)
        attention: Whether to gate edge messages with a sigmoid attention score
        normalize: Whether to divide coordinate differences by their
            (clamped) length
        coords_agg: Aggregation method for coordinates (mean, sum, max, min)
        tanh: Whether to append a tanh to the coordinate MLP
        dropout: Dropout rate
        ffn: Whether to use the feed forward block
        batch_norm: Whether to normalize; implemented with ``InstanceNorm``
            driven by the ``batch`` vector, not ``BatchNorm``
    """

    def __init__(self, input_nf, output_nf, hidden_nf,
                 edges_in_d=0, act_fn=nn.SiLU(),
                 residual=True, attention=False, normalize=False,
                 coords_agg='mean', tanh=False, dropout=0.0,
                 ffn=False, batch_norm=True):
        super().__init__()

        # Feature widths.
        self.input_nf = input_nf
        self.output_nf = output_nf
        self.hidden_nf = hidden_nf

        # Behaviour switches.
        self.residual = residual
        self.attention = attention
        self.normalize = normalize
        self.coords_agg = coords_agg
        self.tanh = tanh
        self.dropout = dropout
        self.ffn = ffn
        self.batch_norm = batch_norm

        # Floor for squared distances and guard against division by zero.
        self.epsilon = 1e-8

        # NOTE: sub-modules are created in a fixed order so that seeded
        # initialisation and state_dict key order stay stable.

        # Edge MLP: [h_i, h_j, squared distance, edge attributes] -> message.
        n_edge_inputs = input_nf * 2 + 1 + edges_in_d
        self.edge_mlp = nn.Sequential(
            nn.Linear(n_edge_inputs, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            nn.Linear(hidden_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
        )
        if attention:
            self.att_mlp = nn.Sequential(nn.Linear(hidden_nf, 1), nn.Sigmoid())

        # Coord MLP: message -> scalar step size. The output projection has
        # no bias and is initialised with a tiny gain.
        out_proj = nn.Linear(hidden_nf, 1, bias=False)
        nn.init.xavier_uniform_(out_proj.weight, gain=0.001)
        coord_stack = [
            nn.Linear(hidden_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            out_proj,
        ]
        if tanh:
            coord_stack.append(nn.Tanh())
        self.coord_mlp = nn.Sequential(*coord_stack)

        # Node MLP: [h_i, aggregated messages] -> new node features.
        self.node_mlp = nn.Sequential(
            nn.Linear(hidden_nf + input_nf, hidden_nf),
            act_fn,
            nn.Dropout(dropout),
            nn.Linear(hidden_nf, output_nf),
        )

        # Per-graph normalization of node features and coordinates.
        if batch_norm:
            self.norm_node = InstanceNorm(output_nf, affine=True)
            self.norm_coord = InstanceNorm(3, affine=True)

        # Optional feed forward block (expand x2, project back).
        if ffn:
            self.ff1 = nn.Linear(output_nf, output_nf * 2)
            self.ff2 = nn.Linear(output_nf * 2, output_nf)
            self.act_ff = act_fn
            self.drop_ff = nn.Dropout(dropout)
            if batch_norm:
                self.norm_ff1 = InstanceNorm(output_nf, affine=True)
                self.norm_ff2 = InstanceNorm(output_nf, affine=True)

    def coord2radial(self, edge_index, coord):
        """Return the clamped squared length and the difference vector per edge.

        The squared distance is clamped to ``[epsilon, 100.0]``. With
        ``normalize`` enabled the difference is divided by the square root of
        that clamped value (detached from autograd), and any non-finite entry
        is replaced by zero.
        """
        src, dst = edge_index
        offset = coord[src] - coord[dst]

        # Clamp to keep extreme values away from the edge MLP.
        radial = torch.clamp(
            offset.pow(2).sum(dim=-1, keepdim=True),
            min=self.epsilon,
            max=100.0,
        )

        if not self.normalize:
            return radial, offset

        length = radial.sqrt().detach() + self.epsilon
        offset = offset / length
        offset = torch.where(torch.isfinite(offset), offset, torch.zeros_like(offset))
        return radial, offset

    def _ff_block(self, x):
        """Feed forward block: linear -> activation -> dropout -> linear."""
        expanded = self.ff1(x)
        expanded = self.act_ff(expanded)
        expanded = self.drop_ff(expanded)
        return self.ff2(expanded)

    def forward(self, h, coord, edge_index, batch, edge_attr=None, node_attr=None):
        """Run one message-passing step.

        Args:
            h: Node features.
            coord: Node coordinates (normalised with ``InstanceNorm(3)`` when
                ``batch_norm`` is on).
            edge_index: Pair ``(row, col)`` of endpoint indices; messages and
                coordinate shifts are aggregated onto ``row``.
            batch: Graph assignment per node, forwarded to the norm layers.
            edge_attr: Optional edge features appended to the edge MLP input.
            node_attr: Optional extra node features appended to the node MLP
                input. NOTE(review): ``node_mlp`` is sized for
                ``hidden_nf + input_nf`` inputs only, so a non-empty
                ``node_attr`` looks incompatible; confirm before using.

        Returns:
            ``(h_new, coord, e)``: updated node features, updated coordinates
            and the per-edge messages.
        """
        row, col = edge_index
        radial, coord_diff = self.coord2radial(edge_index, coord)

        # -- edge features --
        message_inputs = [h[row], h[col], radial]
        if edge_attr is not None:
            message_inputs.append(edge_attr)
        e = self.edge_mlp(torch.cat(message_inputs, dim=-1))
        if self.attention:
            e = e * self.att_mlp(e)

        # -- coordinate update --
        # Scalar step per edge [E,1], clamped to prevent explosion.
        coord_update = torch.clamp(self.coord_mlp(e), -1.0, 1.0)
        shift = coord_diff * coord_update  # [E,3]
        shift = torch.where(torch.isfinite(shift), shift, torch.zeros_like(shift))

        coord = coord + scatter(
            shift, row, dim=0, dim_size=coord.size(0), reduce=self.coords_agg
        )
        # Replace any NaN/Inf in the final coordinates with zero.
        coord = torch.where(torch.isfinite(coord), coord, torch.zeros_like(coord))
        if self.batch_norm:
            coord = self.norm_coord(coord, batch)

        # -- node update --
        update_inputs = [h, scatter(e, row, dim=0, dim_size=h.size(0), reduce='sum')]
        if node_attr is not None:
            update_inputs.append(node_attr)
        h_new = self.node_mlp(torch.cat(update_inputs, dim=-1))
        if self.batch_norm:
            h_new = self.norm_node(h_new, batch)
        if self.residual and h_new.shape[-1] == h.shape[-1]:
            h_new = h + h_new

        # -- optional FFN --
        if self.ffn:
            if self.batch_norm:
                h_new = self.norm_ff1(h_new, batch)
            h_new = h_new + self._ff_block(h_new)
            if self.batch_norm:
                h_new = self.norm_ff2(h_new, batch)

        return h_new, coord, e
src/bce/model/ReCEP.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from prettytable import PrettyTable
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from torch_scatter import scatter_softmax, scatter_sum
7
+ from torch_geometric.data import Data, Batch
8
+
9
+ from .dihedral import DihedralFeatures
10
+ from .EGNN import EGNNLayer
11
+ from .pooling import AttentionPooling, AddPooling
12
+ from .activation import get_activation
13
+ # from .baseline import EP
14
+
15
class ReCEP(nn.Module):
    """
    Refined Graph Epitope Predictor with optional EGNN layer skipping for ablation.

    Pipeline (see ``forward``): project the input embedding, optionally add
    dihedral features and append RSA, run the EGNN stack, pool per graph, and
    emit one graph-level prediction plus one prediction per node. The node
    head sees each node's features fused with its graph's pooled features.

    Args:
        in_dim: Input embedding width. Only honoured when ``encoder`` is
            neither ``'esmc'`` (forces 2560) nor ``'esm2'`` (forces 1280).
        rsa: Append ``data.rsa`` as one extra node feature.
        dihedral: Add ``DihedralFeatures(data.pos)`` to the projected features.
        node_dims: Node widths; ``node_dims[0]`` is the projection width and
            each consecutive pair defines one EGNN layer. Never mutated.
        edge_dim: Width of ``data.edge_attr``.
        dropout: Dropout inside the projection, EGNN layers and pooling.
        activation: Activation name passed to ``get_activation``.
        residual, attention, normalize, coords_agg, ffn, batch_norm:
            Forwarded unchanged to every ``EGNNLayer``.
        concat: Concatenate pre-EGNN features to the EGNN output.
        addition: Add pre-EGNN features to the EGNN output (forces the last
            node width to equal the first).
        pooling: ``'attention'`` or ``'add'``.
        fusion_type: ``'concat'`` or ``'add'``: how node and graph features
            are combined for the gate and the node classifier.
        node_gate: Enable the sigmoid gate on node features.
        node_norm: Insert ``LayerNorm`` in the node classifier.
        node_layers: Number of linear layers in the node classifier.
        out_dropout: Dropout in the two prediction heads.
        use_egnn: Toggle for EGNN layer usage; when False, ``concat`` and
            ``addition`` are disabled too.
        encoder: Name of the upstream encoder (see ``in_dim``).
    """

    def __init__(
        self,
        in_dim: int = 2560,
        rsa: bool = True,
        dihedral: bool = True,
        node_dims: list = [512, 256, 256],
        edge_dim: int = 32,
        dropout: float = 0.3,
        activation: str = "gelu",
        residual: bool = True,
        attention: bool = True,
        normalize: bool = True,
        coords_agg: str = 'mean',
        ffn: bool = True,
        batch_norm: bool = True,
        concat: bool = False,
        addition: bool = False,
        # Global predictor
        pooling: str = 'attention',
        # Node classifier
        fusion_type: str = 'concat',
        node_gate: bool = False,
        node_norm: bool = False,
        node_layers: int = 2,
        out_dropout: float = 0.2,
        use_egnn: bool = True,  # toggle for EGNN layer usage
        encoder: str = 'esmc',
    ):
        super().__init__()

        # Checkpoints written before get_config() returned a plain flag may
        # carry the gate *module* under 'node_gate'; collapse it to a bool.
        gate_enabled = isinstance(node_gate, nn.Module) or bool(node_gate)

        self.use_egnn = use_egnn
        self.in_dim = in_dim
        self.rsa = rsa
        self.dihedral = dihedral
        # The default list is shared between calls, so it is only ever copied.
        self.original_node_dims = node_dims.copy()
        self.edge_dim = edge_dim
        self.dropout = dropout
        self.activation = activation
        self.residual = residual
        self.attention = attention
        self.normalize = normalize
        self.ffn = ffn
        self.batch_norm = batch_norm
        self.coords_agg = coords_agg
        self.concat = concat
        self.addition = addition
        self.fusion_type = fusion_type
        # Placeholder flag; replaced by the gate module below when enabled
        # (the attribute name is part of the state_dict key prefix).
        self.node_gate = gate_enabled
        self.node_norm = node_norm
        self.node_layers = node_layers
        self.out_dropout = out_dropout
        self.pooling = pooling

        self.base_node_dim = node_dims[0]
        self.node_dims = node_dims.copy()
        if rsa:
            # RSA is appended as one extra feature after the projection.
            self.node_dims[0] += 1

        if addition:
            # Addition needs matching input and output widths.
            self.node_dims[-1] = self.node_dims[0]

        # Modify input dimension based on encoder
        self.encoder = encoder
        if encoder == 'esmc':
            self.in_dim = 2560
        elif encoder == 'esm2':
            self.in_dim = 1280
        else:
            self.in_dim = in_dim

        # Calculate actual final node dimension based on whether EGNN is used
        if self.use_egnn:
            self.final_node_dim = self.node_dims[-1]
        else:
            self.final_node_dim = self.node_dims[0]
            self.concat = False
            self.addition = False

        self.proj_layer = nn.Sequential(
            nn.Linear(self.in_dim, self.base_node_dim),
            get_activation(activation),
            nn.Dropout(dropout),
        )

        if dihedral:
            try:
                self.dihedral_features = DihedralFeatures(self.base_node_dim)
            except Exception:
                # Best effort: run without dihedral features rather than fail.
                # (Narrowed from a bare except so Ctrl-C / SystemExit propagate.)
                print("Warning: DihedralFeatures not found, skipping dihedral features")
                self.dihedral = False

        self.egnn_layers = nn.ModuleList()
        if self.use_egnn:
            for i in range(len(self.node_dims) - 1):
                self.egnn_layers.append(
                    EGNNLayer(
                        input_nf=self.node_dims[i],
                        output_nf=self.node_dims[i + 1],
                        hidden_nf=self.node_dims[i + 1],
                        edges_in_d=edge_dim,
                        act_fn=get_activation(activation),
                        residual=residual,
                        attention=attention,
                        normalize=normalize,
                        coords_agg=coords_agg,
                        dropout=dropout,
                        ffn=ffn,
                        batch_norm=batch_norm,
                    )
                )

        if concat and self.use_egnn:
            self.final_node_dim += self.node_dims[0]

        if addition and self.use_egnn:
            assert self.node_dims[0] == self.node_dims[-1], "Node dimension mismatch for addition"
            self.final_node_dim = self.node_dims[0]

        # Calculate node classifier input dimension based on fusion type
        if fusion_type == 'concat':
            self.node_classifier_input_dim = self.final_node_dim * 2
        elif fusion_type == 'add':
            self.node_classifier_input_dim = self.final_node_dim
        else:
            raise ValueError(f"Unsupported fusion type: {fusion_type}")

        # Calculate node gate input dimension
        if gate_enabled:
            if fusion_type == 'concat':
                self.node_gate_input_dim = self.final_node_dim * 2
            elif fusion_type == 'add':
                self.node_gate_input_dim = self.final_node_dim
            else:
                raise ValueError(f"Unsupported fusion type: {fusion_type}")

        if pooling == 'attention':
            self.graph_pool = AttentionPooling(
                input_dim=self.final_node_dim,
                dropout=dropout,
                activation=activation,
            )
        elif pooling == 'add':
            self.graph_pool = AddPooling(
                input_dim=self.final_node_dim,
                dropout=dropout,
            )
        else:
            raise ValueError(f"Unsupported pooling method: {pooling}")

        self.global_predictor = nn.Sequential(
            nn.Linear(self.final_node_dim, self.final_node_dim // 2),
            get_activation(activation),
            nn.Dropout(out_dropout),
            nn.Linear(self.final_node_dim // 2, 1),
        )

        if gate_enabled:
            self.node_gate = nn.Sequential(
                nn.Linear(self.node_gate_input_dim, self.final_node_dim),
                get_activation(activation),
                nn.LayerNorm(self.final_node_dim),
                nn.Linear(self.final_node_dim, self.final_node_dim),
                nn.Sigmoid(),
            )

        self.node_classifier = self._build_node_classifier()

        self._param_printed = False
        # Re-initialises every nn.Linear / nn.LayerNorm in the whole model,
        # including those inside the EGNN layers.
        self.apply(self._init_weights)

    def _build_node_classifier(self):
        """Build the per-node head: ``node_layers`` linear layers ending in width 1.

        Every hidden layer halves the width (never below 32) and is followed
        by an optional ``LayerNorm``, the activation and dropout.
        """
        layers = []
        current_dim = self.node_classifier_input_dim
        last = self.node_layers - 1
        for i in range(self.node_layers):
            is_hidden = i < last
            output_dim = max(current_dim // 2, 32) if is_hidden else 1
            layers.append(nn.Linear(current_dim, output_dim))
            if is_hidden:
                if self.node_norm:
                    layers.append(nn.LayerNorm(output_dim))
                layers.append(get_activation(self.activation))
                layers.append(nn.Dropout(self.out_dropout))
            current_dim = output_dim
        return nn.Sequential(*layers)

    def _init_weights(self, module):
        """Initialise one sub-module (used with ``self.apply``).

        Linear weights get Xavier-normal; biases are 0.0 for single-output
        layers and 0.01 otherwise. LayerNorm is reset to weight 1 / bias 0.
        """
        if isinstance(module, nn.Linear):
            nn.init.xavier_normal_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0.0 if module.out_features == 1 else 0.01)
        elif isinstance(module, nn.LayerNorm):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)

    def forward(self, data: "Batch") -> dict:
        """Predict a graph-level value and a per-node value.

        Args:
            data: Batched graph exposing ``x``, ``pos``, ``batch``,
                ``edge_attr``, ``edge_index`` and ``rsa``.

        Returns:
            ``{"global_pred": ..., "node_preds": ...}``. ``global_pred`` is the
            global head's output with its trailing width-1 axis squeezed
            (presumably one value per graph; depends on the pooling module),
            and ``node_preds`` is the node head's output squeezed likewise
            (one value per row of ``data.x``).
        """
        if self.training and not self._param_printed:
            print(f"ReCEP total params: {sum(p.numel() for p in self.parameters()):,}")
            self._param_printed = True

        x = data.x

        coords = data.pos
        batch = data.batch
        e_attr = data.edge_attr
        # Index 1 along dim 1 of `pos` is the coordinate set handed to the
        # EGNN layers. NOTE(review): presumably one backbone atom per
        # residue; confirm which. Guarded so a missing `pos` no longer fails
        # before the `coords is not None` check below.
        coords_C = coords[:, 1].clone() if coords is not None else None

        x = self.proj_layer(x)
        if self.dihedral and coords is not None:
            x = x + self.dihedral_features(coords)
        if self.rsa and data.rsa is not None:
            rsa = data.rsa.unsqueeze(-1)
            x = torch.cat([x, rsa], dim=-1)

        h = x
        assert h.shape[1] == self.node_dims[0], f"[ReCEP] Node feature dim mismatch: got {h.shape[1]}, expected {self.node_dims[0]}"

        if self.use_egnn:
            for layer in self.egnn_layers:
                h, coords_C, _ = layer(h, coords_C, data.edge_index, batch, edge_attr=e_attr)

        # Optional skip connection from the pre-EGNN features.
        if self.concat and self.use_egnn:
            h = torch.cat([x, h], dim=-1)
        elif self.addition and self.use_egnn:
            h = h + x

        graph_feats = self.graph_pool(h, batch)
        global_pred = self.global_predictor(graph_feats).squeeze(-1)

        # Broadcast each graph's pooled features back onto its nodes.
        context = graph_feats[batch]
        # `node_gate` is a module only when the gate was enabled in __init__;
        # otherwise it is the plain False flag.
        if isinstance(self.node_gate, nn.Module):
            if self.fusion_type == 'concat':
                gate_input = torch.cat([h, context], dim=-1)
            elif self.fusion_type == 'add':
                gate_input = h + context
            else:
                raise ValueError(f"Unsupported fusion type: {self.fusion_type}")
            gate = self.node_gate(gate_input)
            gated_h = h + gate * h
        else:
            gated_h = h

        if self.fusion_type == 'concat':
            cat = torch.cat([gated_h, context], dim=-1)
        elif self.fusion_type == 'add':
            # Ensure dimensions match for addition
            assert gated_h.shape[-1] == context.shape[-1], f"[ReCEP] Dimension mismatch for add fusion: gated_h {gated_h.shape[-1]} vs context {context.shape[-1]}"
            cat = gated_h + context
        else:
            raise ValueError(f"Unsupported fusion type: {self.fusion_type}")

        # Verify input dimension matches node classifier expectation
        expected_dim = self.node_classifier_input_dim
        actual_dim = cat.shape[-1]
        assert actual_dim == expected_dim, f"[ReCEP] Node classifier input dim mismatch: got {actual_dim}, expected {expected_dim}"

        node_preds = self.node_classifier(cat).squeeze(-1)

        return {"global_pred": global_pred, "node_preds": node_preds}

    def print_param_count(self):
        """Print a summary table of parameter counts"""
        table = PrettyTable()
        table.field_names = ["Layer Name", "Type", "Parameters", "Trainable"]
        total_params = 0
        trainable_params = 0

        for name, module in self.named_modules():
            if list(module.children()):
                continue  # Only leaf modules are listed
            params = sum(p.numel() for p in module.parameters())
            if params <= 0:
                continue
            is_trainable = any(p.requires_grad for p in module.parameters())

            total_params += params
            if is_trainable:
                trainable_params += params

            table.add_row([
                name,
                module.__class__.__name__,
                f"{params:,}",
                "✓" if is_trainable else "✗"
            ])

        table.add_row(["", "", "", ""], divider=True)
        table.add_row([
            "TOTAL",
            "",
            f"{total_params:,}",
            f"Trainable: {trainable_params:,}"
        ])

        # Guard the ratio so a parameter-less model does not divide by zero.
        density = trainable_params / total_params if total_params else 0.0

        print("\nReCEP Model Parameter Summary:")
        print(table)
        print(f"Parameter Density: {density:.1%}\n")

    def save(self, path, threshold: float = 0.5):
        """Save model with configuration

        Writes a single ``torch.save`` payload (state dict, ``get_config()``,
        class name, version and ``threshold``) next to ``path`` with the
        suffix replaced by ``.bin``; parent directories are created.

        Raises:
            Exception: any failure is printed and re-raised unchanged.
        """
        path = Path(path)
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            save_path = path.with_suffix('.bin')

            config = self.get_config()

            torch.save({
                'model_state': self.state_dict(),
                'config': config,
                'model_class': self.__class__.__name__,
                'version': '1.0',
                'threshold': threshold
            }, save_path)
            print(f"ReCEP model saved to {save_path}")
        except Exception as e:
            print(f"Save failed: {str(e)}")
            raise

    @classmethod
    def load(cls, path, device='cpu', strict=True, verbose=True):
        """Load model with configuration

        Args:
            path: Checkpoint written by ``save``.
            device: ``str``, ``torch.device`` or ``int`` (CUDA index; falls
                back to CPU when negative or CUDA is unavailable).
            strict: Passed to ``load_state_dict``. The state handed over is
                the freshly built model's own state overlaid with the
                matching checkpoint tensors, so it always contains every key.
            verbose: Print how many checkpoint tensors were used.

        Returns:
            ``(model, threshold)``: the model on ``device`` and the stored
            decision threshold (0.5 when absent).

        Raises:
            FileNotFoundError: ``path`` does not exist.
            ValueError: ``device`` has an unsupported type.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Model file {path} not found")

        if isinstance(device, str):
            device = torch.device(device)
        elif isinstance(device, int):
            if device >= 0 and torch.cuda.is_available():
                device = torch.device(f'cuda:{device}')
            else:
                device = torch.device('cpu')
        elif not isinstance(device, torch.device):
            raise ValueError(f"Unsupported device type: {type(device)}")

        # SECURITY: weights_only=False unpickles arbitrary Python objects.
        # Only load checkpoints from a trusted source.
        try:
            checkpoint = torch.load(
                path,
                map_location=device,
                weights_only=False
            )
        except RuntimeError:
            print("Warning: Using unsafe load due to weights_only restriction")
            checkpoint = torch.load(path, map_location=device)

        # Version compatibility check
        if 'version' not in checkpoint:
            print("Warning: Loading legacy model without version info")

        # Rebuild configuration
        config = checkpoint.get('config', {})
        model = cls(**config)

        # Load state dict
        model_state = checkpoint['model_state']
        current_state = model.state_dict()

        # Auto-match parameters: keep only tensors whose name and shape agree.
        matched_state = {}
        for name, param in model_state.items():
            if name not in current_state:
                print(f"Parameter not found: {name}")
            elif param.shape != current_state[name].shape:
                print(f"Size mismatch: {name} (load {param.shape} vs model {current_state[name].shape})")
            else:
                matched_state[name] = param

        current_state.update(matched_state)
        model.load_state_dict(current_state, strict=strict)

        if verbose:
            print(f"Successfully loaded {len(matched_state)}/{len(model_state)} parameters")

        return model.to(device), checkpoint.get('threshold', 0.5)

    def get_config(self):
        """Get model configuration

        Returns the keyword arguments needed to rebuild this model with
        ``ReCEP(**config)``. ``node_gate`` is reported as a plain bool even
        though the attribute holds the gate module once the gate is enabled.
        """
        return {
            'in_dim': self.in_dim,
            'rsa': self.rsa,
            'dihedral': self.dihedral,
            'node_dims': self.original_node_dims,
            'edge_dim': self.edge_dim,
            'dropout': self.dropout,
            'activation': self.activation,
            'residual': self.residual,
            'attention': self.attention,
            'normalize': self.normalize,
            'coords_agg': self.coords_agg,
            'ffn': self.ffn,
            'batch_norm': self.batch_norm,
            'concat': self.concat,
            'addition': self.addition,
            'pooling': self.pooling,
            'fusion_type': self.fusion_type,
            'node_gate': isinstance(self.node_gate, nn.Module) or bool(self.node_gate),
            'node_norm': self.node_norm,
            'node_layers': self.node_layers,
            'out_dropout': self.out_dropout,
            'use_egnn': self.use_egnn,
            'encoder': self.encoder
        }
436
+
437
+
438
# Maps the value of the 'model' config entry to the class to instantiate.
model_registry = {
    "ReCEP": ReCEP,
}


def get_model(configs):
    """
    Instantiate a registered model from a flat configuration.

    Args:
        configs: Either a mapping or any object with a ``__dict__`` (for
            example an ``argparse.Namespace``). The optional ``'model'``
            entry selects the class and defaults to ``'ReCEP'``.

    Returns:
        A new instance of the selected model. Every ``__init__`` parameter is
        taken from ``configs`` when present and from the signature default
        otherwise; a parameter with neither only triggers a printed warning
        here (the constructor call will then fail).

    Raises:
        ValueError: the requested model name is not in ``model_registry``.
    """
    import inspect

    # Support both argparse.Namespace-like objects and plain dicts.
    args = vars(configs) if hasattr(configs, '__dict__') else configs

    model_name = args.get('model', 'ReCEP')
    if model_name not in model_registry:
        valid_models = list(model_registry.keys())
        raise ValueError(f"Invalid model type: {model_name}. Must be one of: {valid_models}")
    model_class = model_registry[model_name]

    # Walk the constructor signature and pick a value for each parameter.
    model_config = {}
    for param_name, param in inspect.signature(model_class.__init__).parameters.items():
        if param_name == 'self':
            continue
        if param_name in args:
            model_config[param_name] = args[param_name]
            continue
        if param.default is not param.empty:
            model_config[param_name] = param.default
            continue
        print(f"[WARNING] Required parameter '{param_name}' not found in args and has no default value")

    return model_class(**model_config)
src/bce/model/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BCE Model Module
3
+
4
+ This module contains various neural network models for B-cell epitope prediction.
5
+ """
6
+
7
+ # from .ReCEP import ReCEP
8
+ # from .EGNN import EGNNLayer
9
+ # from .dihedral import DihedralFeatures
10
+
11
+ # __all__ = ['ReCEP', 'EGNNLayer', 'DihedralFeatures']
src/bce/model/__pycache__/EGNN.cpython-310.pyc ADDED
Binary file (4.7 kB). View file
 
src/bce/model/__pycache__/EGNN.cpython-39.pyc ADDED
Binary file (6.97 kB). View file
 
src/bce/model/__pycache__/ReCEP.cpython-310.pyc ADDED
Binary file (10.8 kB). View file