fatmacankara committed on
Commit
b441d1f
·
1 Parent(s): 89cdd80

Update code/pdb_featureVector.py

Browse files
Files changed (1) hide show
  1. code/pdb_featureVector.py +51 -79
code/pdb_featureVector.py CHANGED
@@ -203,14 +203,6 @@ def pdb(input_set, mode, impute):
203
  print('Processing PDB structures...\n')
204
  if pdbs == []:
205
  print('No PDB structure found for the query. ')
206
- """
207
- try:
208
- pdbs = [j.strip('[').strip(']').strip().strip('\'').strip('\"') for j in
209
- ((',').join([str(item) for item in pdbs])).split(',')]
210
- except IndexError:
211
- pdbs = []
212
- print('No PDB structure found for the query. ')
213
- """
214
  print('Starting PDB structures download...\n')
215
  pdbs = list(filter(None, pdbs))
216
  pdbs = (set(pdbs))
@@ -223,82 +215,61 @@ def pdb(input_set, mode, impute):
223
  shutil.rmtree('obsolete')
224
  except OSError as e:
225
  pass
226
- #existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
227
- #st.write('existing_pdb')
228
- #st.write(existing_pdb)
229
- #existing_pdb = [str(i) for i in existing_pdb]
230
- #existing_pdb = [i.split('/')[-1].split('.')[0].lower() for i in existing_pdb]
231
  cnt = 0
232
  st.write('this is the pdbs', pdbs)
 
 
 
 
 
 
 
 
 
233
  for search in pdbs:
234
- st.write('searching for pdb:', search)
 
235
  try:
236
- file = pdbl.retrieve_pdb_file(search, file_format="pdb")
237
- """
238
- path_pdb = 'out_files/pdb/pdb_structures'
239
- st.write('path for pdb: ', path_pdb)
240
- file = pdbl.retrieve_pdb_file(search, pdir=path_pdb, file_format="pdb")
241
-
242
- st.write('file: ', file)
243
-
244
- existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
245
- st.write('after download:', existing_pdb)
246
 
247
- existing_pdb = list(glob.glob(f"{path_pdb}/*"))
248
- st.write('after download 2:', existing_pdb)
249
-
250
- st.write('NEW METHOD')
251
-
252
- # Define the URL to retrieve the PDB file
253
- url = f"https://files.rcsb.org/download/{search}.pdb"
254
- st.write('url', url)
255
- # Send an HTTP GET request to the PDB website to download the PDB file
256
- response = requests.get(url)
257
- st.write('response', response)
258
- # Check if the request was successful
259
- if response.status_code == 200:
260
- st.write('here1')
261
- # Save the PDB file to a local file
262
- #st.write(f'out_files/pdb/pdb_structures/{search}.pdb')
263
- #try:
264
- # with open(f'out_files/pdb/pdb_structures/{search}.pdb', "wb") as f:
265
- # st.write('WRITING TO FILE')
266
- #except:
267
- # st.write('ERROR')
268
-
269
-
270
-
271
- from huggingface_hub import Hf
272
- api = HfApi()
273
- st.write('api', API)
274
-
275
- api.upload_file(
276
- path_or_fileobj=response.content,
277
- path_in_repo="out_files/pdb/pdb_structures/",
278
- repo_id="HUBioDataLab/ASCARIS",
279
- repo_type="space")
280
- st.write(f"PDB file {search}.pdb downloaded successfully.")
281
-
282
-
283
- # Aug 23
284
- content = response.content.decode("utf-8") # Decode the content if it's not already a string
285
- st.write('Content')
286
- st.write(content)
287
- parsed_records = list(SeqIO.parse(content, "fasta"))
288
- st.write('parsed_records')
289
- st.write(parsed_records)
290
- for rec in parsed_records:
291
- st.write(rec)
292
- st.write(rec.id)
293
- st.write(rec.pdbSequence)
294
- else:
295
- st.write('Here2')
296
- st.write(f"Failed to retrieve PDB file for {search}.")
297
-
298
-
299
- st.write('what')
300
- existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
301
- st.write('existing_pdb3', existing_pdb)
302
 
303
  """
304
 
@@ -324,6 +295,7 @@ def pdb(input_set, mode, impute):
324
  pdb_info.at[index, 'chain'] = 'nan'
325
  pdb_info.at[index, 'resolution'] = 'nan'
326
  cnt += 1
 
327
  print()
328
  st.write()
329
  st.write(pdb_info)
 
203
  print('Processing PDB structures...\n')
204
  if pdbs == []:
205
  print('No PDB structure found for the query. ')
 
 
 
 
 
 
 
 
206
  print('Starting PDB structures download...\n')
207
  pdbs = list(filter(None, pdbs))
208
  pdbs = (set(pdbs))
 
215
  shutil.rmtree('obsolete')
216
  except OSError as e:
217
  pass
218
+
 
 
 
 
219
  cnt = 0
220
  st.write('this is the pdbs', pdbs)
221
def fetch_uniprot_ids(pdb_code):
    """Return the UniProt identifiers that PDBe maps to ``pdb_code``.

    Queries the PDBe ``mappings/uniprot`` endpoint and returns the keys of
    the first nested mapping in the JSON payload (presumably the UniProt
    accessions for the entry -- confirm against the PDBe response schema).

    Args:
        pdb_code: PDB entry identifier, interpolated into the request URL.

    Returns:
        A list of keys taken from the first inner mapping of the response,
        or an empty list when the request fails or the payload does not
        have the expected nested-dict shape.
    """
    url = f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}"
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Check for a successful response
        data = response.json()
        # Payload is expected to be {entry: {database: {id: ...}}}; take the
        # first entry and the first database under it.
        return list(list(list(data.values())[0].values())[0].keys())
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve UniProt data for PDB code {pdb_code}: {e}")
    except (ValueError, IndexError, AttributeError) as e:
        # Invalid JSON, an empty mapping, or a non-dict level: treat it the
        # same as "no mapping available" instead of crashing the caller.
        print(f"Unexpected UniProt mapping payload for PDB code {pdb_code}: {e}")
    return []
230
  for search in pdbs:
231
+ # Step 1: Fetch the PDB file
232
+ pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
233
  try:
234
+ response = requests.get(pdb_url)
235
+ response.raise_for_status() # Check for a successful response
236
+ except requests.exceptions.RequestException as e:
237
+ print(f"Failed to retrieve data for PDB code {search}: {e}")
238
+ continue # Skip to the next PDB code if fetching fails
 
 
 
 
 
239
 
240
+ # Step 2: Parse the PDB file from memory
241
+ pdb_data = response.text
242
+ pdb_parser = PDBParser(QUIET=True) # QUIET=True suppresses warnings
243
+ pdb_file_content = StringIO(pdb_data)
244
+ structure = pdb_parser.get_structure(pdb_code, pdb_file_content)
245
+ ppb = PPBuilder()
246
+ for model in structure:
247
+ for pp in ppb.build_peptides(model):
248
+ sequence = pp.get_sequence()
249
+ for chain in model:
250
+ chain_id = chain.get_id()
251
+ # Extract UniProt ID if available in the chain's annotations
252
+ uniprot_ids = fetch_uniprot_ids(search)
253
+ # Get the resolution from the PDB header
254
+ header = structure.header
255
+ resolution = header.get('resolution', 'N/A')
256
+ # Print UniProt IDs, chain ID, and resolution for the current model
257
+ #for i, chain in enumerate(model, start=1):
258
+ chain_id = chain.get_id()
259
+ st.write(f"---- Information for Chain {chain_id} in Model {i} ----")
260
+ st.write(f"UniProt IDs: {', '.join(uniprot_ids)}")
261
+ st.write(f"Chain ID: {chain_id}")
262
+ st.write(f"PDB ID: {search.upper()}")
263
+ st.write(f"Resolution: {resolution}")
264
+ st.write(f"Sequence: {sequence}")
265
+ pdb_fasta.at[index, 'pdbID'] = search
266
+ pdb_fasta.at[index, 'chain'] = chain_id
267
+ pdb_fasta.at[index, 'pdbSequence'] = str(sequence)
268
+ pdb_info.at[index, 'uniprotID'] = record.dbxrefs[0].split(':')[1]
269
+ pdb_info.at[index, 'pdbID'] = search
270
+ pdb_info.at[index, 'chain'] = chain_id
271
+ pdb_info.at[index, 'resolution'] = resolution
272
+ index += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  """
275
 
 
295
  pdb_info.at[index, 'chain'] = 'nan'
296
  pdb_info.at[index, 'resolution'] = 'nan'
297
  cnt += 1
298
+ """
299
  print()
300
  st.write()
301
  st.write(pdb_info)