libokj committed on
Commit
be4442c
·
1 Parent(s): 319b9d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -12,6 +12,7 @@ from pathlib import Path
12
  import sys
13
 
14
  import numpy as np
 
15
  from Bio.Align import PairwiseAligner
16
  # from email_validator import validate_email
17
  import gradio as gr
@@ -1178,11 +1179,25 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1178
  if library in TARGET_LIBRARY_MAP.keys():
1179
  identify_df = pd.read_csv(Path('data/target_libraries', TARGET_LIBRARY_MAP[library]))
1180
  else:
1181
- identify_df = pd.read_csv(library_upload)
 
 
 
 
 
 
 
 
1182
  validate_columns(identify_df, ['X2'])
1183
 
1184
  identify_df['X1'] = smiles
1185
-
 
 
 
 
 
 
1186
  job_id = uuid4()
1187
  temp_file = Path(f'{job_id}_input.csv').resolve()
1188
  identify_df.to_csv(temp_file, index=False)
 
12
  import sys
13
 
14
  import numpy as np
15
+ from Bio import SeqIO
16
  from Bio.Align import PairwiseAligner
17
  # from email_validator import validate_email
18
  import gradio as gr
 
1179
  if library in TARGET_LIBRARY_MAP.keys():
1180
  identify_df = pd.read_csv(Path('data/target_libraries', TARGET_LIBRARY_MAP[library]))
1181
  else:
1182
+ if library_upload.endswith('.csv'):
1183
+ identify_df = pd.read_csv(library_upload)
1184
+ elif library_upload.endswith('.fasta'):
1185
+ records = list(SeqIO.parse(library_upload, "fasta"))
1186
+ id2 = [record.id for record in records]
1187
+ seq = [str(record.seq) for record in records]
1188
+ identify_df = pd.DataFrame({'ID2': id2, 'X2': seq})
1189
+ else:
1190
+ raise 'Currently only csv and fasta files are supported.'
1191
  validate_columns(identify_df, ['X2'])
1192
 
1193
  identify_df['X1'] = smiles
1194
+ if not np.isin('ID1', identify_df.columns):
1195
+ identify_df['ID1'] = 'Input'
1196
+ if not np.isin('ID2', identify_df.columns):
1197
+ identify_df['ID2'] = list(range(identify_df.shape[0]))
1198
+ identify_df = identify_df.loc[:, ['ID1', 'X1', 'ID2', 'X2']]
1199
+ identify_df['Y'] = 0
1200
+
1201
  job_id = uuid4()
1202
  temp_file = Path(f'{job_id}_input.csv').resolve()
1203
  identify_df.to_csv(temp_file, index=False)