Spaces:
Runtime error
Runtime error
from interproscan import InterproScan | |
from Bio.Blast.Applications import NcbiblastpCommandline | |
from utils.utils import extract_interproscan_metrics, get_seqnid, extract_blast_metrics, rename_interproscan_keys | |
import os | |
import json | |
# Input FASTA file shared by the BLAST and InterProScan stages.
input_fasta = "evolla_test/test_hq0704_da_w_plddt_mask_hard_idnseqs.fasta"

#####################################################
# run blast
#####################################################
# settings
blast_database = "uniprot_swissprot"
expect_value = 0.01
blast_xml = "evolla_test/test_hq0704_da_w_plddt_mask_hard_blast.xml"

# Mapping used below to look up the sequence for each id in the BLAST results.
seq_dict = get_seqnid(input_fasta)

# Make sure the output directory exists before BLAST writes into it.
# `exist_ok=True` avoids the check-then-create race, and the guard skips the
# call when `blast_xml` has no directory component (os.makedirs("") raises).
output_dir = os.path.dirname(blast_xml)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# NOTE(review): the Bio.Blast.Applications command-line wrappers appear to be
# deprecated in recent Biopython releases — confirm against the pinned version.
blast_cmd = NcbiblastpCommandline(
    query=input_fasta,
    db=blast_database,
    out=blast_xml,
    outfmt=5,  # XML format
    evalue=expect_value,
)
blast_cmd()  # run blastp; results are written to `blast_xml`

# Pair every id reported by BLAST with its query sequence.
blast_results = extract_blast_metrics(blast_xml)
blast_info = {
    uid: {"sequence": seq_dict[uid], "blast_results": info}
    for uid, info in blast_results.items()
}

# save blast results
# Only the extension is swapped: str.replace would also rewrite any other
# ".xml" occurrence elsewhere in the path.
blast_json = os.path.splitext(blast_xml)[0] + ".json"
with open(blast_json, "w", encoding="utf-8") as f:
    json.dump(blast_info, f, indent=4)
#####################################################
# run interproscan
#####################################################
# settings
goterms = True
pathways = True
interproscan_json = "evolla_test/test_hq0704_da_w_plddt_mask_hard_interproscan.json"
interproscan_path = "interproscan/interproscan-5.75-106.0/interproscan.sh"
# Member databases handed to the metrics extractor below (not to the run itself).
librarys = [
    "PFAM",
    "PIRSR",
    "PROSITE_PROFILES",
    "SUPERFAMILY",
    "PRINTS",
    "PANTHER",
    "CDD",
    "GENE3D",
    "NCBIFAM",
    "SFLM",
    "MOBIDB_LITE",
    "COILS",
    "PROSITE_PATTERNS",
    "FUNFAM",
    "SMART",
]

interproscan = InterproScan(interproscan_path)
input_args = {
    "fasta_file": input_fasta,
    "goterms": goterms,
    "pathways": pathways,
    # NOTE(review): a file path is passed as `save_dir`; presumably the wrapper
    # writes its raw JSON there, since the extractor reads it back — confirm.
    "save_dir": interproscan_json,
}
interproscan.run(**input_args)  # run InterProScan

interproscan_results = extract_interproscan_metrics(
    interproscan_json,
    librarys=librarys,
)

# Results are looked up by sequence, then re-indexed by sequence id.
# `seq_id` is used instead of `id` so the builtin is not shadowed.
interproscan_info = {
    seq_id: {
        "sequence": seq,
        "interproscan_results": rename_interproscan_keys(interproscan_results[seq]),
    }
    for seq_id, seq in seq_dict.items()
}

# save interproscan results
# NOTE(review): this writes to the same path the extractor just read from, so
# the raw output is replaced by the summarized results — confirm intended.
with open(interproscan_json, "w", encoding="utf-8") as f:
    json.dump(interproscan_info, f, indent=4)