"""Annotate the sequences of a FASTA file with BLASTP and InterProScan.

The script runs in two stages over the same input FASTA file:

1. BLASTP against ``blast_database``; the XML report is parsed with
   ``extract_blast_metrics`` and saved as JSON next to the XML file.
2. InterProScan; its output is parsed with ``extract_interproscan_metrics``,
   the keys are renamed with ``rename_interproscan_keys`` and the result is
   written back to ``interproscan_json``.
"""
import json
import os

from Bio.Blast.Applications import NcbiblastpCommandline

from interproscan import InterproScan
from utils.utils import (
    extract_blast_metrics,
    extract_interproscan_metrics,
    get_seqnid,
    rename_interproscan_keys,
)

# input fasta file
input_fasta = "evolla_test/test_hq0704_da_w_plddt_mask_hard_idnseqs.fasta"

#####################################################
# run blast
#####################################################

# settings
blast_database = "uniprot_swissprot"
expect_value = 0.01
blast_xml = "evolla_test/test_hq0704_da_w_plddt_mask_hard_blast.xml"

# Used below as a mapping from sequence id to sequence
# (indexed by id here, iterated as (id, sequence) pairs in the InterProScan stage).
seq_dict = get_seqnid(input_fasta)

# Make sure the directory that receives the BLAST report exists.
# The emptiness guard covers a bare file name (os.makedirs("") raises), and
# exist_ok=True avoids the check-then-create race of os.path.exists().
output_dir = os.path.dirname(blast_xml)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

blast_cmd = NcbiblastpCommandline(
    query=input_fasta,
    db=blast_database,
    out=blast_xml,
    outfmt=5,  # XML format
    evalue=expect_value,
)
blast_cmd()  # run

blast_results = extract_blast_metrics(blast_xml)

# Only ids present in the parsed BLAST results are written out.
blast_info = {
    uid: {"sequence": seq_dict[uid], "blast_results": info}
    for uid, info in blast_results.items()
}

# save blast results
# splitext swaps only the file extension; str.replace would also rewrite a
# ".xml" that happened to appear elsewhere in the path.
blast_json = os.path.splitext(blast_xml)[0] + ".json"
with open(blast_json, "w") as f:
    json.dump(blast_info, f, indent=4)

#####################################################
# run interproscan
#####################################################

# settings
goterms = True
pathways = True
interproscan_json = "evolla_test/test_hq0704_da_w_plddt_mask_hard_interproscan.json"
interproscan_path = "interproscan/interproscan-5.75-106.0/interproscan.sh"
# NOTE(review): "SFLM" may be a typo for InterProScan's "SFLD" member
# database -- confirm against extract_interproscan_metrics before changing.
librarys = [
    "PFAM", "PIRSR", "PROSITE_PROFILES", "SUPERFAMILY", "PRINTS",
    "PANTHER", "CDD", "GENE3D", "NCBIFAM", "SFLM", "MOBIDB_LITE",
    "COILS", "PROSITE_PATTERNS", "FUNFAM", "SMART",
]

interproscan = InterproScan(interproscan_path)
input_args = {
    "fasta_file": input_fasta,
    "goterms": goterms,
    "pathways": pathways,
    "save_dir": interproscan_json,
}
interproscan.run(**input_args)  # run

interproscan_results = extract_interproscan_metrics(
    interproscan_json,
    librarys=librarys,
)

# The parsed results are looked up by sequence, not by sequence id.
# NOTE(review): a sequence missing from interproscan_results presumably
# raises KeyError here -- confirm whether that is the intended behavior.
interproscan_info = {}
for seq_id, seq in seq_dict.items():
    info = rename_interproscan_keys(interproscan_results[seq])
    interproscan_info[seq_id] = {"sequence": seq, "interproscan_results": info}

# save interproscan results
# This writes to the same path that was passed to InterproScan.run() and
# parsed above, so the file ends up holding the processed results.
with open(interproscan_json, "w") as f:
    json.dump(interproscan_info, f, indent=4)