Spaces:
Runtime error
Runtime error
import json | |
from collections import Counter | |
import os | |
_pfam_dict = None | |
_pfam_descriptions = None | |
def _load_pfam_data(protein2pfam_path):
    """
    Load the protein-to-Pfam JSON file into the module-level ``_pfam_dict`` cache.

    The file is parsed only while ``_pfam_dict`` is still ``None``; once the
    cache is populated, further calls return without touching the disk.

    Args:
        protein2pfam_path: str - path to the JSON file to parse.

    NOTE: the cache is not keyed by path, so a later call with a different
    path keeps whatever was loaded first.
    """
    global _pfam_dict
    if _pfam_dict is None:
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's locale encoding (e.g. GBK or cp1252 on Windows).
        with open(protein2pfam_path, 'r', encoding='utf-8') as file:
            _pfam_dict = json.load(file)
def _load_pfam_descriptions(pfam_descriptions_path):
    """
    Load the Pfam descriptions JSON file into the module-level
    ``_pfam_descriptions`` cache.

    The file is parsed only while ``_pfam_descriptions`` is still ``None``;
    once the cache is populated, further calls return without touching the disk.

    Args:
        pfam_descriptions_path: str - path to the JSON file to parse.

    NOTE: the cache is not keyed by path, so a later call with a different
    path keeps whatever was loaded first.
    """
    global _pfam_descriptions
    if _pfam_descriptions is None:
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's locale encoding (e.g. GBK or cp1252 on Windows).
        with open(pfam_descriptions_path, 'r', encoding='utf-8') as file:
            _pfam_descriptions = json.load(file)
def get_motif_pfam(protein_id, protein2pfam_path, pfam_descriptions_path):
    """
    Return the Pfam entries recorded for a protein together with their descriptions.

    Args:
        protein_id: str - protein ID used as the lookup key.
        protein2pfam_path: str - path to the interproscan_info.json file.
        pfam_descriptions_path: str - path to the Pfam descriptions file.

    Returns:
        dict - mapping from pfam_id to its description, for example
        {"PF04820": "definition content"}. Empty when the protein ID is not
        present in the loaded data.
    """
    # Both loaders are no-ops once their module-level caches are populated.
    _load_pfam_data(protein2pfam_path)
    _load_pfam_descriptions(pfam_descriptions_path)

    if protein_id not in _pfam_dict:
        return {}

    scan_results = _pfam_dict[protein_id].get('interproscan_results', {})
    hits = scan_results.get('pfam_id', [])

    # Each hit is a mapping whose keys are Pfam IDs; collect them in order.
    found_ids = [pfam_id for hit in hits for pfam_id in hit.keys()]

    # Keep only the IDs that have an entry in the descriptions table.
    return {
        pfam_id: _pfam_descriptions[pfam_id]['description']
        for pfam_id in found_ids
        if pfam_id in _pfam_descriptions
    }
if __name__ == "__main__":
    # Command-line entry point: look up one protein and print its
    # pfam_id -> description mapping.
    import argparse

    cli = argparse.ArgumentParser()
    # (flag, default) pairs; every option is an optional string.
    cli_options = (
        ("--protein_id", "A8CF74"),
        ("--protein2pfam_path", "data/processed_data/interproscan_info.json"),
        ("--pfam_descriptions_path", "data/raw_data/all_pfam_descriptions.json"),
    )
    for flag, fallback in cli_options:
        cli.add_argument(flag, type=str, required=False, default=fallback)
    opts = cli.parse_args()

    print(
        get_motif_pfam(
            opts.protein_id,
            opts.protein2pfam_path,
            opts.pfam_descriptions_path,
        )
    )