Spaces:

obi
/

Medical-Note-Deidentification

Running

Medical-Note-Deidentification / ner_datasets /distribution /ner_distribution.py

Prajwal Kailas

dependency to run

45c1511 over 3 years ago

2.3 kB

	from collections import Counter, defaultdict
	from typing import Sequence, Mapping, NoReturn


	class NERDistribution:
	    """
	    Store the distribution of NER types grouped by some key.

	    For every key (e.g. a note id or a patient id) a ``Counter`` of NER type
	    counts is kept, and it is updated whenever spans related to that key
	    are passed to ``update_distribution``.
	    """

	    def __init__(self) -> None:
	        """
	        Initialize the (empty) key -> NER type count mapping.
	        """
	        # Maps each grouping key to a Counter of NER types seen for that key.
	        # What the key means (patient, note id, ...) is up to the caller.
	        # E.g - {PATIENT 1: {AGE: 99, DATE: 55, ...}, PATIENT 2: {AGE: 5, DATE: 9, ...}, ...}
	        self._ner_distribution = defaultdict(Counter)

	    def update_distribution(self, spans: Sequence[Mapping[str, str]], key: str) -> None:
	        """
	        Update the distribution of NER types for the given key.

	        The key is registered even when ``spans`` is empty, so it will be
	        present (with an empty Counter) in the overall distribution.

	        Args:
	            spans (Sequence[Mapping[str, str]]): The list of spans in the note.
	                                                 Each span is looked up by its
	                                                 'label' entry.
	            key (str): The note id or patient id of the note (some grouping)
	        """
	        # Indexing the defaultdict creates the Counter for a new key on demand,
	        # so no explicit existence check / initialization is required.
	        label_counts = self._ner_distribution[key]
	        # Add one count per span, keyed by the span's NER label.
	        label_counts.update(span['label'] for span in spans)

	    def get_ner_distribution(self) -> defaultdict:
	        """
	        Return the overall NER distribution, i.e. the NER type distribution
	        for every key.

	        The internal mapping itself is returned (not a copy). Because it is a
	        defaultdict, indexing it with an unknown key inserts an empty Counter;
	        use ``get_group_distribution`` for a lookup that raises instead.

	        Returns:
	            ner_distribution (defaultdict(Counter)): Overall NER type distribution for all keys
	        """
	        return self._ner_distribution

	    def get_group_distribution(self, key: str) -> Counter:
	        """
	        Return the NER type distribution for the given key.

	        Args:
	            key (str): The grouping key (e.g. note id or patient id)
	        Returns:
	            (Counter): NER distribution w.r.t some grouping (key)
	        Raises:
	            ValueError: If no distribution has been recorded for the key
	        """
	        # Test membership first: indexing a defaultdict with a missing key
	        # would silently insert an empty Counter instead of failing.
	        if key not in self._ner_distribution:
	            raise ValueError('Key not found')
	        return self._ner_distribution[key]