Spaces:
Sleeping
Sleeping
| from numbers import Number | |
| from typing import Optional, Union | |
| import numpy as np | |
| from deepscreen.utils import get_logger | |
| log = get_logger(__name__) | |
# Converters from a value expressed in the keyed unit to the p scale
# (-log10 of the molar concentration). The 'p' entry is the identity, for
# values that are already on the p scale.
MOLARITY_TO_POTENCY = {
    'p': lambda x: x,
    'M': lambda x: -np.log10(x),
    'mM': lambda x: -np.log10(x) + 3,
    # Micromolar is accepted under three spellings. The first two look the
    # same on screen but are different code points, so both are keyed and
    # written as escapes to keep them distinguishable in the source.
    '\u03bcM': lambda x: -np.log10(x) + 6,  # GREEK SMALL LETTER MU + 'M'
    '\u00b5M': lambda x: -np.log10(x) + 6,  # MICRO SIGN + 'M'
    'uM': lambda x: -np.log10(x) + 6,  # ASCII fallback
    'nM': lambda x: -np.log10(x) + 9,
    'pM': lambda x: -np.log10(x) + 12,
    'fM': lambda x: -np.log10(x) + 15,
}
| # TODO rewrite for swifter.apply | |
def molar_to_p(labels, units):
    """Convert labels from their stated units to the p scale (-log10[M]).

    :param labels: a sequence of numeric labels
    :type labels: array_like
    :param units: either one unit string applied to every label, or a sequence
        holding one unit string per label; every unit must be a key of
        ``MOLARITY_TO_POTENCY``
    :type units: str or array_like
    :return: a numpy array of the converted labels
    :raises ValueError: if a unit is not a key of ``MOLARITY_TO_POTENCY``, or
        if a sequence of units does not have the same length as ``labels``
    """
    if isinstance(units, str):
        if units not in MOLARITY_TO_POTENCY:
            raise ValueError(
                f"Unknown unit {units!r}. Allowed units: {', '.join(MOLARITY_TO_POTENCY)}."
            )
        # One unit for all labels: convert the whole array in a single call.
        return np.asarray(MOLARITY_TO_POTENCY[units](np.asarray(labels)))

    labels = list(labels)
    units = list(units)
    if len(labels) != len(units):
        raise ValueError(
            f"Got {len(labels)} labels but {len(units)} units; expected one unit per label."
        )
    for unit in units:
        if unit not in MOLARITY_TO_POTENCY:
            raise ValueError(
                f"Unknown unit {unit!r}. Allowed units: {', '.join(MOLARITY_TO_POTENCY)}."
            )

    # Pair each label with its own unit; zip walks the two sequences in lockstep.
    return np.array(
        [MOLARITY_TO_POTENCY[unit](label) for label, unit in zip(labels, units)]
    )
def label_discretize(labels, thresholds):
    """Map continuous labels to descending discrete levels.

    Each label is assigned the number of thresholds that are strictly greater
    than it. Labels at or above the greatest threshold therefore get level 0,
    and labels below the lowest threshold get the highest level (the number of
    thresholds). With a single threshold this gives 1 below it and 0 otherwise.

    A scalar threshold and a one-element sequence are treated identically.

    :param labels: continuous label(s) to discretize
    :type labels: array_like
    :param thresholds: one threshold or a sequence of thresholds, in any order
    :type thresholds: Number or array_like
    :return: integer level(s) with the same shape as ``labels``
    """
    # Always use ascending bins: np.digitize then counts the thresholds that
    # are <= each label, and subtracting from the bin count flips that into
    # the number of thresholds above the label.
    bins = np.sort(np.atleast_1d(thresholds))
    return bins.size - np.digitize(labels, bins)
def _is_provided(value) -> bool:
    """Return True when an optional scalar-or-sequence argument carries a value."""
    if value is None:
        return False
    try:
        # Sized inputs (lists, arrays, Series, strings): non-empty means provided.
        # Using len() avoids the ambiguous truth value of multi-element arrays.
        return len(value) > 0
    except TypeError:
        # Scalars have no length; keep plain truthiness for them.
        return bool(value)


def label_transform(
        labels,
        units: Optional[list[str]] = None,
        thresholds: Optional[Union[float, list[Number]]] = None,
        discard_intermediate: Optional[bool] = False,
):
    """Convert labels of all units to p scale (-log10[M]) and discretize them if specified.

    :param labels: a sequence of labels, continuous or binary values
    :type labels: array_like
    :param units: a sequence of label units, each a key of ``MOLARITY_TO_POTENCY``;
        if omitted or empty, no unit conversion is applied
    :type units: array_like, optional
    :param thresholds: discretization threshold(s) for affinity labels, in p scale (-log10[M]).
        A single number maps affinities below it to 1 and otherwise to 0.
        A sequence of two or more thresholds maps affinities to multiple discrete levels
        descendingly, assigning values below the lowest threshold to the highest level (e.g. 2)
        and values at or above the greatest threshold to 0.
        If omitted, falsy or empty, no discretization is applied.
    :type thresholds: list, float, optional
    :param discard_intermediate: whether to discard the intermediate (indeterminate) level;
        requires an odd number of thresholds (>=3)
    :type discard_intermediate: bool
    :return: the labels in p scale (-log10[M]) or as discrete levels. When
        ``discard_intermediate`` is set the result is a float numpy array in which the
        discarded level is NaN and every level above it is shifted down by 1.
    :raises ValueError: if ``discard_intermediate`` is set without an odd number of
        (at least 3) thresholds
    """
    # Scalars and None count as zero thresholds for the discard check below.
    n_thresholds = np.size(thresholds) if np.ndim(thresholds) else 0

    if discard_intermediate and (n_thresholds < 3 or n_thresholds % 2 == 0):
        # Checked before any conversion so a bad configuration fails fast and clearly.
        raise ValueError(
            "Must give an odd number of (at least 3) thresholds to discard the intermediate level."
        )

    if _is_provided(units):
        labels = molar_to_p(labels, units)

    if _is_provided(thresholds):
        labels = label_discretize(labels, thresholds)

        if discard_intermediate:
            # NOTE(review): an odd number of thresholds yields an even number of levels,
            # so there is no exact middle level; the level at index n_thresholds // 2 is
            # the one dropped. Confirm this matches the intended contract.
            intermediate_level = n_thresholds // 2
            levels = np.asarray(labels)
            # The discretized levels are integers and cannot hold NaN, so build a new
            # float array: NaN marks the intermediate level (to be filtered out later)
            # and every level above it is reduced by 1.
            labels = np.where(
                levels == intermediate_level,
                np.nan,
                levels - (levels > intermediate_level),
            )

    return labels