Source code for skrough.instances

"""Helper functions related to data objects (instances)."""

import logging
from typing import List, Optional, Union

import numpy as np

import skrough.typing as rght
from skrough.logs import log_start_end
from skrough.permutations import get_permutation
from skrough.structs.group_index import GroupIndex
from skrough.unique import get_uniques_positions
from skrough.utils import get_positions_where_values_in
from skrough.weights import prepare_weights

logger = logging.getLogger(__name__)


@log_start_end(logger)
def choose_objects(
    group_index: GroupIndex,
    y: np.ndarray,
    y_count: int,
    objs: Optional[Union[int, float, np.ndarray]] = None,
    weights: Optional[np.ndarray] = None,
    return_representatives_only: bool = False,
    seed: rght.Seed = None,
) -> List[int]:
    """Choose objects having uniform decision values within their groups.

    The function returns a list of objects that have unique decision values
    within their groups in ``group_index`` (see
    :class:`~skrough.structs.group_index.GroupIndex`).

    There are two ways to specify a hint on how the objects should be selected.

    If the ``objs`` argument is given then its value specifies the subset of
    objects along with the order in which the objects are handled. In such a
    case only the specified objects can appear in the result. Moreover, the
    objects that are earlier in ``objs`` take precedence, i.e., if two objects
    that belong to the same group (determined by ``group_index``) have
    conflicting decision values then only the earlier one is selected for the
    result. An empty ``objs`` yields an empty result.

    If :code:`objs is None` then all objects represented in ``group_index`` are
    taken into consideration and their order of precedence is established by
    means of the ``weights`` input argument, and the
    :func:`~skrough.weights.prepare_weights` and
    :func:`~skrough.permutations.get_permutation` functions. The value of
    ``weights`` is taken into account only if :code:`objs is None`, otherwise
    it is ignored.

    The value used during the process of establishing the order of objects is
    equivalent to the following ``selector`` expression::

        proba = prepare_weights(weights, group_index.n_objs, expand_none=False)
        selector = get_permutation(0, group_index.n_objs, proba, seed=seed)

    Thus, giving the opportunity to either set the ``weights`` used for drawing
    a permutation explicitly for each object (when
    :code:`len(weights) == group_index.n_objs`) or let the permutation be drawn
    from a uniform distribution - see :func:`~skrough.weights.prepare_weights`
    and :func:`~skrough.permutations.get_permutation` for details.

    The ``return_representatives_only`` argument is used to control whether the
    result returned by the function should either include all non-conflicting
    objects (the default behavior) or include at most one object from each
    group induced by ``group_index`` (when
    :code:`return_representatives_only is True`).

    Args:
        group_index: Group index that represents split of the objects
            represented by this structure into groups.
        y: Factorized decision values for the objects represented by the input
            ``group_index``. The values should be given in a form of
            integer-location based indexing sequence of the factorized decision
            values, i.e., 0-based values that index distinct decisions.
        y_count: Number of distinct decision attribute values.
        objs: A sequence of objects that the function should select from. It
            should be given in a form of integer-location based indexing
            sequence of the objects represented in ``group_index``.
            :obj:`None` value means to use all available objects. Defaults to
            :obj:`None`.
        weights: Used only if :code:`objs is None`. The value is used for
            establishing the order of precedence of objects by means of the
            :func:`~skrough.weights.prepare_weights` and
            :func:`~skrough.permutations.get_permutation` functions. It should
            be either :code:`len(weights) == group_index.n_objs`, a single
            weight value or :obj:`None`.
        return_representatives_only: A flag controlling if the result should
            include at most one object from each group (when set to
            :obj:`True`) or all non-conflicting objects (when set to
            :obj:`False`). Defaults to :obj:`False`.
        seed: Random seed. Defaults to :obj:`None`.

    Returns:
        A list of objects having uniform decision values within their groups
        determined by ``group_index``. The return value has a form of
        integer-location based indexing sequence of objects represented by
        ``group_index``. When :code:`return_representatives_only is True` the
        representatives are returned in ascending order.

    Examples:
        >>> group_index = GroupIndex.from_index([0, 0, 1, 1])
        >>> dec = np.array([0, 1, 0, 0])
        >>> choose_objects(group_index, y=dec, y_count=2, objs=np.array([0, 1, 2, 3]))
        [0, 2, 3]
        >>> choose_objects(group_index, y=dec, y_count=2, objs=np.array([1, 0, 2, 3]))
        [1, 2, 3]
        >>> choose_objects(group_index, y=dec, y_count=2, objs=np.array([0, 1, 2, 3]),
        ...                return_representatives_only=True)
        [0, 2]
        >>> choose_objects(group_index, y=dec, y_count=2, objs=np.array([0, 1, 3, 2]),
        ...                return_representatives_only=True)
        [0, 3]
    """
    # TODO: add arguments validation
    #   1) objs is not None => weights is None
    #   2) objs is None and weights is not None
    #      => len(group_index.index) == len(weights)

    if len(group_index.index) == 0:
        return []

    if objs is None:
        proba = prepare_weights(weights, group_index.n_objs, expand_none=False)
        selector = get_permutation(0, group_index.n_objs, proba, seed=seed)
    else:
        selector = np.asarray(objs)
        if selector.size == 0:
            # An empty selection (e.g. ``np.array([])``) defaults to a float
            # dtype and cannot be used for indexing; nothing can be chosen.
            return []

    # The first occurrence of each group within ``selector`` becomes the
    # representative of that group, which gives earlier objects precedence.
    idx = get_uniques_positions(group_index.index[selector])
    representatives_ids = selector[idx]

    if return_representatives_only:
        # ``tolist()`` yields plain Python ints, matching the ``List[int]``
        # annotation (``sorted()`` on an array would return NumPy scalars).
        return np.sort(representatives_ids).tolist()

    # Refine the groups by decision value and keep every object that falls
    # into the same (group, decision) cell as one of the representatives.
    group_index_dec = group_index.split(
        y,
        y_count,
        compress=False,
    )
    group_ids = group_index_dec.index[representatives_ids]
    return get_positions_where_values_in(
        values=group_index_dec.index,
        reference=group_ids,
    )