# Source code for skrough.checks

"""Rough sets related check/test functions.

The :mod:`skrough.checks` module delivers rough sets related check functions of various
kinds.
"""

from typing import Any, Optional, Union

import numpy as np

import skrough.typing as rght
from skrough.disorder_measures.disorder_measures import conflicts_count
from skrough.disorder_score import get_disorder_score_for_data, get_disorder_score_stats
from skrough.instances import choose_objects
from skrough.structs.group_index import GroupIndex
from skrough.unify import unify_locations
from skrough.unique import get_rows_nunique


def _get_locations_based_selector(
    locations: Optional[rght.LocationsLike] = None,
) -> Union[rght.Locations, slice]:
    """Build an indexer usable on ndarrays out of optional locations.

    Args:
        locations: Locations to select, or :obj:`None` to select everything.
            Defaults to :obj:`None`.

    Returns:
        ``slice(None)`` when ``locations`` is :obj:`None`; otherwise the result of
        passing ``locations`` through ``unify_locations``.
    """
    if locations is None:
        # a full slice keeps every element along the indexed axis
        return slice(None)
    return unify_locations(locations)


def check_if_functional_dependency(
    x: np.ndarray,
    y: np.ndarray,
    objs: Optional[rght.LocationsLike] = None,
    attrs: Optional[rght.LocationsLike] = None,
) -> bool:
    """Check functional dependency between conditional attributes and the decision.

    The selected part of ``x`` is passed to ``get_rows_nunique`` twice: once as
    is and once with the matching decisions from ``y`` appended as an extra
    column. The functional dependency is reported to hold when both results are
    equal. The check can be narrowed to a subset of objects and/or attributes.

    Args:
        x: Input data table.
        y: Input decisions.
        objs: A subset of objects that the check should be performed on. It should
            be given in a form of a sequence of integer-location based indexing of
            the selected objects/rows/instances from ``x``. :obj:`None` value means
            to use all available objects. Defaults to :obj:`None`.
        attrs: A subset of conditional attributes the check should be performed on.
            It should be given in a form of a sequence of integer-location based
            indexing of the selected conditional attributes from ``x``. :obj:`None`
            value means to use all available conditional attributes. Defaults to
            :obj:`None`.

    Returns:
        Indication whether functional dependency holds for the given input.
    """
    row_selector: Union[rght.Locations, slice] = _get_locations_based_selector(objs)
    col_selector: Union[rght.Locations, slice] = _get_locations_based_selector(attrs)

    indexer: Any
    if not isinstance(row_selector, slice) and not isinstance(col_selector, slice):
        # both selectors are explicit locations: build an open mesh so that the
        # whole ``objects`` x ``attributes`` cross product is taken
        indexer = np.ix_(row_selector, col_selector)
    else:
        # at least one axis is taken in full, plain per-axis indexing is enough
        indexer = (row_selector, col_selector)

    conditions = x[indexer]
    nunique_conditions = get_rows_nunique(conditions)

    with_decision = np.column_stack((conditions, y[row_selector]))
    nunique_with_decision = get_rows_nunique(with_decision)

    return nunique_conditions == nunique_with_decision
def check_if_consistent_table(
    x: np.ndarray,
    y: np.ndarray,
) -> bool:
    """Check if decision table is consistent.

    A decision table is consistent when objects with different decisions can be
    discerned by means of conditional attributes. The check simply delegates to
    ``check_if_functional_dependency`` with no restriction on objects or
    attributes, i.e., using all of them.

    Args:
        x: Input data table.
        y: Input decisions.

    Returns:
        Indication whether the decision table is consistent.
    """
    # ``None`` for both subsets means "use everything"
    return check_if_functional_dependency(x, y, objs=None, attrs=None)
def check_if_reduct(
    x: np.ndarray,
    x_counts: np.ndarray,
    y: np.ndarray,
    y_count: int,
    attrs: rght.LocationsLike,
    consistent_table_check: bool = True,
) -> bool:
    """Check if specified attributes form a reduct.

    Checks whether the subset of attributes ``attrs`` forms a decision reduct for
    the input data ``x`` and the decisions ``y``. The actual check is delegated to
    ``check_if_approx_reduct`` with ``conflicts_count`` as the disorder measure,
    ``epsilon`` equal to ``0`` and attribute reduction checking turned on.

    When ``consistent_table_check`` is ``True``, consistency of the whole decision
    table is verified first and it is a necessary condition for ``attrs`` to be
    reported as a reduct.

    Args:
        x: Input data table.
        x_counts: Forwarded unchanged to ``check_if_approx_reduct`` (presumably
            per-attribute counts of distinct values in ``x`` - to be confirmed).
        y: Input decisions.
        y_count: Forwarded unchanged to ``check_if_approx_reduct`` (presumably the
            number of distinct decision values - to be confirmed).
        attrs: A subset of conditional attributes the check should be performed on.
            It should be given in a form of a sequence of integer-location based
            indexing of the selected conditional attributes from ``x``.
        consistent_table_check: Whether decision table consistency check should be
            performed prior to other checks. Defaults to True.

    Returns:
        Indication whether the specified attributes form a reduct.
    """
    if consistent_table_check:
        table_is_consistent = check_if_consistent_table(x, y)
        if not table_is_consistent:
            return False

    return check_if_approx_reduct(
        x,
        x_counts,
        y,
        y_count,
        attrs,
        disorder_fun=conflicts_count,
        epsilon=0,
        check_attrs_reduction=True,
    )
def check_if_approx_reduct(
    x: np.ndarray,
    x_counts: np.ndarray,
    y: np.ndarray,
    y_count: int,
    attrs: rght.LocationsLike,
    disorder_fun: rght.DisorderMeasure,
    epsilon: float,
    check_attrs_reduction: bool = True,
) -> bool:
    """Check if specified attributes form an approximate reduct.

    The check has two stages. First, the disorder score obtained for ``attrs`` must
    not exceed the approximation threshold reported by
    ``get_disorder_score_stats``. Second, and only if ``check_attrs_reduction`` is
    set, no attribute may be removable: for every attribute in ``attrs`` the score
    computed for the remaining attributes must be above that threshold.

    Args:
        x: Input data table.
        x_counts: Forwarded to the disorder score functions (presumably
            per-attribute counts of distinct values in ``x`` - to be confirmed).
        y: Input decisions.
        y_count: Forwarded to the disorder score functions (presumably the number
            of distinct decision values - to be confirmed).
        attrs: Integer-location based indexing of the conditional attributes from
            ``x`` that are being checked.
        disorder_fun: Disorder measure forwarded to the disorder score functions.
        epsilon: Approximation parameter forwarded to ``get_disorder_score_stats``
            together with ``disorder_fun``.
        check_attrs_reduction: Whether to additionally verify that no single
            attribute can be dropped from ``attrs``. Defaults to True.

    Returns:
        Indication whether the specified attributes form an approximate reduct with
        respect to the given disorder function and epsilon.
    """
    stats = get_disorder_score_stats(
        x,
        x_counts,
        y,
        y_count,
        disorder_fun=disorder_fun,
        epsilon=epsilon,
        increment_attrs=[attrs],
    )
    # use assert to type hint
    assert stats.for_increment_attrs  # nosec assert_used
    assert stats.approx_threshold is not None  # nosec assert_used
    threshold = stats.approx_threshold

    # pylint: disable-next=unsubscriptable-object
    score_for_attrs = stats.for_increment_attrs[0]
    # the ``not (a <= b)`` form is intentional, it is not the same as ``a > b``
    # for values that do not compare (e.g., NaN)
    if not (score_for_attrs <= threshold):
        return False

    if not check_attrs_reduction:
        return True

    attrs_set = set(attrs)
    # the attributes are reducible as soon as dropping any single one of them
    # still keeps the disorder score within the threshold
    is_reducible = any(
        get_disorder_score_for_data(
            x,
            x_counts,
            y,
            y_count,
            disorder_fun=disorder_fun,
            attrs=list(attrs_set - {attr}),
        )
        <= threshold
        for attr in attrs
    )
    return not is_reducible
def check_if_bireduct(
    x: np.ndarray,
    x_counts: np.ndarray,
    y: np.ndarray,
    y_count: int,
    objs: rght.LocationsLike,
    attrs: rght.LocationsLike,
) -> bool:
    """Check if specified objects and attributes form a bireduct.

    Two conditions need to be met. First, ``attrs`` must form a reduct (see
    ``check_if_reduct``) for the table restricted to ``objs``. Second, the set of
    objects returned by ``choose_objects`` - run on groups induced by ``attrs`` on
    the whole table, with ``objs`` listed ahead of all the objects - must be equal
    to the set of ``objs``.

    Args:
        x: Input data table.
        x_counts: Forwarded to ``check_if_reduct`` and ``GroupIndex.from_data``
            (presumably per-attribute counts of distinct values in ``x`` - to be
            confirmed).
        y: Input decisions.
        y_count: Forwarded to ``check_if_reduct`` and ``choose_objects``
            (presumably the number of distinct decision values - to be confirmed).
        objs: Integer-location based indexing of the objects/rows from ``x`` that
            are being checked.
        attrs: Integer-location based indexing of the conditional attributes from
            ``x`` that are being checked.

    Returns:
        Indication whether the specified objects and attributes form a bireduct.
    """
    objs_selector = _get_locations_based_selector(objs)
    if not check_if_reduct(
        x[objs_selector], x_counts, y[objs_selector], y_count, attrs=attrs
    ):
        return False

    group_index = GroupIndex.from_data(x, x_counts, attrs)
    # Coerce ``objs`` to an integer array before concatenating: an empty Python
    # sequence would otherwise be treated as a float64 array and promote the whole
    # result to floats, which are not valid integer locations.
    objs_locations = np.asarray(objs, dtype=np.int64)
    # the given objects go first, followed by every object of the table
    # NOTE(review): presumably ``choose_objects`` favours earlier entries - confirm
    all_objs = np.concatenate((objs_locations, np.arange(len(x))))
    chosen_objs = choose_objects(group_index, y, y_count, all_objs)
    return set(chosen_objs) == set(objs)