1 import sys
-
F401
'numpy as np' imported but unused
2 import numpy as np
3
4 PY2 = sys.version_info < (3,)
5 if PY2:
6 # Python 2 is not happy with our package having the same name, we need dynamic import
7 import importlib
8 sklearn_exc = importlib.import_module("sklearn.exceptions")
9 NotFittedError = sklearn_exc.NotFittedError
10 else:
11 # normal imports on python 3
12 from sklearn.exceptions import NotFittedError
13
-
E402
Module level import not at top of file
14 from .compat import BaseEstimator, SelectorMixin
-
E402
Module level import not at top of file
15 from .main import qd_screen
16
17
class QDScreen(SelectorMixin, BaseEstimator):
    """Feature selector that removes all features that are (quasi-)deterministically predicted from others.

    This feature selection algorithm looks only at the features (X), not the
    desired outputs (y), and can thus be used for unsupervised learning.

    Read more in the User Guide.

    Parameters
    ----------
    absolute_eps : float, optional
        Absolute entropy threshold. Any feature Y that can be predicted from
        another feature X in a quasi-deterministic way, that is, where
        conditional entropy H(Y|X) <= absolute_eps, will be removed. The
        constructor default is ``None``, which is forwarded unchanged to
        ``qd_screen``; a threshold of 0 corresponds to removing deterministic
        relationships only.
    relative_eps : float, optional
        Relative entropy threshold. Any feature Y that can be predicted from
        another feature X in a quasi-deterministic way, that is, where relative
        conditional entropy H(Y|X)/H(Y) <= relative_eps (between 0 and 1), will
        be removed. Only one of absolute_eps and relative_eps should be
        provided.

    Attributes
    ----------
    model_ : object
        Selector model learned by :meth:`fit`: the result of calling
        ``fit_selector_model(X)`` on the forest structure returned by
        ``qd_screen``. Its ``forest.roots_mask_ar`` attribute provides the
        support mask, and its ``predict_qd`` method is used by
        :meth:`inverse_transform` to rebuild the removed features.

    Notes
    -----
    Allows NaN in the input.

    Examples
    --------
    .. TODO make this better ? see test_readme.py

    The following dataset has integer features, two of which are constant, and all of which are 'predictable' from
    the third one::

        >>> X = [[0, 2, 0, 3],
        ...      [0, 1, 4, 3],
        ...      [0, 1, 1, 3]]
        >>> selector = QDScreen()
        >>> Xsel = selector.fit_transform(X)
        >>> Xsel
        array([[0],
               [4],
               [1]])
        >>> selector.inverse_transform(Xsel)
        array([[0, 2, 0, 3],
        ...    [0, 1, 4, 3],
        ...    [0, 1, 1, 3]])
    """

    def __init__(self,
                 absolute_eps=None, relative_eps=None):
        # Hyper-parameters are stored untouched, as scikit-learn estimators
        # are expected to do; they are only interpreted in `fit`.
        self.absolute_eps = absolute_eps
        self.relative_eps = relative_eps

    def fit(self, X, y=None):
        """Learn the (quasi-)deterministic relationships between the features of X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Sample vectors from which to learn the relationships. Sparse
            matrices are rejected; NaN values are allowed.

        y : any
            Ignored. This parameter exists only for compatibility with
            sklearn.pipeline.Pipeline.

        Returns
        -------
        self
        """
        # Sparse input is not supported; values are validated as `object`
        # dtype and NaN is let through.
        X = self._validate_data(X, accept_sparse=False,
                                dtype=object,
                                force_all_finite='allow-nan')

        # First find the forest structure
        forest_ = qd_screen(X, absolute_eps=self.absolute_eps, relative_eps=self.relative_eps)

        # Then learn the parameter maps
        self.model_ = forest_.fit_selector_model(X)

        return self

    def check_is_fitted(self):
        """Raise ``NotFittedError`` if :meth:`fit` has not set ``model_`` yet."""
        if not hasattr(self, "model_"):
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this estimator.")
            raise NotFittedError(msg % {'name': type(self).__name__})

    def _get_support_mask(self):
        """Return the mask of retained features, i.e. the roots mask of the fitted forest.

        Raises
        ------
        NotFittedError
            If the selector has not been fitted.
        """
        self.check_is_fitted()
        return self.model_.forest.roots_mask_ar

    def inverse_transform(self, X):
        """
        Reverse the transformation operation

        Parameters
        ----------
        X : array of shape [n_samples, n_selected_features]
            The input samples.

        Returns
        -------
        X_r : array of shape [n_samples, n_original_features]
            `X` expanded back to the original number of columns by the parent
            ``inverse_transform``, with the columns of the features removed by
            :meth:`transform` then filled in place by ``model_.predict_qd``.
        """
        # Explicit two-argument `super` is kept: this module still supports Python 2.
        Xt = super(QDScreen, self).inverse_transform(X)
        # use inplace = True because Xt is already prepared
        self.model_.predict_qd(Xt, inplace=True)
        return Xt

    def _more_tags(self):
        """Return the extra estimator tags: this selector declares that it allows NaN."""
        return {'allow_nan': True}