sklearn.feature_selection.SelectFwe

Here are the examples of the python api sklearn.feature_selection.SelectFwe taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

7 Examples 7

Example 1

Project: scikit-learn Source File: test_feature_select.py
def test_select_fwe_regression():
    # Test whether the relative univariate feature selection
    # gets the correct items in a simple regression problem
    # with the fwe heuristic
    X, y = make_regression(n_samples=200, n_features=20,
                           n_informative=5, shuffle=False, random_state=0)

    univariate_filter = SelectFwe(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, y).transform(X)
    X_r2 = GenericUnivariateSelect(
        f_regression, mode='fwe', param=0.01).fit(X, y).transform(X)
    assert_array_equal(X_r, X_r2)
    support = univariate_filter.get_support()
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    assert_array_equal(support[:5], np.ones((5, ), dtype=np.bool))
    assert_less(np.sum(support[5:] == 1), 2)

Example 2

Project: delft Source File: tests.py
def test_select_fwe_2():
        """Ensure that the TPOT select fwe outputs the same result as sklearn fwe when alpha > 0.05"""
        tpot_obj = TPOT()
        non_feature_columns = ['class', 'group', 'guess']
        training_features = training_testing_data.loc[training_testing_data['group'] == 'training'].drop(non_feature_columns, axis=1)
        training_class_vals = training_testing_data.loc[training_testing_data['group'] == 'training', 'class'].values

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=UserWarning)
            selector = SelectFwe(f_classif, alpha=0.05)
            selector.fit(training_features, training_class_vals)
            mask = selector.get_support(True)
        mask_cols = list(training_features.iloc[:, mask].columns) + non_feature_columns

        assert np.array_equal(tpot_obj._select_fwe(training_testing_data, 1), training_testing_data[mask_cols])

Example 3

Project: delft Source File: tests.py
def test_select_fwe_3():
        """Ensure that the TPOT select fwe outputs the same result as sklearn fwe when alpha < 0.001"""
        tpot_obj = TPOT()
        non_feature_columns = ['class', 'group', 'guess']
        training_features = training_testing_data.loc[training_testing_data['group'] == 'training'].drop(non_feature_columns, axis=1)
        training_class_vals = training_testing_data.loc[training_testing_data['group'] == 'training', 'class'].values

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=UserWarning)
            selector = SelectFwe(f_classif, alpha=0.001)
            selector.fit(training_features, training_class_vals)
            mask = selector.get_support(True)
        mask_cols = list(training_features.iloc[:, mask].columns) + non_feature_columns

        assert np.array_equal(tpot_obj._select_fwe(training_testing_data, 0.0001), training_testing_data[mask_cols])

Example 4

Project: delft Source File: tests.py
def test_select_fwe_4():
        """Ensure that the TPOT select fwe outputs the same result as sklearn fwe when 0.001 < alpha < 0.05"""
        tpot_obj = TPOT()
        non_feature_columns = ['class', 'group', 'guess']
        training_features = training_testing_data.loc[training_testing_data['group'] == 'training'].drop(non_feature_columns, axis=1)
        training_class_vals = training_testing_data.loc[training_testing_data['group'] == 'training', 'class'].values

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=UserWarning)
            selector = SelectFwe(f_classif, alpha=0.042)
            selector.fit(training_features, training_class_vals)
            mask = selector.get_support(True)
        mask_cols = list(training_features.iloc[:, mask].columns) + non_feature_columns

        assert np.array_equal(tpot_obj._select_fwe(training_testing_data, 0.042), training_testing_data[mask_cols])

Example 5

Project: scikit-learn Source File: test_feature_select.py
def test_no_feature_selected():
    rng = np.random.RandomState(0)

    # Generate random uncorrelated data: a strict univariate test should
    # rejects all the features
    X = rng.rand(40, 10)
    y = rng.randint(0, 4, size=40)
    strict_selectors = [
        SelectFwe(alpha=0.01).fit(X, y),
        SelectFdr(alpha=0.01).fit(X, y),
        SelectFpr(alpha=0.01).fit(X, y),
        SelectPercentile(percentile=0).fit(X, y),
        SelectKBest(k=0).fit(X, y),
    ]
    for selector in strict_selectors:
        assert_array_equal(selector.get_support(), np.zeros(10))
        X_selected = assert_warns_message(
            UserWarning, 'No features were selected', selector.transform, X)
        assert_equal(X_selected.shape, (40, 0))

Example 6

Project: scikit-learn Source File: test_feature_select.py
def test_select_heuristics_classif():
    # Test whether the relative univariate feature selection
    # gets the correct items in a simple classification problem
    # with the fdr, fwe and fpr heuristics
    X, y = make_classification(n_samples=200, n_features=20,
                               n_informative=3, n_redundant=2,
                               n_repeated=0, n_classes=8,
                               n_clusters_per_class=1, flip_y=0.0,
                               class_sep=10, shuffle=False, random_state=0)

    univariate_filter = SelectFwe(f_classif, alpha=0.01)
    X_r = univariate_filter.fit(X, y).transform(X)
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    for mode in ['fdr', 'fpr', 'fwe']:
        X_r2 = GenericUnivariateSelect(
            f_classif, mode=mode, param=0.01).fit(X, y).transform(X)
        assert_array_equal(X_r, X_r2)
        support = univariate_filter.get_support()
        assert_array_almost_equal(support, gtruth)

Example 7

Project: scikit-learn Source File: test_feature_select.py
def test_boundary_case_ch2():
    # Test boundary case, and always aim to select 1 feature.
    X = np.array([[10, 20], [20, 20], [20, 30]])
    y = np.array([[1], [0], [0]])
    scores, pvalues = chi2(X, y)
    assert_array_almost_equal(scores, np.array([4., 0.71428571]))
    assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))

    filter_fdr = SelectFdr(chi2, alpha=0.1)
    filter_fdr.fit(X, y)
    support_fdr = filter_fdr.get_support()
    assert_array_equal(support_fdr, np.array([True, False]))

    filter_kbest = SelectKBest(chi2, k=1)
    filter_kbest.fit(X, y)
    support_kbest = filter_kbest.get_support()
    assert_array_equal(support_kbest, np.array([True, False]))

    filter_percentile = SelectPercentile(chi2, percentile=50)
    filter_percentile.fit(X, y)
    support_percentile = filter_percentile.get_support()
    assert_array_equal(support_percentile, np.array([True, False]))

    filter_fpr = SelectFpr(chi2, alpha=0.1)
    filter_fpr.fit(X, y)
    support_fpr = filter_fpr.get_support()
    assert_array_equal(support_fpr, np.array([True, False]))

    filter_fwe = SelectFwe(chi2, alpha=0.1)
    filter_fwe.fit(X, y)
    support_fwe = filter_fwe.get_support()
    assert_array_equal(support_fwe, np.array([True, False]))