Source code for arfs.feature_selection.summary
"""Feature Selection Summary Module

This module provides a function for creating the summary report of a
feature selection (FS) pipeline.

Module Structure:
-----------------
- ``make_fs_summary``: main function for creating the summary
- ``highlight_discarded``: function for creating the style of the pd.DataFrame
"""
import pandas as pd
import numpy as np
def highlight_discarded(s):
    """Highlight X in red and V in green.

    Entries equal to 0 (discarded, "X") get a red background; every other
    entry (including NaN, which never compares equal to 0) gets a green one.

    Parameters
    ----------
    s : array-like of shape (n_features,)
        the boolean array for defining the style

    Returns
    -------
    list of str
        one CSS ``background-color`` declaration per element of ``s``
    """
    # Coerce to an ndarray so that the comparison is element-wise for any
    # array-like; a plain list would otherwise compare as a whole to 0 and
    # yield a single (non-iterable) bool.
    is_discarded = np.asarray(s) == 0
    return [
        "background-color: #ba0202" if flag else "background-color: #0c8a30"
        for flag in is_discarded
    ]
def make_fs_summary(selector_pipe):
    """Make a summary dataframe highlighting at which step a given predictor
    has been rejected (if any).

    For every step of the pipeline a column is added, holding 1 where the
    predictor was kept by the step, 0 where the step dropped it and NaN where
    the predictor never reached the step (or the step exposes no
    ``support_`` attribute).

    Parameters
    ----------
    selector_pipe : sklearn.pipeline.Pipeline
        the feature selector pipeline.

    Returns
    -------
    pandas.io.formats.style.Styler
        the styled summary: the background colours come from
        ``highlight_discarded`` and NaN cells are coloured orange.

    Examples
    --------
    >>> groot_pipeline = Pipeline([
    ...     ('missing', MissingValueThreshold()),
    ...     ('unique', UniqueValuesThreshold()),
    ...     ('cardinality', CardinalityThreshold()),
    ...     ('collinearity', CollinearityThreshold(threshold=0.5)),
    ...     ('lowimp', VariableImportance(eval_metric='poisson', objective='poisson', verbose=2)),
    ...     ('grootcv', GrootCV(objective='poisson', cutoff=1, n_folds=3, n_iter=5))])
    >>> groot_pipeline.fit_transform(
    ...     X=df[predictors],
    ...     y=df[target],
    ...     lowimp__sample_weight=df[weight],
    ...     grootcv__sample_weight=df[weight])
    >>> fs_summary_df = make_fs_summary(groot_pipeline)
    """
    # The first step sees every predictor, so its input names seed the table.
    tag_df = pd.DataFrame({"predictor": selector_pipe[0].feature_names_in_})

    for selector_name, selector in selector_pipe.named_steps.items():
        if not hasattr(selector, "support_"):
            # Step without a support mask: nothing to report for it.
            tag_df[selector_name] = np.nan
            continue

        feature_in = selector.feature_names_in_
        to_drop = list(set(feature_in) - set(selector.get_feature_names_out()))
        reached_step = tag_df["predictor"].isin(feature_in)
        dropped_here = tag_df["predictor"].isin(to_drop)
        # 0.0 = dropped by this step, 1.0 = kept, NaN = never reached it.
        tag_df[selector_name] = np.where(
            reached_step, np.where(dropped_here, 0.0, 1.0), np.nan
        )

    styler = tag_df.style.apply(highlight_discarded, subset=tag_df.columns[1:])
    # ``Styler.applymap`` was deprecated in pandas 2.1 in favour of
    # ``Styler.map``; use the new name when available, the old one otherwise.
    elementwise = getattr(styler, "map", None) or styler.applymap
    # ``x == x`` is False only for NaN, which gets the orange background.
    style = elementwise(
        lambda x: "" if x == x else "background-color: #f57505"
    ).format(precision=0)
    return style