Hier ist ein reproduzierbares Beispiel:
Code: Select all
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
# Reproducible example: grid-search a pipeline made of preprocessing,
# forward sequential feature selection and logistic regression.

# Column groups; each group gets its own preprocessing sub-pipeline below
numeric_features = ['age', 'fare']
categorical_features = ['sex']

# Load the Titanic dataset, then split it into feature columns and target
titanic = sns.load_dataset('titanic')
features = [*numeric_features, *categorical_features]
X = titanic[features]
y = titanic['survived']

# Numeric columns: impute with a constant (no explicit fill_value is given,
# so the imputer's default applies), then standardize
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant')),
    ('scaler', StandardScaler()),
])

# Categorical columns: impute with a constant, then one-hot encode while
# dropping the first category of each feature
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant')),
    ('onehot', OneHotEncoder(drop='first')),
])

# Route each column group through its matching sub-pipeline
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# The same estimator object is handed to the selector and used as the
# final pipeline step
clf = LogisticRegression(solver='liblinear', max_iter=1000)
sfs = SequentialFeatureSelector(estimator=clf, direction='forward')

# Full pipeline: preprocessing -> feature selection -> classification.
# The step names are the prefixes used by the parameter grid keys.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('feature_selection', sfs),
    ('classifier', clf),
])

# Hyper-parameters to search, addressed as <step name>__<parameter>
param_grid = {
    'feature_selection__n_features_to_select': [2],
    'classifier__C': [0.1, 1.0, 10.0],  # Regularization strength
}

# Run the 5-fold cross-validated grid search
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)
grid_search.fit(X, y)

# Report the winning parameter combination and its score
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)
Code: Select all
X
Code: Select all
feature_selection__n_features_to_select: [2,3]
Das Hindernis hier ist, dass SequentialFeatureSelector die Auswahl aller Features (auch Passthrough-Selektor genannt) nicht als gültige Feature-Auswahl betrachtet.
Mit anderen Worten, ich möchte eine Rastersuche ausführen, die auch die Einstellung von
Code: Select all
('feature_selection', 'passthrough')