Pergunta

How can I set `class_weight` for imbalanced classes in a `KerasClassifier` when it is used as a pipeline step inside `GridSearchCV`?

# Use scikit-learn to grid search the batch size and epochs
from collections import Counter
from sklearn.model_selection import train_test_split,StratifiedKFold,learning_curve,validation_curve,GridSearchCV
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import classification_report
import pandas as pd
from sklearn.pipeline import Pipeline

# Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the binary classifier wrapped by KerasClassifier.

    The network expects 20 input features, has one hidden ReLU layer with
    12 units and a single sigmoid output unit. It is compiled with binary
    cross-entropy loss, the Adam optimizer and accuracy as the metric.

    Returns:
        The compiled Sequential model.
    """
    hidden_layer = Dense(12, input_dim=20, activation='relu')
    output_layer = Dense(1, activation='sigmoid')

    network = Sequential()
    for layer in (hidden_layer, output_layer):
        network.add(layer)

    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network
# Fix the NumPy random seed for reproducibility.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own
# random state that needs seeding separately -- confirm.
seed = 7
np.random.seed(seed)

# Generate a synthetic binary dataset with a 95% / 5% class imbalance.
X, y = make_classification(
    n_classes=2,
    class_sep=2,
    weights=[0.95, 0.05],
    n_informative=3,
    n_redundant=2,
    flip_y=0,
    n_features=20,
    n_clusters_per_class=1,
    n_samples=1000,
    random_state=10,
)
print('Original dataset shape {}'.format(Counter(y)))

ln = X.shape

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
st = StandardScaler()

# Wrap the Keras model so it can be used as the final pipeline step.
model = KerasClassifier(build_fn=create_model, verbose=0)
pipeline = Pipeline(steps=[('scaler', st),
                           ('clf', model)])

# Define the grid search parameters; the "clf__" prefix routes each one to
# the 'clf' step of the pipeline.
batch_size = [20, 40, 60, 80, 100]
epochs = [50, 100]
param_grid = dict(clf__batch_size=batch_size, clf__epochs=epochs)

# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if it is set while shuffle is left at False.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# "Balanced" class weights: n_samples / (n_classes * class_count), so the
# rare class contributes proportionally more to the loss.
class_counts = Counter(y_train)
class_weight = {
    int(label): len(y_train) / (len(class_counts) * count)
    for label, count in class_counts.items()
}

grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=cv, scoring="f1")
# class_weight is passed as a fit parameter (routed to the 'clf' step by its
# prefix) rather than as a KerasClassifier constructor argument: a dict
# constructor argument can make scikit-learn's clone() fail for this wrapper.
grid_result = grid.fit(X_train, y_train, clf__class_weight=class_weight)

# Summarize results.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Predictions on the training split, then on the held-out test split.
ypred = grid_result.predict(X_train)
print(classification_report(y_train, ypred))
print('######################')
ypred2 = grid_result.predict(X_test)
print(classification_report(y_test, ypred2))

Thanks in advance.

Nenhuma solução correta

Licenciado em: CC-BY-SA com atribuição
scroll top