from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the iris dataset and separate the feature matrix from the class labels.
iris = datasets.load_iris()
attributes = iris.data
label = iris.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    attributes, label, test_size=0.2, random_state=1
)
# Dimensionality Reduction
## Dimensionality Reduction
### PCA
#### Without PCA
from sklearn import tree
# Baseline: train a decision tree on the original (unreduced) features.
dt = tree.DecisionTreeClassifier()
# fit() returns the fitted estimator itself, so `first` and `dt` are the
# same object.
first = dt.fit(X_train, y_train)
# Mean accuracy on the held-out test split.
first.score(X_test, y_test)
0.9666666666666667
#### With PCA
from sklearn.decomposition import PCA
# Fit a 4-component PCA on the training features only, to inspect how much
# of the variance each principal component explains.
pca = PCA(n_components=4)
pca_attributes = pca.fit_transform(X_train)
# Fraction of total variance captured by each component, in component order.
pca.explained_variance_ratio_
array([0.92848323, 0.04764372, 0.01931005, 0.004563 ])
# Keep only the first two principal components.
pca = PCA(n_components=2)

# Learn the projection from the training data only ...
X_train_pca = pca.fit_transform(X_train)
# ... and apply that same projection to the test data. Calling
# fit_transform() here would refit PCA on the test set, so train and test
# would be expressed in different component bases and information from the
# test set would leak into preprocessing.
# NOTE(review): the recorded score below was produced with the earlier
# fit_transform(X_test) call and may differ after this fix - re-run to refresh.
X_test_pca = pca.transform(X_test)

# dt.fit() refits the existing classifier in place and returns it, so any
# earlier reference to `dt` now points at the PCA-trained model as well.
second = dt.fit(X_train_pca, y_train)
# Mean accuracy on the projected test split.
second.score(X_test_pca, y_test)
0.9333333333333333