from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the iris dataset and separate the feature matrix from the class labels.
iris = datasets.load_iris()
attributes = iris.data
label = iris.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    attributes, label, test_size=0.2, random_state=1)

# Dimensionality Reduction
# Dimensionality Reduction
# PCA
# without PCA
from sklearn import tree

# Baseline: a decision tree trained on the original, untransformed features.
dt = tree.DecisionTreeClassifier()
# fit() returns the estimator itself, so `first` is the same object as `dt`.
first = dt.fit(X_train, y_train)
# Mean accuracy on the held-out test set.
first.score(X_test, y_test)
# Recorded notebook output: 0.9666666666666667
# with PCA
from sklearn.decomposition import PCA

# Fit a four-component PCA on the training data to inspect how much of the
# variance each principal component explains.
pca = PCA(n_components=4)
pca_attributes = pca.fit_transform(X_train)
pca.explained_variance_ratio_
# Recorded notebook output:
# array([0.92848323, 0.04764372, 0.01931005, 0.004563  ])

# Keep only the first two components (about 97.6% of the variance according to
# the ratios recorded above).
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
# Project the test set with the PCA that was fitted on the training data.
# Calling fit_transform here would learn a different basis from the test set,
# so the classifier would be scored on features it was not trained on, and
# test-set statistics would leak into preprocessing.
X_test_pca = pca.transform(X_test)

# NOTE: this refits the same estimator object `dt`, so any earlier reference
# to it (e.g. `first`) now also points at the PCA-trained model.
second = dt.fit(X_train_pca, y_train)
second.score(X_test_pca, y_test)
# Notebook output recorded with the original code, which refit PCA on the test
# set: 0.9333333333333333. Re-run to obtain the score for the corrected
# pipeline.