- "n_samples = len(digits.data)\ndata = digits.data / 16.0\ndata -= data.mean(axis=0)\n\n# We learn the digits on the first half of the digits\ndata_train, targets_train = (data[: n_samples // 2], digits.target[: n_samples // 2])\n\n\n# Now predict the value of the digit on the second half:\ndata_test, targets_test = (data[n_samples // 2 :], digits.target[n_samples // 2 :])\n# data_test = scaler.transform(data_test)\n\n# Create a classifier: a support vector classifier\nkernel_svm = svm.SVC(gamma=0.2)\nlinear_svm = svm.LinearSVC(dual=\"auto\", random_state=42)\n\n# create pipeline from kernel approximation\n# and linear svm\nfeature_map_fourier = RBFSampler(gamma=0.2, random_state=1)\nfeature_map_nystroem = Nystroem(gamma=0.2, random_state=1)\nfourier_approx_svm = pipeline.Pipeline(\n [\n (\"feature_map\", feature_map_fourier),\n (\"svm\", svm.LinearSVC(dual=\"auto\", random_state=42)),\n ]\n)\n\nnystroem_approx_svm = pipeline.Pipeline(\n [\n (\"feature_map\", feature_map_nystroem),\n (\"svm\", svm.LinearSVC(dual=\"auto\", random_state=42)),\n ]\n)\n\n# fit and predict using linear and kernel svm:\n\nkernel_svm_time = time()\nkernel_svm.fit(data_train, targets_train)\nkernel_svm_score = kernel_svm.score(data_test, targets_test)\nkernel_svm_time = time() - kernel_svm_time\n\nlinear_svm_time = time()\nlinear_svm.fit(data_train, targets_train)\nlinear_svm_score = linear_svm.score(data_test, targets_test)\nlinear_svm_time = time() - linear_svm_time\n\nsample_sizes = 30 * np.arange(1, 10)\nfourier_scores = []\nnystroem_scores = []\nfourier_times = []\nnystroem_times = []\n\nfor D in sample_sizes:\n fourier_approx_svm.set_params(feature_map__n_components=D)\n nystroem_approx_svm.set_params(feature_map__n_components=D)\n start = time()\n nystroem_approx_svm.fit(data_train, targets_train)\n nystroem_times.append(time() - start)\n\n start = time()\n fourier_approx_svm.fit(data_train, targets_train)\n fourier_times.append(time() - start)\n\n fourier_score = fourier_approx_svm.score(data_test, targets_test)\n nystroem_score = nystroem_approx_svm.score(data_test, targets_test)\n nystroem_scores.append(nystroem_score)\n fourier_scores.append(fourier_score)\n\n# plot the results:\nplt.figure(figsize=(16, 4))\naccuracy = plt.subplot(121)\n# second y axis for timings\ntimescale = plt.subplot(122)\n\naccuracy.plot(sample_sizes, nystroem_scores, label=\"Nystroem approx. kernel\")\ntimescale.plot(sample_sizes, nystroem_times, \"--\", label=\"Nystroem approx. kernel\")\n\naccuracy.plot(sample_sizes, fourier_scores, label=\"Fourier approx. kernel\")\ntimescale.plot(sample_sizes, fourier_times, \"--\", label=\"Fourier approx. kernel\")\n\n# horizontal lines for exact rbf and linear kernels:\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_score, linear_svm_score],\n label=\"linear svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [linear_svm_time, linear_svm_time],\n \"--\",\n label=\"linear svm\",\n)\n\naccuracy.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_score, kernel_svm_score],\n label=\"rbf svm\",\n)\ntimescale.plot(\n [sample_sizes[0], sample_sizes[-1]],\n [kernel_svm_time, kernel_svm_time],\n \"--\",\n label=\"rbf svm\",\n)\n\n# vertical line for dataset dimensionality = 64\naccuracy.plot([64, 64], [0.7, 1], label=\"n_features\")\n\n# legends and labels\naccuracy.set_title(\"Classification accuracy\")\ntimescale.set_title(\"Training times\")\naccuracy.set_xlim(sample_sizes[0], sample_sizes[-1])\naccuracy.set_xticks(())\naccuracy.set_ylim(np.min(fourier_scores), 1)\ntimescale.set_xlabel(\"Sampling steps = transformed feature dimension\")\naccuracy.set_ylabel(\"Classification accuracy\")\ntimescale.set_ylabel(\"Training time in seconds\")\naccuracy.legend(loc=\"best\")\ntimescale.legend(loc=\"best\")\nplt.tight_layout()\nplt.show()"
0 commit comments