I am trying to find the KNN for the dataset provided but i keep getting the error "ValueError: Input contains NaN, infinity or a value too large for dtype('float64')." and i dont know why. Please help. 4 Female2 Femaie2 FemaleO FemaleABDEFGHKLMP1 CustomerlD Zip CodeSpending Scc AppleUser39AnnualIncon SpouseChildrenGenderMiles from W Has Winter AgePACKAGENUMBER_VI: ALL-INCLUSIVE501384 MaleAMale2115119393939350138151194350138154 Male1195450138154 Male11965.412701513 Male218116412701513 Male121811841270153 Male2.12181116108412701513 Male2.21819194921612 Female12061111019492162 Female12061121119492162 Female120616777777771312194921612 Female20O FemaleO FemaleO FemaleO Female141345449164231514454491642316154544916423171645449164231817373121714 Female43140121918373121714 Female43140122019373121714 Female43140122120373121714 Female43140121 Female7676767662221556411712222322556411711 Female2222423556411711 Female2222524556411711 Female2222625818341814 Female113522726818341814 Female11356222827818341814 Female1135622928818341814 Female11356222 Female2 Female2 Female3029210681821239431943021068181223-3231210681812123942943!333221068182 Female2123341 Male1 Male1 Male337193919264255535347193919264312.363571939191264312.3736719391911 Male264312.572727272143837122301912 Female430121393812230192 Female4301214039122301912 Female43024140122301912 Female4301213 Male3 Male3 Male4241825291934167117141414414499434282529194167170232544438252919416717454482529193 Male41671756844O FemaleO FemaleO Female46455684419313525684499999915!474656844193352148475684456844193352O FemaleOPemale2 Female494856844193135121505149494392045854943915!50202 Female45815TS525515157777775149439202 Female4581535249439202 Female4585453246892012 Female4242555424689202 Female1242565524689202 Female412425756246892012 Female4124772585730489202 Male337131595830489202 Male31371312.60616263645930489202 Male337131137979796030489202 Male31376175251202 Male322262752512012 Male32226375251202 Male3222 # -*- coding: utf-8 -*-Created on Sat Apr 24 14:58:26 2021@author: Davidfrom sklearn import datasetsfrom sklearn.model_selection import train_test_splitfrom sklearn.preprocessing import StandardScalerfrom sklearn.neighbors import KNeighborsClassifierimport numpy as npimport matplotlib.pyplot as pltfrom matplotlib.colors import ListedColormapimport pandas as pddef plot_decision_regions (X, y, classifier, test_idx=None, resolution=0.02):# setup marker generator and color mapmarkers = ('s', 'x', 'o', '^', 'v')colors = ('gray', 'indigo', 'purple','yellow' , 'gray')cmap = ListedColormap(colors [:len(np.unique(y))])# plot the decision surfacex1_min, x1_max = X[:, 0].min() - .25, X[:, 0].max() + .25x2_min, x2 max = X[:, 1].min() - .25, X[:, 1].max() + .25xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),np.arange(x2_min, x2_max, resolution))z = classifier.predict(np.array( (xx1. ravel(), xx2. ravel()1).T)Z = Z. reshape(xx1. shape)plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)plt.xlim(xx1.min(), xx1.max())plt.ylim(xx2.min(), xx2.max(0)# plot all samplesX_test, y_test = X[test_idx, :), yltest_idx]for idx, cl in enumerate(np.unique(y)):plt.scatter(x=X[y == cl, 0], y=xly == cl, 1),alpha-0.8, с-стар (idx),marker-markers (idx], label=cl)# highlight test samplesif test_idx:X_test, y_test = X[test_idx, :), yltest_idx]pīt.scatter(X_test[:, 0], x test[:, 1], c=",alpha=1.0, linewidth=1, marker='o',s=55, label='test set')# Importing the datasetdataset = pd. read_csv(r'/Users/jaylenmealing/Downloads/VisitJamaica_today.csv', sep="a")X = dataset.iloc[:, [4, 121].valuesy = dataset.iloc[:, 6].valuesX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)sc = StandardScaler()X_train_std = sc.fit_transform(X_train)X_test_std = sc.fit_transform(X_test)X_combined_std = np.vstack( (X_train_std, X_test_std))y_combined = np.hstack((y_train, y_test))knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')knn. fit(X_train_std, y_train)plot_decision_regions (X_combined_std, y_combined,classifier=knn, test_idx=range(600,725))plt.title('K-NN (Training set)')plt.xlabel('').plt.ylabel('')plt.show()# Training the K-NN model on the Training setfrom sklearn.neighbors import KNeighborsClassifierclassifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)classifier.fit(X_train, y_train)# Predicting the Test set resultsy_pred = classifier.predict(X_test)print(np.concatenate( (y_pred. reshape(len(y_pred),1), y_test.reshape(len(y_test), 1)),1))# Making the Confusion Matrixfrom sklearn.metrics import confusion_matrix, accuracy_scorecm = confusion_matrix(y_test, y_pred)print(cm)accuracy = accuracy_score(y_test, y_pred)

Question

I am trying to find the KNN for the dataset provided but i keep getting the error &#34;ValueError: Input contains NaN, infinity or a value too large for dtype('float64').&#34; and i dont know why.  Please help. 4 Female2 Femaie2 FemaleO FemaleABDEFGHKLMP1 CustomerlD Zip CodeSpending Scc AppleUser39AnnualIncon SpouseChildrenGenderMiles from W Has Winter AgePACKAGENUMBER_VI: ALL-INCLUSIVE501384 MaleAMale2115119393939350138151194350138154 Male1195450138154 Male11965.412701513 Male218116412701513 Male121811841270153 Male2.12181116108412701513 Male2.21819194921612 Female12061111019492162 Female12061121119492162 Female120616777777771312194921612 Female20O FemaleO FemaleO FemaleO Female141345449164231514454491642316154544916423171645449164231817373121714 Female43140121918373121714 Female43140122019373121714 Female43140122120373121714 Female43140121 Female7676767662221556411712222322556411711 Female2222423556411711 Female2222524556411711 Female2222625818341814 Female113522726818341814 Female11356222827818341814 Female1135622928818341814 Female11356222 Female2 Female2 Female3029210681821239431943021068181223-3231210681812123942943!333221068182 Female2123341 Male1 Male1 Male337193919264255535347193919264312.363571939191264312.3736719391911 Male264312.572727272143837122301912 Female430121393812230192 Female4301214039122301912 Female43024140122301912 Female4301213 Male3 Male3 Male4241825291934167117141414414499434282529194167170232544438252919416717454482529193 Male41671756844O FemaleO FemaleO Female46455684419313525684499999915!474656844193352148475684456844193352O FemaleOPemale2 Female494856844193135121505149494392045854943915!50202 Female45815TS525515157777775149439202 Female4581535249439202 Female4585453246892012 Female4242555424689202 Female1242565524689202 Female412425756246892012 Female4124772585730489202 Male337131595830489202 Male31371312.60616263645930489202 Male337131137979796030489202 Male31376175251202 Male322262752512012 Male32226375251202 Male3222 # -*- coding: utf-8 -*-Created on Sat Apr 24 14:58:26 2021@author: Davidfrom sklearn import datasetsfrom sklearn.model_selection import train_test_splitfrom sklearn.preprocessing import StandardScalerfrom sklearn.neighbors import KNeighborsClassifierimport numpy as npimport matplotlib.pyplot as pltfrom matplotlib.colors import ListedColormapimport pandas as pddef plot_decision_regions (X, y, classifier, test_idx=None, resolution=0.02):# setup marker generator and color mapmarkers = ('s', 'x', 'o', '^', 'v')colors = ('gray', 'indigo', 'purple','yellow' , 'gray')cmap = ListedColormap(colors [:len(np.unique(y))])# plot the decision surfacex1_min, x1_max = X[:, 0].min() - .25, X[:, 0].max() + .25x2_min, x2 max = X[:, 1].min() - .25, X[:, 1].max() + .25xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),np.arange(x2_min, x2_max, resolution))z = classifier.predict(np.array( (xx1. ravel(), xx2. ravel()1).T)Z = Z. reshape(xx1. shape)plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)plt.xlim(xx1.min(), xx1.max())plt.ylim(xx2.min(), xx2.max(0)# plot all samplesX_test, y_test = X[test_idx, :), yltest_idx]for idx, cl in enumerate(np.unique(y)):plt.scatter(x=X[y == cl, 0], y=xly == cl, 1),alpha-0.8, с-стар (idx),marker-markers (idx], label=cl)# highlight test samplesif test_idx:X_test, y_test = X[test_idx, :), yltest_idx]pīt.scatter(X_test[:, 0], x test[:, 1], c=&#34;,alpha=1.0, linewidth=1, marker='o',s=55, label='test set')# Importing the datasetdataset = pd. read_csv(r'/Users/jaylenmealing/Downloads/VisitJamaica_today.csv', sep=&#34;a&#34;)X = dataset.iloc[:, [4, 121].valuesy = dataset.iloc[:, 6].valuesX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)sc = StandardScaler()X_train_std = sc.fit_transform(X_train)X_test_std = sc.fit_transform(X_test)X_combined_std = np.vstack( (X_train_std, X_test_std))y_combined = np.hstack((y_train, y_test))knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')knn. fit(X_train_std, y_train)plot_decision_regions (X_combined_std, y_combined,classifier=knn, test_idx=range(600,725))plt.title('K-NN (Training set)')plt.xlabel('').plt.ylabel('')plt.show()# Training the K-NN model on the Training setfrom sklearn.neighbors import KNeighborsClassifierclassifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)classifier.fit(X_train, y_train)# Predicting the Test set resultsy_pred = classifier.predict(X_test)print(np.concatenate( (y_pred. reshape(len(y_pred),1), y_test.reshape(len(y_test), 1)),1))# Making the Confusion Matrixfrom sklearn.metrics import confusion_matrix, accuracy_scorecm = confusion_matrix(y_test, y_pred)print(cm)accuracy = accuracy_score(y_test, y_pred)

Accepted Answer

Find the error (ValueError: Input contains NaN, infinity or a value too large for dtype('float64'))…