# Generate 100 samples of 2 featured data
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=2)

# Report dataset dimensions.
print(f"shape of X: {X.shape}")
print(f"shape of y: {y.shape}")

# Human-readable names for the two integer cluster labels.
label_to_class = {0: "cat", 1: "dog"}
# Pull the two feature columns apart for plotting.
feature_one = X[:, 0]  # Feature 1
feature_two = X[:, 1]  # Feature 2

# Scatter plot with class coloring
plt.scatter(feature_one, feature_two, c=y, cmap="coolwarm", alpha=0.7, edgecolor="k")

# Mark the point we intend to classify with KNN.
plt.scatter(x=-0.3, y=-5, c="black", marker="x", s=100, label="Query Point")

# Axis labels, legend, and render.
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.title("KNN Data Distribution")
plt.legend()
plt.show()
defeuclidean_distance(point_a:np.ndarray,point_b:np.ndarray):"""Compute Euclidean distance between two points."""returnnp.sqrt(np.sum((point_a-point_b)**2))defk_nearest_neighbors(train_features:np.ndarray,train_labels:np.ndarray,query_point:np.ndarray,label_to_class:Optional[Dict[int,str]]=None,k:int=5,)->str|int:""" train_features -> (n_samples, n_features) train_labels -> (n_samples,) query_point -> shape (n_features,) k -> number of neighbors """num_samples=train_features.shape[0]distances=[]# Compute distance from query point to each training sampleforiinrange(num_samples):dist=euclidean_distance(query_point,train_features[i])distances.append((dist,train_labels[i]))# Sort by distance and take top-kdistances=sorted(distances,key=lambdax:x[0])[:k]# Extract labels of the k nearest neighborsneighbor_labels=np.array([labelfor_,labelindistances])# Count majority voteunique_labels,counts=np.unique(neighbor_labels,return_counts=True)predicted_label=unique_labels[counts.argmax()]iflabel_to_class:returnlabel_to_class[int(predicted_label)]returnint(predicted_label)