def euclidean_distance(point_a: np.ndarray, point_b: np.ndarray):
"""Compute Euclidean distance between two points."""
return np.sqrt(np.sum((point_a - point_b) ** 2))
def k_nearest_neighbors(
train_features: np.ndarray,
train_labels: np.ndarray,
query_point: np.ndarray,
label_to_class: Optional[Dict[int, str]] = None,
k: int = 5,
) -> str | int:
"""
train_features -> (n_samples, n_features)
train_labels -> (n_samples,)
query_point -> shape (n_features,)
k -> number of neighbors
"""
num_samples = train_features.shape[0]
distances = []
# Compute distance from query point to each training sample
for i in range(num_samples):
dist = euclidean_distance(query_point, train_features[i])
distances.append((dist, train_labels[i]))
# Sort by distance and take top-k
distances = sorted(distances, key=lambda x: x[0])[:k]
# Extract labels of the k nearest neighbors
neighbor_labels = np.array([label for _, label in distances])
# Count majority vote
unique_labels, counts = np.unique(neighbor_labels, return_counts=True)
predicted_label = unique_labels[counts.argmax()]
if label_to_class:
return label_to_class[int(predicted_label)]
return int(predicted_label)