31 size_t iterations = 0;
39 const size_t initial_center = calculate_random_uniform<size_t>(0, rows_number);
42 selected_rows[0] = initial_center;
44 for(
size_t i = 1; i < k; i++)
48 for(
size_t j = 0; j < rows_number; j++)
54 for(
size_t l = 0; l < i; l++)
56 distances[l] = euclidean_distance(row_data, previous_means.
get_row(l));
59 const double minimum_distance = minimum(distances);
61 minimum_distances[static_cast<size_t>(j)] = minimum_distance;
64 size_t sample_index = calculate_sample_index_proportional_probability(minimum_distances);
66 int random_failures = 0;
68 while(selected_rows.
contains(sample_index))
70 sample_index = calculate_sample_index_proportional_probability(minimum_distances);
74 if(random_failures > 5)
80 previous_means.
set_row(i, new_row);
86 if(random_failures <= 5)
99 #pragma omp parallel for
101 for(
size_t i = 0; i < rows_number; i++)
107 for(
size_t j = 0; j < k; j++)
109 distances[j] = euclidean_distance(current_row, previous_means.
get_row(j));
112 const size_t minimum_distance_index = minimal_index(distances);
115 clusters[minimum_distance_index].push_back(static_cast<size_t>(i));
118 for(
size_t i = 0; i < k; i++)
120 means.
set_row(i, rows_means(matrix, clusters[i]));
123 if(previous_means == means)
127 else if(iterations > 100)
132 previous_means = means;
137 k_means_results.clusters = clusters;
139 return k_means_results;
143 size_t KMeans::calculate_sample_index_proportional_probability(
const Vector<double>& vector)
const
145 const size_t this_size = vector.size();
151 const double random = calculate_random_uniform(0.,sum);
153 size_t selected_index = 0;
155 for(
size_t i = 0; i < this_size; i++)
157 if(i == 0 && random < cumulative[0])
162 else if(random < cumulative[i] && random >= cumulative[i-1])
169 return selected_index;