k_means.cpp
1 // OpenNN: Open Neural Networks Library
2 // www.opennn.net
3 //
4 // K - M E A N S C L A S S
5 //
6 // Artificial Intelligence Techniques SL
7 // artelnics@artelnics.com
8 
9 #include "k_means.h"
10 
11 namespace OpenNN
12 {
13 
15 
16 KMeans::Results KMeans::calculate_k_means(const Matrix<double>& matrix, const size_t& k) const
17 {
18  Results k_means_results;
19 
20  const size_t rows_number = matrix.get_rows_number();
21  const size_t columns_number = matrix.get_columns_number();
22 
23  Vector<Vector<size_t>> clusters(k);
24 
25  Matrix<double> previous_means(k, columns_number);
26  Matrix<double> means(k, columns_number);
27 
28  const Vector<double> minimums = OpenNN::columns_minimums(matrix);
29  const Vector<double> maximums = OpenNN::columns_maximums(matrix);
30 
31  size_t iterations = 0;
32 
33  bool end = false;
34 
35  // Calculate initial means
36 
37  Vector<size_t> selected_rows(k);
38 
39  const size_t initial_center = calculate_random_uniform<size_t>(0, rows_number);
40 
41  previous_means.set_row(0, matrix.get_row(initial_center));
42  selected_rows[0] = initial_center;
43 
44  for(size_t i = 1; i < k; i++)
45  {
46  Vector<double> minimum_distances(rows_number, 0.0);
47 
48  for(size_t j = 0; j < rows_number; j++)
49  {
50  Vector<double> distances(i, 0.0);
51 
52  const Vector<double> row_data = matrix.get_row(j);
53 
54  for(size_t l = 0; l < i; l++)
55  {
56  distances[l] = euclidean_distance(row_data, previous_means.get_row(l));
57  }
58 
59  const double minimum_distance = minimum(distances);
60 
61  minimum_distances[static_cast<size_t>(j)] = minimum_distance;
62  }
63 
64  size_t sample_index = calculate_sample_index_proportional_probability(minimum_distances);
65 
66  int random_failures = 0;
67 
68  while(selected_rows.contains(sample_index))
69  {
70  sample_index = calculate_sample_index_proportional_probability(minimum_distances);
71 
72  random_failures++;
73 
74  if(random_failures > 5)
75  {
76  Vector<double> new_row(columns_number);
77 
78  new_row.randomize_uniform(minimums, maximums);
79 
80  previous_means.set_row(i, new_row);
81 
82  break;
83  }
84  }
85 
86  if(random_failures <= 5)
87  {
88  previous_means.set_row(i, matrix.get_row(sample_index));
89  }
90  }
91 
92  // Main loop
93 
94  while(!end)
95  {
96  clusters.clear();
97  clusters.set(k);
98 
99  #pragma omp parallel for
100 
101  for(size_t i = 0; i < rows_number; i++)
102  {
103  Vector<double> distances(k, 0.0);
104 
105  const Vector<double> current_row = matrix.get_row(i);
106 
107  for(size_t j = 0; j < k; j++)
108  {
109  distances[j] = euclidean_distance(current_row, previous_means.get_row(j));
110  }
111 
112  const size_t minimum_distance_index = minimal_index(distances);
113 
114  #pragma omp critical
115  clusters[minimum_distance_index].push_back(static_cast<size_t>(i));
116  }
117 
118  for(size_t i = 0; i < k; i++)
119  {
120  means.set_row(i, rows_means(matrix, clusters[i]));
121  }
122 
123  if(previous_means == means)
124  {
125  end = true;
126  }
127  else if(iterations > 100)
128  {
129  end = true;
130  }
131 
132  previous_means = means;
133  iterations++;
134  }
135 
136 // k_means_results.means = means;
137  k_means_results.clusters = clusters;
138 
139  return k_means_results;
140 }
141 
142 
143 size_t KMeans::calculate_sample_index_proportional_probability(const Vector<double>& vector) const
144 {
145  const size_t this_size = vector.size();
146 
147  Vector<double> cumulative = OpenNN::cumulative(vector);
148 
149  const double sum = vector.calculate_sum();
150 
151  const double random = calculate_random_uniform(0.,sum);
152 
153  size_t selected_index = 0;
154 
155  for(size_t i = 0; i < this_size; i++)
156  {
157  if(i == 0 && random < cumulative[0])
158  {
159  selected_index = i;
160  break;
161  }
162  else if(random < cumulative[i] && random >= cumulative[i-1])
163  {
164  selected_index = i;
165  break;
166  }
167  }
168 
169  return selected_index;
170 }
171 
172 }
173 
174 // OpenNN: Open Neural Networks Library.
175 // Copyright(C) 2005-2019 Artificial Intelligence Techniques, SL.
176 //
177 // This library is free software; you can redistribute it and/or
178 // modify it under the terms of the GNU Lesser General Public
179 // License as published by the Free Software Foundation; either
180 // version 2.1 of the License, or any later version.
181 //
182 // This library is distributed in the hope that it will be useful,
183 // but WITHOUT ANY WARRANTY; without even the implied warranty of
184 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
185 // Lesser General Public License for more details.
186 
187 // You should have received a copy of the GNU Lesser General Public
188 // License along with this library; if not, write to the Free Software
189 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
OpenNN::Matrix::get_rows_number
const size_t & get_rows_number() const
Returns the number of rows in the matrix.
Definition: matrix.h:1287
OpenNN::Matrix< double >
OpenNN::Matrix::get_row
Vector< T > get_row(const size_t &) const
Definition: matrix.h:2563
OpenNN::Vector::set
void set()
Sets the size of this vector to zero.
Definition: vector.h:682
OpenNN::Vector::contains
bool contains(const T &) const
Definition: vector.h:1123
OpenNN::Matrix::get_columns_number
const size_t & get_columns_number() const
Returns the number of columns in the matrix.
Definition: matrix.h:1296
OpenNN::KMeans::Results
Definition: k_means.h:46
OpenNN::Matrix::set_row
void set_row(const size_t &, const Vector< T > &)
Definition: matrix.h:3011
OpenNN::Vector
This template represents an array of any kind of numbers or objects.
Definition: vector.h:54
OpenNN::Vector::randomize_uniform
void randomize_uniform(const T &, const T &)
Definition: vector.h:847
OpenNN::Vector::calculate_sum
T calculate_sum() const
Returns the sum of the elements of this vector.
Definition: vector.h:2458
OpenNN::KMeans::calculate_k_means
Results calculate_k_means(const Matrix< double > &, const size_t &) const
Definition: k_means.cpp:16