Visual Servoing Platform  version 3.6.1 under development (2024-10-13)
vpGEMM.h
1 /*
2  * ViSP, open source Visual Servoing Platform software.
3  * Copyright (C) 2005 - 2024 by Inria. All rights reserved.
4  *
5  * This software is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  * See the file LICENSE.txt at the root directory of this source
10  * distribution for additional information about the GNU GPL.
11  *
12  * For using ViSP with software that can not be combined with the GNU
13  * GPL, please contact Inria about acquiring a ViSP Professional
14  * Edition License.
15  *
16  * See https://visp.inria.fr for more information.
17  *
18  * This software was developed at:
19  * Inria Rennes - Bretagne Atlantique
20  * Campus Universitaire de Beaulieu
21  * 35042 Rennes Cedex
22  * France
23  *
24  * If you have questions regarding the use of this file, please contact
25  * Inria at visp@inria.fr
26  *
27  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
28  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
29  *
30  * Description:
31  * Matrix generalized multiplication.
32  */
33 
34 #ifndef VP_GEMM_H
35 #define VP_GEMM_H
36 
37 #include <visp3/core/vpArray2D.h>
38 #include <visp3/core/vpException.h>
39 
40 BEGIN_VISP_NAMESPACE
41 const vpArray2D<double> null(0, 0);
42 
53 typedef enum
54 {
55  VP_GEMM_A_T = 1,
56  VP_GEMM_B_T = 2,
57  VP_GEMM_C_T = 4,
58 } vpGEMMmethod;
59 
60 template <unsigned int>
61 inline void GEMMsize(const vpArray2D<double> & /*A*/, const vpArray2D<double> & /*B*/, unsigned int & /*Arows*/,
62  unsigned int & /*Acols*/, unsigned int & /*Brows*/, unsigned int & /*Bcols*/)
63 { }
64 
65 template <>
66 void inline GEMMsize<0>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
67  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
68 {
69  Arows = A.getRows();
70  Acols = A.getCols();
71  Brows = B.getRows();
72  Bcols = B.getCols();
73 }
74 
75 template <>
76 inline void GEMMsize<1>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
77  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
78 {
79  Arows = A.getCols();
80  Acols = A.getRows();
81  Brows = B.getRows();
82  Bcols = B.getCols();
83 }
84 template <>
85 inline void GEMMsize<2>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
86  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
87 {
88  Arows = A.getRows();
89  Acols = A.getCols();
90  Brows = B.getCols();
91  Bcols = B.getRows();
92 }
93 template <>
94 inline void GEMMsize<3>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
95  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
96 {
97  Arows = A.getCols();
98  Acols = A.getRows();
99  Brows = B.getCols();
100  Bcols = B.getRows();
101 }
102 
103 template <>
104 inline void GEMMsize<4>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
105  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
106 {
107  Arows = A.getRows();
108  Acols = A.getCols();
109  Brows = B.getRows();
110  Bcols = B.getCols();
111 }
112 
113 template <>
114 inline void GEMMsize<5>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
115  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
116 {
117  Arows = A.getCols();
118  Acols = A.getRows();
119  Brows = B.getRows();
120  Bcols = B.getCols();
121 }
122 
123 template <>
124 inline void GEMMsize<6>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
125  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
126 {
127  Arows = A.getRows();
128  Acols = A.getCols();
129  Brows = B.getCols();
130  Bcols = B.getRows();
131 }
132 
133 template <>
134 inline void GEMMsize<7>(const vpArray2D<double> &A, const vpArray2D<double> &B, unsigned int &Arows,
135  unsigned int &Acols, unsigned int &Brows, unsigned int &Bcols)
136 {
137  Arows = A.getCols();
138  Acols = A.getRows();
139  Brows = B.getCols();
140  Bcols = B.getRows();
141 }
142 
143 template <unsigned int>
144 inline void GEMM1(const unsigned int & /*Arows*/, const unsigned int & /*Brows*/, const unsigned int & /*Bcols*/,
145  const vpArray2D<double> & /*A*/, const vpArray2D<double> & /*B*/, const double & /*alpha*/,
146  vpArray2D<double> & /*D*/)
147 { }
148 
149 template <>
150 inline void GEMM1<0>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
151  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha, vpArray2D<double> &D)
152 {
153  for (unsigned int r = 0; r < Arows; ++r) {
154  for (unsigned int c = 0; c < Bcols; ++c) {
155  double sum = 0;
156  for (unsigned int n = 0; n < Brows; ++n) {
157  sum += A[r][n] * B[n][c] * alpha;
158  }
159  D[r][c] = sum;
160  }
161  }
162 }
163 
164 template <>
165 inline void GEMM1<1>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
166  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha, vpArray2D<double> &D)
167 {
168  for (unsigned int r = 0; r < Arows; ++r) {
169  for (unsigned int c = 0; c < Bcols; ++c) {
170  double sum = 0;
171  for (unsigned int n = 0; n < Brows; ++n) {
172  sum += A[n][r] * B[n][c] * alpha;
173  }
174  D[r][c] = sum;
175  }
176  }
177 }
178 
179 template <>
180 inline void GEMM1<2>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
181  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha, vpArray2D<double> &D)
182 {
183  for (unsigned int r = 0; r < Arows; ++r) {
184  for (unsigned int c = 0; c < Bcols; ++c) {
185  double sum = 0;
186  for (unsigned int n = 0; n < Brows; ++n) {
187  sum += A[r][n] * B[c][n] * alpha;
188  }
189  D[r][c] = sum;
190  }
191  }
192 }
193 
194 template <>
195 inline void GEMM1<3>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
196  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha, vpArray2D<double> &D)
197 {
198  for (unsigned int r = 0; r < Arows; ++r) {
199  for (unsigned int c = 0; c < Bcols; ++c) {
200  double sum = 0;
201  for (unsigned int n = 0; n < Brows; ++n) {
202  sum += A[n][r] * B[c][n] * alpha;
203  }
204  D[r][c] = sum;
205  }
206  }
207 }
208 
209 template <unsigned int>
210 inline void GEMM2(const unsigned int & /*Arows*/, const unsigned int & /*Brows*/, const unsigned int & /*Bcols*/,
211  const vpArray2D<double> & /*A*/, const vpArray2D<double> & /*B*/, const double & /*alpha*/,
212  const vpArray2D<double> & /*C*/, const double & /*beta*/, vpArray2D<double> & /*D*/)
213 { }
214 
215 template <>
216 inline void GEMM2<0>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
217  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
218  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
219 {
220  for (unsigned int r = 0; r < Arows; ++r) {
221  for (unsigned int c = 0; c < Bcols; ++c) {
222  double sum = 0;
223  for (unsigned int n = 0; n < Brows; ++n) {
224  sum += A[r][n] * B[n][c] * alpha;
225  }
226  D[r][c] = sum + (C[r][c] * beta);
227  }
228  }
229 }
230 
231 template <>
232 inline void GEMM2<1>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
233  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
234  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
235 {
236  for (unsigned int r = 0; r < Arows; ++r) {
237  for (unsigned int c = 0; c < Bcols; ++c) {
238  double sum = 0;
239  for (unsigned int n = 0; n < Brows; ++n) {
240  sum += A[n][r] * B[n][c] * alpha;
241  }
242  D[r][c] = sum + (C[r][c] * beta);
243  }
244  }
245 }
246 
247 template <>
248 inline void GEMM2<2>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
249  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
250  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
251 {
252  for (unsigned int r = 0; r < Arows; ++r) {
253  for (unsigned int c = 0; c < Bcols; ++c) {
254  double sum = 0;
255  for (unsigned int n = 0; n < Brows; ++n) {
256  sum += A[r][n] * B[c][n] * alpha;
257  }
258  D[r][c] = sum + (C[r][c] * beta);
259  }
260  }
261 }
262 
263 template <>
264 inline void GEMM2<3>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
265  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
266  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
267 {
268  for (unsigned int r = 0; r < Arows; ++r) {
269  for (unsigned int c = 0; c < Bcols; ++c) {
270  double sum = 0;
271  for (unsigned int n = 0; n < Brows; ++n) {
272  sum += A[n][r] * B[c][n] * alpha;
273  }
274  D[r][c] = sum + (C[r][c] * beta);
275  }
276  }
277 }
278 
279 template <>
280 inline void GEMM2<4>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
281  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
282  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
283 {
284  for (unsigned int r = 0; r < Arows; ++r) {
285  for (unsigned int c = 0; c < Bcols; ++c) {
286  double sum = 0;
287  for (unsigned int n = 0; n < Brows; ++n) {
288  sum += A[r][n] * B[n][c] * alpha;
289  }
290  D[r][c] = sum + (C[c][r] * beta);
291  }
292  }
293 }
294 
295 template <>
296 inline void GEMM2<5>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
297  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
298  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
299 {
300  for (unsigned int r = 0; r < Arows; ++r) {
301  for (unsigned int c = 0; c < Bcols; ++c) {
302  double sum = 0;
303  for (unsigned int n = 0; n < Brows; ++n) {
304  sum += A[n][r] * B[n][c] * alpha;
305  }
306  D[r][c] = sum + (C[c][r] * beta);
307  }
308  }
309 }
310 
311 template <>
312 inline void GEMM2<6>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
313  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
314  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
315 {
316  for (unsigned int r = 0; r < Arows; ++r) {
317  for (unsigned int c = 0; c < Bcols; ++c) {
318  double sum = 0;
319  for (unsigned int n = 0; n < Brows; ++n) {
320  sum += A[r][n] * B[c][n] * alpha;
321  }
322  D[r][c] = sum + (C[c][r] * beta);
323  }
324  }
325 }
326 
327 template <>
328 inline void GEMM2<7>(const unsigned int &Arows, const unsigned int &Brows, const unsigned int &Bcols,
329  const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
330  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
331 {
332  for (unsigned int r = 0; r < Arows; ++r) {
333  for (unsigned int c = 0; c < Bcols; ++c) {
334  double sum = 0;
335  for (unsigned int n = 0; n < Brows; ++n) {
336  sum += A[n][r] * B[c][n] * alpha;
337  }
338  D[r][c] = sum + (C[c][r] * beta);
339  }
340  }
341 }
342 
343 template <unsigned int T>
344 inline void vpTGEMM(const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
345  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D)
346 {
347  unsigned int Arows;
348  unsigned int Acols;
349  unsigned int Brows;
350  unsigned int Bcols;
351 
352  GEMMsize<T>(A, B, Arows, Acols, Brows, Bcols);
353 
354  try {
355  if ((Arows != D.getRows()) || (Bcols != D.getCols())) {
356  D.resize(Arows, Bcols);
357  }
358  }
359  catch (...) {
360  throw;
361  }
362 
363  if (Acols != Brows) {
364  throw(vpException(vpException::dimensionError, "In vpGEMM, cannot multiply (%dx%d) matrix by (%dx%d) matrix", Arows,
365  Acols, Brows, Bcols));
366  }
367 
368  if (C.getRows() != 0 && C.getCols() != 0) {
369  if ((Arows != C.getRows()) || (Bcols != C.getCols())) {
370  throw(vpException(vpException::dimensionError, "In vpGEMM, cannot add resulting (%dx%d) matrix to (%dx%d) matrix",
371  Arows, Bcols, C.getRows(), C.getCols()));
372  }
373 
374  GEMM2<T>(Arows, Brows, Bcols, A, B, alpha, C, beta, D);
375  }
376  else {
377  GEMM1<T>(Arows, Brows, Bcols, A, B, alpha, D);
378  }
379 }
380 
414 inline void vpGEMM(const vpArray2D<double> &A, const vpArray2D<double> &B, const double &alpha,
415  const vpArray2D<double> &C, const double &beta, vpArray2D<double> &D, const unsigned int &ops = 0)
416 {
417  switch (ops) {
418  case 0:
419  vpTGEMM<0>(A, B, alpha, C, beta, D);
420  break;
421  case 1:
422  vpTGEMM<1>(A, B, alpha, C, beta, D);
423  break;
424  case 2:
425  vpTGEMM<2>(A, B, alpha, C, beta, D);
426  break;
427  case 3:
428  vpTGEMM<3>(A, B, alpha, C, beta, D);
429  break;
430  case 4:
431  vpTGEMM<4>(A, B, alpha, C, beta, D);
432  break;
433  case 5:
434  vpTGEMM<5>(A, B, alpha, C, beta, D);
435  break;
436  case 6:
437  vpTGEMM<6>(A, B, alpha, C, beta, D);
438  break;
439  case 7:
440  vpTGEMM<7>(A, B, alpha, C, beta, D);
441  break;
442  default:
443  throw(vpException(vpException::functionNotImplementedError, "Operation on vpGEMM not implemented"));
444  break;
445  }
446 }
447 END_VISP_NAMESPACE
448 #endif
unsigned int getCols() const
Definition: vpArray2D.h:337
void vpGEMM(const vpArray2D< double > &A, const vpArray2D< double > &B, const double &alpha, const vpArray2D< double > &C, const double &beta, vpArray2D< double > &D, const unsigned int &ops=0)
Definition: vpGEMM.h:414
vpGEMMmethod
Definition: vpGEMM.h:54
unsigned int getRows() const
Definition: vpArray2D.h:347
error that can be emitted by ViSP classes.
Definition: vpException.h:60
@ functionNotImplementedError
Function not implemented.
Definition: vpException.h:66
@ dimensionError
Bad dimension.
Definition: vpException.h:71