diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c')
-rw-r--r-- | Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c | 1570 |
1 files changed, 1570 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c b/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c new file mode 100644 index 0000000..b0ef760 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c @@ -0,0 +1,1570 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_inverse_f32.c + * Description: Floating-point matrix inverse + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/matrix_functions.h" + + +/** + @ingroup groupMatrix + */ + +/** + @defgroup MatrixInv Matrix Inverse + + Computes the inverse of a matrix. + + The inverse is defined only if the input matrix is square and non-singular (the determinant is non-zero). + The function checks that the input and output matrices are square and of the same size. + + Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix + inversion of floating-point matrices. + + @par Algorithm + The Gauss-Jordan method is used to find the inverse. + The algorithm performs a sequence of elementary row-operations until it + reduces the input matrix to an identity matrix. Applying the same sequence + of elementary row-operations to an identity matrix yields the inverse matrix. + If the input matrix is singular, then the algorithm terminates and returns error status + <code>ARM_MATH_SINGULAR</code>. + \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method" + */ + +/** + @addtogroup MatrixInv + @{ + */ + +/** + @brief Floating-point matrix inverse. + @param[in] pSrc points to input matrix structure. The source matrix is modified by the function. + @param[out] pDst points to output matrix structure + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */ + float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */ + float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */ + + uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ + uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ + float32_t *pTmpA, *pTmpB; + + float32_t in = 0.0f; /* Temporary input values */ + uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */ + arm_status status; /* status of matrix inverse */ + uint32_t blkCnt; + +#ifdef ARM_MATH_MATRIX_CHECK + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) + || (pSrc->numRows != pDst->numRows)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + { + + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ _ _ _ _ + * | | a11 a12 | | | 1 0 | | | X11 X12 | + * | | | | | | | = | | + * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, src). + * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* + * Working pointer for destination matrix + */ + pOutT1 = pOut; + /* + * Loop over the number of rows + */ + rowCnt = numRows; + /* + * Making the destination matrix as identity matrix + */ + while (rowCnt > 0U) + { + /* + * Writing all zeroes in lower triangle of the destination matrix + */ + j = numRows - rowCnt; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + /* + * Writing all ones in the diagonal of the destination matrix + */ + *pOutT1++ = 1.0f; + /* + * Writing all zeroes in upper triangle of the destination matrix + */ + j = rowCnt - 1U; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + /* + * Decrement the loop counter + */ + rowCnt--; + } + + /* + * Loop over the number of columns of the input matrix. + * All the elements in each column are processed by the row operations + */ + loopCnt = numCols; + /* + * Index modifier to navigate through the columns + */ + l = 0U; + while (loopCnt > 0U) + { + /* + * Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. + */ + + /* + * Working pointer for the input matrix that points + * * to the pivot element of the particular row + */ + pInT1 = pIn + (l * numCols); + /* + * Working pointer for the destination matrix that points + * * to the pivot element of the particular row + */ + pOutT1 = pOut + (l * numCols); + /* + * Temporary variable to hold the pivot value + */ + in = *pInT1; + + + /* + * Check if the pivot element is zero + */ + if (*pInT1 == 0.0f) + { + /* + * Loop over the number rows present below + */ + for (i = 1U; i < numRows-l; i++) + { + /* + * Update the input and destination pointers + */ + pInT2 = pInT1 + (numCols * i); + pOutT2 = pOutT1 + (numCols * i); + /* + * Check if there is a non zero pivot element to + * * replace in the rows below + */ + if (*pInT2 != 0.0f) + { + f32x4_t vecA, vecB; + /* + * Loop over number of columns + * * to the right of the pilot element + */ + pTmpA = pInT1; + pTmpB = pInT2; + blkCnt = (numCols - l) >> 2; + while (blkCnt > 0U) + { + + vecA = vldrwq_f32(pTmpA); + vecB = vldrwq_f32(pTmpB); + vstrwq_f32(pTmpB, vecA); + vstrwq_f32(pTmpA, vecB); + + pTmpA += 4; + pTmpB += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = (numCols - l) & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + + vecA = vldrwq_f32(pTmpA); + vecB = vldrwq_f32(pTmpB); + vstrwq_p_f32(pTmpB, vecA, p0); + vstrwq_p_f32(pTmpA, vecB, p0); + } + + pInT1 += numCols - l; + pInT2 += numCols - l; + pTmpA = pOutT1; + pTmpB = pOutT2; + blkCnt = numCols >> 2; + while (blkCnt > 0U) + { + + vecA = vldrwq_f32(pTmpA); + vecB = vldrwq_f32(pTmpB); + vstrwq_f32(pTmpB, vecA); + vstrwq_f32(pTmpA, vecB); + pTmpA += 4; + pTmpB += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + */ + blkCnt = numCols & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + + vecA = vldrwq_f32(pTmpA); + vecB = vldrwq_f32(pTmpB); + vstrwq_p_f32(pTmpB, vecA, p0); + vstrwq_p_f32(pTmpA, vecB, p0); + } + + pOutT1 += numCols; + pOutT2 += numCols; + /* + * Flag to indicate whether exchange is done or not + */ + flag = 1U; + + /* + * Break after exchange is done + */ + break; + } + + } + } + + /* + * Update the status if the matrix is singular + */ + if ((flag != 1U) && (in == 0.0f)) + { + return ARM_MATH_SINGULAR; + } + + /* + * Points to the pivot row of input and destination matrices + */ + pPivotRowIn = pIn + (l * numCols); + pPivotRowDst = pOut + (l * numCols); + + /* + * Temporary pointers to the pivot row pointers + */ + pInT1 = pPivotRowIn; + pOutT1 = pPivotRowDst; + + /* + * Pivot element of the row + */ + in = *(pIn + (l * numCols)); + + pTmpA = pInT1; + + f32x4_t invIn = vdupq_n_f32(1.0f / in); + + blkCnt = (numCols - l) >> 2; + f32x4_t vecA; + while (blkCnt > 0U) + { + *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA * invIn; + pTmpA += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + */ + blkCnt = (numCols - l) & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + + + vecA = vldrwq_f32(pTmpA); + vecA = vecA * invIn; + vstrwq_p_f32(pTmpA, vecA, p0); + } + + pInT1 += numCols - l; + /* + * Loop over number of columns + * * to the right of the pilot element + */ + + pTmpA = pOutT1; + blkCnt = numCols >> 2; + while (blkCnt > 0U) + { + *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA *invIn; + pTmpA += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = numCols & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + + vecA = vldrwq_f32(pTmpA); + vecA = vecA * invIn; + vstrwq_p_f32(pTmpA, vecA, p0); + } + + pOutT1 += numCols; + + /* + * Replace the rows with the sum of that row and a multiple of row i + * * so that each new element in column i above row i is zero. + */ + + /* + * Temporary pointers for input and destination matrices + */ + pInT1 = pIn; + pOutT1 = pOut; + + for (i = 0U; i < numRows; i++) + { + /* + * Check for the pivot element + */ + if (i == l) + { + /* + * If the processing element is the pivot element, + * only the columns to the right are to be processed + */ + pInT1 += numCols - l; + pOutT1 += numCols; + } + else + { + /* + * Element of the reference row + */ + + /* + * Working pointers for input and destination pivot rows + */ + pPRT_in = pPivotRowIn; + pPRT_pDst = pPivotRowDst; + /* + * Loop over the number of columns to the right of the pivot element, + * to replace the elements in the input matrix + */ + + in = *pInT1; + f32x4_t tmpV = vdupq_n_f32(in); + + blkCnt = (numCols - l) >> 2; + while (blkCnt > 0U) + { + f32x4_t vec1, vec2; + /* + * Replace the element by the sum of that row + * and a multiple of the reference row + */ + vec1 = vldrwq_f32(pInT1); + vec2 = vldrwq_f32(pPRT_in); + vec1 = vfmsq_f32(vec1, tmpV, vec2); + vstrwq_f32(pInT1, vec1); + pPRT_in += 4; + pInT1 += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = (numCols - l) & 3; + if (blkCnt > 0U) + { + f32x4_t vec1, vec2; + mve_pred16_t p0 = vctp32q(blkCnt); + + vec1 = vldrwq_f32(pInT1); + vec2 = vldrwq_f32(pPRT_in); + vec1 = vfmsq_f32(vec1, tmpV, vec2); + vstrwq_p_f32(pInT1, vec1, p0); + pInT1 += blkCnt; + } + + blkCnt = numCols >> 2; + while (blkCnt > 0U) + { + f32x4_t vec1, vec2; + + /* + * Replace the element by the sum of that row + * and a multiple of the reference row + */ + vec1 = vldrwq_f32(pOutT1); + vec2 = vldrwq_f32(pPRT_pDst); + vec1 = vfmsq_f32(vec1, tmpV, vec2); + vstrwq_f32(pOutT1, vec1); + pPRT_pDst += 4; + pOutT1 += 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = numCols & 3; + if (blkCnt > 0U) + { + f32x4_t vec1, vec2; + mve_pred16_t p0 = vctp32q(blkCnt); + + vec1 = vldrwq_f32(pOutT1); + vec2 = vldrwq_f32(pPRT_pDst); + vec1 = vfmsq_f32(vec1, tmpV, vec2); + vstrwq_p_f32(pOutT1, vec1, p0); + + pInT2 += blkCnt; + pOutT1 += blkCnt; + } + } + /* + * Increment the temporary input pointer + */ + pInT1 = pInT1 + l; + } + /* + * Increment the input pointer + */ + pIn++; + /* + * Decrement the loop counter + */ + loopCnt--; + /* + * Increment the index modifier + */ + l++; + } + + /* + * Set status as ARM_MATH_SUCCESS + */ + status = ARM_MATH_SUCCESS; + + if ((flag != 1U) && (in == 0.0f)) + { + pIn = pSrc->pData; + for (i = 0; i < numRows * numCols; i++) + { + if (pIn[i] != 0.0f) + break; + } + + if (i == numRows * numCols) + status = ARM_MATH_SINGULAR; + } + } + /* Return to application */ + return (status); +} + +#else +#if defined(ARM_MATH_NEON) +arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */ + float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */ + float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */ + uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ + uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ + + + float32_t Xchg, in = 0.0f, in1; /* Temporary input values */ + uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */ + arm_status status; /* status of matrix inverse */ + float32x4_t vec1; + float32x4_t vec2; + float32x4_t tmpV; + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) + || (pSrc->numRows != pDst->numRows)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ + * | a11 a12 | 1 0 | | X11 X12 | + * | | | = | | + * |_ a21 a22 | 0 1 _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc). + * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* Working pointer for destination matrix */ + pOutT1 = pOut; + + /* Loop over the number of rows */ + rowCnt = numRows; + + /* Making the destination matrix as identity matrix */ + while (rowCnt > 0U) + { + /* Writing all zeroes in lower triangle of the destination matrix */ + j = numRows - rowCnt; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Writing all ones in the diagonal of the destination matrix */ + *pOutT1++ = 1.0f; + + /* Writing all zeroes in upper triangle of the destination matrix */ + j = rowCnt - 1U; + + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Decrement the loop counter */ + rowCnt--; + } + + /* Loop over the number of columns of the input matrix. + All the elements in each column are processed by the row operations */ + loopCnt = numCols; + + /* Index modifier to navigate through the columns */ + l = 0U; + + while (loopCnt > 0U) + { + /* Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. */ + + /* Working pointer for the input matrix that points + * to the pivot element of the particular row */ + pInT1 = pIn + (l * numCols); + + /* Working pointer for the destination matrix that points + * to the pivot element of the particular row */ + pOutT1 = pOut + (l * numCols); + + /* Temporary variable to hold the pivot value */ + in = *pInT1; + + /* Check if the pivot element is zero */ + if (*pInT1 == 0.0f) + { + /* Loop over the number rows present below */ + for (i = 1U; i < numRows - l; i++) + { + /* Update the input and destination pointers */ + pInT2 = pInT1 + (numCols * i); + pOutT2 = pOutT1 + (numCols * i); + + /* Check if there is a non zero pivot element to + * replace in the rows below */ + if (*pInT2 != 0.0f) + { + /* Loop over number of columns + * to the right of the pilot element */ + j = numCols - l; + + while (j > 0U) + { + /* Exchange the row elements of the input matrix */ + Xchg = *pInT2; + *pInT2++ = *pInT1; + *pInT1++ = Xchg; + + /* Decrement the loop counter */ + j--; + } + + /* Loop over number of columns of the destination matrix */ + j = numCols; + + while (j > 0U) + { + /* Exchange the row elements of the destination matrix */ + Xchg = *pOutT2; + *pOutT2++ = *pOutT1; + *pOutT1++ = Xchg; + + /* Decrement the loop counter */ + j--; + } + + /* Flag to indicate whether exchange is done or not */ + flag = 1U; + + /* Break after exchange is done */ + break; + } + + + } + } + + /* Update the status if the matrix is singular */ + if ((flag != 1U) && (in == 0.0f)) + { + return ARM_MATH_SINGULAR; + } + + /* Points to the pivot row of input and destination matrices */ + pPivotRowIn = pIn + (l * numCols); + pPivotRowDst = pOut + (l * numCols); + + /* Temporary pointers to the pivot row pointers */ + pInT1 = pPivotRowIn; + pInT2 = pPivotRowDst; + + /* Pivot element of the row */ + in = *pPivotRowIn; + tmpV = vdupq_n_f32(1.0f/in); + + /* Loop over number of columns + * to the right of the pilot element */ + j = (numCols - l) >> 2; + + while (j > 0U) + { + /* Divide each element of the row of the input matrix + * by the pivot element */ + vec1 = vld1q_f32(pInT1); + + vec1 = vmulq_f32(vec1, tmpV); + vst1q_f32(pInT1, vec1); + pInT1 += 4; + + /* Decrement the loop counter */ + j--; + } + + /* Tail */ + j = (numCols - l) & 3; + + while (j > 0U) + { + /* Divide each element of the row of the input matrix + * by the pivot element */ + in1 = *pInT1; + *pInT1++ = in1 / in; + + /* Decrement the loop counter */ + j--; + } + + /* Loop over number of columns of the destination matrix */ + j = numCols >> 2; + + while (j > 0U) + { + /* Divide each element of the row of the destination matrix + * by the pivot element */ + vec1 = vld1q_f32(pInT2); + + vec1 = vmulq_f32(vec1, tmpV); + vst1q_f32(pInT2, vec1); + pInT2 += 4; + + /* Decrement the loop counter */ + j--; + } + + /* Tail */ + j = numCols & 3; + + while (j > 0U) + { + /* Divide each element of the row of the destination matrix + * by the pivot element */ + in1 = *pInT2; + *pInT2++ = in1 / in; + + /* Decrement the loop counter */ + j--; + } + + /* Replace the rows with the sum of that row and a multiple of row i + * so that each new element in column i above row i is zero.*/ + + /* Temporary pointers for input and destination matrices */ + pInT1 = pIn; + pInT2 = pOut; + + /* index used to check for pivot element */ + i = 0U; + + /* Loop over number of rows */ + /* to be replaced by the sum of that row and a multiple of row i */ + k = numRows; + + while (k > 0U) + { + /* Check for the pivot element */ + if (i == l) + { + /* If the processing element is the pivot element, + only the columns to the right are to be processed */ + pInT1 += numCols - l; + + pInT2 += numCols; + } + else + { + /* Element of the reference row */ + in = *pInT1; + tmpV = vdupq_n_f32(in); + + /* Working pointers for input and destination pivot rows */ + pPRT_in = pPivotRowIn; + pPRT_pDst = pPivotRowDst; + + /* Loop over the number of columns to the right of the pivot element, + to replace the elements in the input matrix */ + j = (numCols - l) >> 2; + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + vec1 = vld1q_f32(pInT1); + vec2 = vld1q_f32(pPRT_in); + vec1 = vmlsq_f32(vec1, tmpV, vec2); + vst1q_f32(pInT1, vec1); + pPRT_in += 4; + pInT1 += 4; + + /* Decrement the loop counter */ + j--; + } + + /* Tail */ + j = (numCols - l) & 3; + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + in1 = *pInT1; + *pInT1++ = in1 - (in * *pPRT_in++); + + /* Decrement the loop counter */ + j--; + } + + /* Loop over the number of columns to + replace the elements in the destination matrix */ + j = numCols >> 2; + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + vec1 = vld1q_f32(pInT2); + vec2 = vld1q_f32(pPRT_pDst); + vec1 = vmlsq_f32(vec1, tmpV, vec2); + vst1q_f32(pInT2, vec1); + pPRT_pDst += 4; + pInT2 += 4; + + /* Decrement the loop counter */ + j--; + } + + /* Tail */ + j = numCols & 3; + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + in1 = *pInT2; + *pInT2++ = in1 - (in * *pPRT_pDst++); + + /* Decrement the loop counter */ + j--; + } + + } + + /* Increment the temporary input pointer */ + pInT1 = pInT1 + l; + + /* Decrement the loop counter */ + k--; + + /* Increment the pivot index */ + i++; + } + + /* Increment the input pointer */ + pIn++; + + /* Decrement the loop counter */ + loopCnt--; + + /* Increment the index modifier */ + l++; + } + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + + if ((flag != 1U) && (in == 0.0f)) + { + pIn = pSrc->pData; + for (i = 0; i < numRows * numCols; i++) + { + if (pIn[i] != 0.0f) + break; + } + + if (i == numRows * numCols) + status = ARM_MATH_SINGULAR; + } + } + /* Return to application */ + return (status); +} +#else +arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *pInT1, *pInT2; /* Temporary input data matrix pointer */ + float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */ + float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst; /* Temporary input and output data matrix pointer */ + uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ + uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ + +#if defined (ARM_MATH_DSP) + + float32_t Xchg, in = 0.0f, in1; /* Temporary input values */ + uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l; /* loop counters */ + arm_status status; /* status of matrix inverse */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || + (pDst->numRows != pDst->numCols) || + (pSrc->numRows != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ + * | a11 a12 | 1 0 | | X11 X12 | + * | | | = | | + * |_ a21 a22 | 0 1 _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc). + * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* Working pointer for destination matrix */ + pOutT1 = pOut; + + /* Loop over the number of rows */ + rowCnt = numRows; + + /* Making the destination matrix as identity matrix */ + while (rowCnt > 0U) + { + /* Writing all zeroes in lower triangle of the destination matrix */ + j = numRows - rowCnt; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Writing all ones in the diagonal of the destination matrix */ + *pOutT1++ = 1.0f; + + /* Writing all zeroes in upper triangle of the destination matrix */ + j = rowCnt - 1U; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Decrement loop counter */ + rowCnt--; + } + + /* Loop over the number of columns of the input matrix. + All the elements in each column are processed by the row operations */ + loopCnt = numCols; + + /* Index modifier to navigate through the columns */ + l = 0U; + + while (loopCnt > 0U) + { + /* Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. */ + + /* Working pointer for the input matrix that points + * to the pivot element of the particular row */ + pInT1 = pIn + (l * numCols); + + /* Working pointer for the destination matrix that points + * to the pivot element of the particular row */ + pOutT1 = pOut + (l * numCols); + + /* Temporary variable to hold the pivot value */ + in = *pInT1; + + + + /* Check if the pivot element is zero */ + if (*pInT1 == 0.0f) + { + /* Loop over the number rows present below */ + + for (i = 1U; i < numRows - l; i++) + { + /* Update the input and destination pointers */ + pInT2 = pInT1 + (numCols * i); + pOutT2 = pOutT1 + (numCols * i); + + /* Check if there is a non zero pivot element to + * replace in the rows below */ + if (*pInT2 != 0.0f) + { + /* Loop over number of columns + * to the right of the pilot element */ + j = numCols - l; + + while (j > 0U) + { + /* Exchange the row elements of the input matrix */ + Xchg = *pInT2; + *pInT2++ = *pInT1; + *pInT1++ = Xchg; + + /* Decrement the loop counter */ + j--; + } + + /* Loop over number of columns of the destination matrix */ + j = numCols; + + while (j > 0U) + { + /* Exchange the row elements of the destination matrix */ + Xchg = *pOutT2; + *pOutT2++ = *pOutT1; + *pOutT1++ = Xchg; + + /* Decrement loop counter */ + j--; + } + + /* Flag to indicate whether exchange is done or not */ + flag = 1U; + + /* Break after exchange is done */ + break; + } + + + /* Decrement loop counter */ + } + } + + /* Update the status if the matrix is singular */ + if ((flag != 1U) && (in == 0.0f)) + { + return ARM_MATH_SINGULAR; + } + + /* Points to the pivot row of input and destination matrices */ + pPivotRowIn = pIn + (l * numCols); + pPivotRowDst = pOut + (l * numCols); + + /* Temporary pointers to the pivot row pointers */ + pInT1 = pPivotRowIn; + pInT2 = pPivotRowDst; + + /* Pivot element of the row */ + in = *pPivotRowIn; + + /* Loop over number of columns + * to the right of the pilot element */ + j = (numCols - l); + + while (j > 0U) + { + /* Divide each element of the row of the input matrix + * by the pivot element */ + in1 = *pInT1; + *pInT1++ = in1 / in; + + /* Decrement the loop counter */ + j--; + } + + /* Loop over number of columns of the destination matrix */ + j = numCols; + + while (j > 0U) + { + /* Divide each element of the row of the destination matrix + * by the pivot element */ + in1 = *pInT2; + *pInT2++ = in1 / in; + + /* Decrement the loop counter */ + j--; + } + + /* Replace the rows with the sum of that row and a multiple of row i + * so that each new element in column i above row i is zero.*/ + + /* Temporary pointers for input and destination matrices */ + pInT1 = pIn; + pInT2 = pOut; + + /* index used to check for pivot element */ + i = 0U; + + /* Loop over number of rows */ + /* to be replaced by the sum of that row and a multiple of row i */ + k = numRows; + + while (k > 0U) + { + /* Check for the pivot element */ + if (i == l) + { + /* If the processing element is the pivot element, + only the columns to the right are to be processed */ + pInT1 += numCols - l; + + pInT2 += numCols; + } + else + { + /* Element of the reference row */ + in = *pInT1; + + /* Working pointers for input and destination pivot rows */ + pPRT_in = pPivotRowIn; + pPRT_pDst = pPivotRowDst; + + /* Loop over the number of columns to the right of the pivot element, + to replace the elements in the input matrix */ + j = (numCols - l); + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + in1 = *pInT1; + *pInT1++ = in1 - (in * *pPRT_in++); + + /* Decrement the loop counter */ + j--; + } + + /* Loop over the number of columns to + replace the elements in the destination matrix */ + j = numCols; + + while (j > 0U) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + in1 = *pInT2; + *pInT2++ = in1 - (in * *pPRT_pDst++); + + /* Decrement loop counter */ + j--; + } + + } + + /* Increment temporary input pointer */ + pInT1 = pInT1 + l; + + /* Decrement loop counter */ + k--; + + /* Increment pivot index */ + i++; + } + + /* Increment the input pointer */ + pIn++; + + /* Decrement the loop counter */ + loopCnt--; + + /* Increment the index modifier */ + l++; + } + + +#else + + float32_t Xchg, in = 0.0f; /* Temporary input values */ + uint32_t i, rowCnt, flag = 0U, j, loopCnt, l; /* loop counters */ + arm_status status; /* status of matrix inverse */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || + (pDst->numRows != pDst->numCols) || + (pSrc->numRows != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ _ _ _ _ + * | | a11 a12 | | | 1 0 | | | X11 X12 | + * | | | | | | | = | | + * |_ |_ a21 a22 _| | |_0 1 _| _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, src). + * Therefore, the matrix to the right of the bar is our solution(dst matrix, dst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* Working pointer for destination matrix */ + pOutT1 = pOut; + + /* Loop over the number of rows */ + rowCnt = numRows; + + /* Making the destination matrix as identity matrix */ + while (rowCnt > 0U) + { + /* Writing all zeroes in lower triangle of the destination matrix */ + j = numRows - rowCnt; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Writing all ones in the diagonal of the destination matrix */ + *pOutT1++ = 1.0f; + + /* Writing all zeroes in upper triangle of the destination matrix */ + j = rowCnt - 1U; + while (j > 0U) + { + *pOutT1++ = 0.0f; + j--; + } + + /* Decrement loop counter */ + rowCnt--; + } + + /* Loop over the number of columns of the input matrix. + All the elements in each column are processed by the row operations */ + loopCnt = numCols; + + /* Index modifier to navigate through the columns */ + l = 0U; + + while (loopCnt > 0U) + { + /* Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. */ + + /* Working pointer for the input matrix that points + * to the pivot element of the particular row */ + pInT1 = pIn + (l * numCols); + + /* Working pointer for the destination matrix that points + * to the pivot element of the particular row */ + pOutT1 = pOut + (l * numCols); + + /* Temporary variable to hold the pivot value */ + in = *pInT1; + + /* Check if the pivot element is zero */ + if (*pInT1 == 0.0f) + { + /* Loop over the number rows present below */ + for (i = 1U; i < numRows-l; i++) + { + /* Update the input and destination pointers */ + pInT2 = pInT1 + (numCols * i); + pOutT2 = pOutT1 + (numCols * i); + + /* Check if there is a non zero pivot element to + * replace in the rows below */ + if (*pInT2 != 0.0f) + { + /* Loop over number of columns + * to the right of the pilot element */ + for (j = 0U; j < (numCols - l); j++) + { + /* Exchange the row elements of the input matrix */ + Xchg = *pInT2; + *pInT2++ = *pInT1; + *pInT1++ = Xchg; + } + + for (j = 0U; j < numCols; j++) + { + Xchg = *pOutT2; + *pOutT2++ = *pOutT1; + *pOutT1++ = Xchg; + } + + /* Flag to indicate whether exchange is done or not */ + flag = 1U; + + /* Break after exchange is done */ + break; + } + } + } + + + /* Update the status if the matrix is singular */ + if ((flag != 1U) && (in == 0.0f)) + { + return ARM_MATH_SINGULAR; + } + + /* Points to the pivot row of input and destination matrices */ + pPivotRowIn = pIn + (l * numCols); + pPivotRowDst = pOut + (l * numCols); + + /* Temporary pointers to the pivot row pointers */ + pInT1 = pPivotRowIn; + pOutT1 = pPivotRowDst; + + /* Pivot element of the row */ + in = *(pIn + (l * numCols)); + + /* Loop over number of columns + * to the right of the pilot element */ + for (j = 0U; j < (numCols - l); j++) + { + /* Divide each element of the row of the input matrix + * by the pivot element */ + *pInT1 = *pInT1 / in; + pInT1++; + } + for (j = 0U; j < numCols; j++) + { + /* Divide each element of the row of the destination matrix + * by the pivot element */ + *pOutT1 = *pOutT1 / in; + pOutT1++; + } + + /* Replace the rows with the sum of that row and a multiple of row i + * so that each new element in column i above row i is zero.*/ + + /* Temporary pointers for input and destination matrices */ + pInT1 = pIn; + pOutT1 = pOut; + + for (i = 0U; i < numRows; i++) + { + /* Check for the pivot element */ + if (i == l) + { + /* If the processing element is the pivot element, + only the columns to the right are to be processed */ + pInT1 += numCols - l; + pOutT1 += numCols; + } + else + { + /* Element of the reference row */ + in = *pInT1; + + /* Working pointers for input and destination pivot rows */ + pPRT_in = pPivotRowIn; + pPRT_pDst = pPivotRowDst; + + /* Loop over the number of columns to the right of the pivot element, + to replace the elements in the input matrix */ + for (j = 0U; j < (numCols - l); j++) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + *pInT1 = *pInT1 - (in * *pPRT_in++); + pInT1++; + } + + /* Loop over the number of columns to + replace the elements in the destination matrix */ + for (j = 0U; j < numCols; j++) + { + /* Replace the element by the sum of that row + and a multiple of the reference row */ + *pOutT1 = *pOutT1 - (in * *pPRT_pDst++); + pOutT1++; + } + + } + + /* Increment temporary input pointer */ + pInT1 = pInT1 + l; + } + + /* Increment the input pointer */ + pIn++; + + /* Decrement the loop counter */ + loopCnt--; + + /* Increment the index modifier */ + l++; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + + if ((flag != 1U) && (in == 0.0f)) + { + pIn = pSrc->pData; + for (i = 0; i < numRows * numCols; i++) + { + if (pIn[i] != 0.0f) + break; + } + + if (i == numRows * numCols) + status = ARM_MATH_SINGULAR; + } + } + + /* Return to application */ + return (status); +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of MatrixInv group + */ |