1 files changed, 1570 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c b/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
new file mode 100644
index 0000000..b0ef760
--- /dev/null
+++ b/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
@@ -0,0 +1,1570 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_inverse_f32.c
+ * Description:  Floating-point matrix inverse
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/matrix_functions.h"
+
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixInv Matrix Inverse
+
+  Computes the inverse of a matrix.
+
+  The inverse is defined only if the input matrix is square and non-singular (the determinant is non-zero).
+  When ARM_MATH_MATRIX_CHECK is defined, the function checks that the input and output matrices are square and of the same size.
+
+  Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix
+  inversion of floating-point matrices.
+
+  @par Algorithm
+  The Gauss-Jordan method is used to find the inverse.
+  The algorithm performs a sequence of elementary row-operations until it
+  reduces the input matrix to an identity matrix. Applying the same sequence
+  of elementary row-operations to an identity matrix yields the inverse matrix.
+  If the input matrix is singular, then the algorithm terminates and returns error status
+  <code>ARM_MATH_SINGULAR</code>.
+  \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method"
+ */
+
+/**
+  @addtogroup MatrixInv
+  @{
+ */
+
+/**
+  @brief         Floating-point matrix inverse.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[out]    pDst      points to output matrix structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+arm_status arm_mat_inverse_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst)
+{
+    float32_t *pIn = pSrc->pData;   /* input data matrix pointer (advanced by one column per pivot step) */
+    float32_t *pOut = pDst->pData;  /* output data matrix pointer */
+    float32_t *pInT1, *pInT2;   /* Temporary input data matrix pointer */
+    float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
+    float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;    /* Temporary input and output data matrix pointer */
+
+    uint32_t  numRows = pSrc->numRows;  /* Number of rows in the matrix  */
+    uint32_t  numCols = pSrc->numCols;  /* Number of Cols in the matrix  */
+    float32_t *pTmpA, *pTmpB;           /* Row cursors used by the vector loops */
+
+    float32_t in = 0.0f;        /* Temporary input values  */
+    uint32_t  i, rowCnt, flag = 0U, j, loopCnt, l;   /* loop counters */
+    arm_status status;          /* status of matrix inverse */
+    uint32_t  blkCnt;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+   /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
+     || (pSrc->numRows != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
+  {
+
+    /*--------------------------------------------------------------------------------------------------------------
+     * Matrix Inverse can be solved using elementary row operations.
+     *
+     *  Gauss-Jordan Method:
+     *
+     *     1. First combine the identity matrix and the input matrix separated by a bar to form an
+     *        augmented matrix as follows:
+     *                      _  _          _     _      _   _         _         _
+     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
+     *                     |  |            | | |        |   |   =   |           |
+     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X22 _|
+     *
+     *      2. In our implementation, pDst Matrix is used as identity matrix.
+     *
+     *      3. Begin with the first row. Let i = 1.
+     *
+     *      4. Check to see if the pivot for row i is zero.
+     *         The pivot is the element of the main diagonal that is on the current row.
+     *         For instance, if working with row i, then the pivot element is aii.
+     *         If the pivot is zero, exchange that row with a row below it that does not
+     *         contain a zero in column i. If this is not possible, then an inverse
+     *         to that matrix does not exist.
+     *
+     *      5. Divide every element of row i by the pivot.
+     *
+     *      6. For every other row (above and below row i), replace that row with the sum of that row and
+     *         a multiple of row i so that each new element in column i outside row i is zero.
+     *
+     *      7. Move to the next row and column and repeat steps 4 through 6 until you have zeros
+     *         for every element below and above the main diagonal.
+     *
+     *      8. Now an identity matrix is formed to the left of the bar(input matrix, src).
+     *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
+     *----------------------------------------------------------------------------------------------------------------*/
+
+        /*
+         * Working pointer for destination matrix
+         */
+        pOutT1 = pOut;
+        /*
+         * Loop over the number of rows
+         */
+        rowCnt = numRows;
+        /*
+         * Making the destination matrix as identity matrix
+         */
+        while (rowCnt > 0U)
+        {
+            /*
+             * Writing all zeroes in lower triangle of the destination matrix
+             */
+            j = numRows - rowCnt;
+            while (j > 0U)
+            {
+                *pOutT1++ = 0.0f;
+                j--;
+            }
+            /*
+             * Writing all ones in the diagonal of the destination matrix
+             */
+            *pOutT1++ = 1.0f;
+            /*
+             * Writing all zeroes in upper triangle of the destination matrix
+             */
+            j = rowCnt - 1U;
+            while (j > 0U)
+            {
+                *pOutT1++ = 0.0f;
+                j--;
+            }
+            /*
+             * Decrement the loop counter
+             */
+            rowCnt--;
+        }
+
+        /*
+         * Loop over the number of columns of the input matrix.
+         * All the elements in each column are processed by the row operations
+         */
+        loopCnt = numCols;
+        /*
+         * Index modifier to navigate through the columns
+         */
+        l = 0U;
+        while (loopCnt > 0U)
+        {
+            /*
+             * Check if the pivot element is zero..
+             * If it is zero then interchange the row with non zero row below.
+             * If there is no non zero element to replace in the rows below,
+             * then the matrix is Singular.
+             */
+
+            /*
+             * Working pointer for the input matrix that points
+             * to the pivot element of the particular row
+             */
+            pInT1 = pIn + (l * numCols);
+            /*
+             * Working pointer for the destination matrix that points
+             * to the start of the same row of the destination
+             */
+            pOutT1 = pOut + (l * numCols);
+            /*
+             * Temporary variable to hold the pivot value
+             */
+            in = *pInT1;
+            /* Exchange flag is per column: a stale 1 from an earlier swap would hide a zero pivot here */
+            flag = 0U;
+            /*
+             * Check if the pivot element is zero
+             */
+            if (*pInT1 == 0.0f)
+            {
+                /*
+                 * Loop over the number rows present below
+                 */
+                for (i = 1U; i < numRows-l; i++)
+                {
+                    /*
+                     * Update the input and destination pointers
+                     */
+                    pInT2 = pInT1 + (numCols * i);
+                    pOutT2 = pOutT1 + (numCols * i);
+                    /*
+                     * Check if there is a non zero pivot element to
+                     * replace in the rows below
+                     */
+                    if (*pInT2 != 0.0f)
+                    {
+                        f32x4_t vecA, vecB;
+                        /*
+                         * Loop over number of columns
+                         * to the right of the pivot element
+                         */
+                        pTmpA = pInT1;
+                        pTmpB = pInT2;
+                        blkCnt = (numCols - l) >> 2;
+                        while (blkCnt > 0U)
+                        {
+
+                            vecA = vldrwq_f32(pTmpA);
+                            vecB = vldrwq_f32(pTmpB);
+                            vstrwq_f32(pTmpB, vecA);
+                            vstrwq_f32(pTmpA, vecB);
+
+                            pTmpA += 4;
+                            pTmpB += 4;
+                            /*
+                             * Decrement the blockSize loop counter
+                             */
+                            blkCnt--;
+                        }
+                        /*
+                         * tail
+                         * (predicated loads keep the last row from reading past the buffer)
+                         */
+                        blkCnt = (numCols - l) & 3;
+                        if (blkCnt > 0U)
+                        {
+                            mve_pred16_t p0 = vctp32q(blkCnt);
+
+                            vecA = vldrwq_z_f32(pTmpA, p0);
+                            vecB = vldrwq_z_f32(pTmpB, p0);
+                            vstrwq_p_f32(pTmpB, vecA, p0);
+                            vstrwq_p_f32(pTmpA, vecB, p0);
+                        }
+
+                        pInT1 += numCols - l;
+                        pInT2 += numCols - l;
+                        pTmpA = pOutT1;
+                        pTmpB = pOutT2;
+                        blkCnt = numCols >> 2;
+                        while (blkCnt > 0U)
+                        {
+
+                            vecA = vldrwq_f32(pTmpA);
+                            vecB = vldrwq_f32(pTmpB);
+                            vstrwq_f32(pTmpB, vecA);
+                            vstrwq_f32(pTmpA, vecB);
+                            pTmpA += 4;
+                            pTmpB += 4;
+                            /*
+                             * Decrement the blockSize loop counter
+                             */
+                            blkCnt--;
+                        }
+                        /*
+                         * tail (predicated loads, as for the input rows)
+                         */
+                        blkCnt = numCols & 3;
+                        if (blkCnt > 0U)
+                        {
+                            mve_pred16_t p0 = vctp32q(blkCnt);
+
+                            vecA = vldrwq_z_f32(pTmpA, p0);
+                            vecB = vldrwq_z_f32(pTmpB, p0);
+                            vstrwq_p_f32(pTmpB, vecA, p0);
+                            vstrwq_p_f32(pTmpA, vecB, p0);
+                        }
+
+                        pOutT1 += numCols;
+                        pOutT2 += numCols;
+                        /*
+                         * Flag to indicate whether exchange is done or not
+                         */
+                        flag = 1U;
+
+                        /*
+                         * Break after exchange is done
+                         */
+                        break;
+                    }
+
+                }
+            }
+
+            /*
+             * Zero pivot and no row below could replace it: the matrix is singular
+             */
+            if ((flag != 1U) && (in == 0.0f))
+            {
+                return ARM_MATH_SINGULAR;
+            }
+
+            /*
+             * Points to the pivot row of input and destination matrices
+             */
+            pPivotRowIn = pIn + (l * numCols);
+            pPivotRowDst = pOut + (l * numCols);
+
+            /*
+             * Temporary pointers to the pivot row pointers
+             */
+            pInT1 = pPivotRowIn;
+            pOutT1 = pPivotRowDst;
+
+            /*
+             * Pivot element of the row
+             */
+            in = *(pIn + (l * numCols));
+
+            pTmpA = pInT1;
+
+            f32x4_t invIn = vdupq_n_f32(1.0f / in);
+
+            blkCnt = (numCols - l) >> 2;
+            f32x4_t vecA;
+            while (blkCnt > 0U)
+            {
+                vstrwq_f32(pTmpA, vmulq_f32(vldrwq_f32(pTmpA), invIn));
+                pTmpA += 4;
+                /*
+                 * Decrement the blockSize loop counter
+                 */
+                blkCnt--;
+            }
+            /*
+             * tail
+             */
+            blkCnt = (numCols - l) & 3;
+            if (blkCnt > 0U)
+            {
+                mve_pred16_t p0 = vctp32q(blkCnt);
+
+
+                vecA = vldrwq_z_f32(pTmpA, p0);
+                vecA = vmulq_f32(vecA, invIn);
+                vstrwq_p_f32(pTmpA, vecA, p0);
+            }
+
+            pInT1 += numCols - l;
+            /*
+             * Scale every column of the destination pivot row
+             * by the same reciprocal of the pivot
+             */
+
+            pTmpA = pOutT1;
+            blkCnt = numCols >> 2;
+            while (blkCnt > 0U)
+            {
+                vstrwq_f32(pTmpA, vmulq_f32(vldrwq_f32(pTmpA), invIn));
+                pTmpA += 4;
+                /*
+                 * Decrement the blockSize loop counter
+                 */
+                blkCnt--;
+            }
+            /*
+             * tail
+             * (will be merged thru tail predication)
+             */
+            blkCnt = numCols & 3;
+            if (blkCnt > 0U)
+            {
+                mve_pred16_t p0 = vctp32q(blkCnt);
+
+                vecA = vldrwq_z_f32(pTmpA, p0);
+                vecA = vmulq_f32(vecA, invIn);
+                vstrwq_p_f32(pTmpA, vecA, p0);
+            }
+
+            pOutT1 += numCols;
+
+            /*
+             * Replace every other row with the sum of that row and a multiple of the
+             * pivot row so that each element in the pivot column outside the pivot row is zero.
+             */
+
+            /*
+             * Temporary pointers for input and destination matrices
+             */
+            pInT1 = pIn;
+            pOutT1 = pOut;
+
+            for (i = 0U; i < numRows; i++)
+            {
+                /*
+                 * Check for the pivot element
+                 */
+                if (i == l)
+                {
+                    /*
+                     * The pivot row itself is left unchanged:
+                     * only step both pointers past it
+                     */
+                    pInT1 += numCols - l;
+                    pOutT1 += numCols;
+                }
+                else
+                {
+                    /*
+                     * Eliminate the pivot-column entry of row i
+                     */
+
+                    /*
+                     * Working pointers for input and destination pivot rows
+                     */
+                    pPRT_in = pPivotRowIn;
+                    pPRT_pDst = pPivotRowDst;
+                    /*
+                     * Loop over the number of columns to the right of the pivot element,
+                     * to replace the elements in the input matrix
+                     */
+
+                    in = *pInT1;
+                    f32x4_t tmpV = vdupq_n_f32(in);
+
+                    blkCnt = (numCols - l) >> 2;
+                    while (blkCnt > 0U)
+                    {
+                        f32x4_t vec1, vec2;
+                        /*
+                         * Replace the element by the sum of that row
+                         * and a multiple of the reference row
+                         */
+                        vec1 = vldrwq_f32(pInT1);
+                        vec2 = vldrwq_f32(pPRT_in);
+                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
+                        vstrwq_f32(pInT1, vec1);
+                        pPRT_in += 4;
+                        pInT1 += 4;
+                        /*
+                         * Decrement the blockSize loop counter
+                         */
+                        blkCnt--;
+                    }
+                    /*
+                     * tail
+                     * (will be merged thru tail predication)
+                     */
+                    blkCnt = (numCols - l) & 3;
+                    if (blkCnt > 0U)
+                    {
+                        f32x4_t vec1, vec2;
+                        mve_pred16_t p0 = vctp32q(blkCnt);
+
+                        vec1 = vldrwq_z_f32(pInT1, p0);
+                        vec2 = vldrwq_z_f32(pPRT_in, p0);
+                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
+                        vstrwq_p_f32(pInT1, vec1, p0);
+                        pInT1 += blkCnt;
+                    }
+
+                    blkCnt = numCols >> 2;
+                    while (blkCnt > 0U)
+                    {
+                        f32x4_t vec1, vec2;
+
+                        /*
+                         * Replace the element by the sum of that row
+                         * and a multiple of the reference row
+                         */
+                        vec1 = vldrwq_f32(pOutT1);
+                        vec2 = vldrwq_f32(pPRT_pDst);
+                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
+                        vstrwq_f32(pOutT1, vec1);
+                        pPRT_pDst += 4;
+                        pOutT1 += 4;
+                        /*
+                         * Decrement the blockSize loop counter
+                         */
+                        blkCnt--;
+                    }
+                    /*
+                     * tail
+                     * (will be merged thru tail predication)
+                     */
+                    blkCnt = numCols & 3;
+                    if (blkCnt > 0U)
+                    {
+                        f32x4_t vec1, vec2;
+                        mve_pred16_t p0 = vctp32q(blkCnt);
+
+                        vec1 = vldrwq_z_f32(pOutT1, p0);
+                        vec2 = vldrwq_z_f32(pPRT_pDst, p0);
+                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
+                        vstrwq_p_f32(pOutT1, vec1, p0);
+
+                        /* Step the destination pointer past the tail elements */
+                        pOutT1 += blkCnt;
+                    }
+                }
+                /*
+                 * Skip the already-processed columns to reach column l of the next row
+                 */
+                pInT1 = pInT1 + l;
+            }
+            /*
+             * Increment the input pointer
+             */
+            pIn++;
+            /*
+             * Decrement the loop counter
+             */
+            loopCnt--;
+            /*
+             * Increment the index modifier
+             */
+            l++;
+        }
+
+        /*
+         * Set status as ARM_MATH_SUCCESS
+         */
+        status = ARM_MATH_SUCCESS;
+        /* Fallback: with no exchange and a zero last multiplier, report singular if no source element is non-zero */
+        if ((flag != 1U) && (in == 0.0f))
+        {
+            pIn = pSrc->pData;
+            for (i = 0; i < numRows * numCols; i++)
+            {
+                if (pIn[i] != 0.0f)
+                    break;
+            }
+
+            if (i == numRows * numCols)
+                status = ARM_MATH_SINGULAR;
+        }
+  }
+  /* Return to application */
+  return (status);
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+arm_status arm_mat_inverse_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer (advanced by one column per pivot step) */
+  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
+  float32_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
+  float32_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
+  float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
+  uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
+
+
+  float32_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
+  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
+  arm_status status;                             /* status of matrix inverse */
+  float32x4_t vec1;
+  float32x4_t vec2;
+  float32x4_t tmpV;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
+     || (pSrc->numRows != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
+
+  {
+   /*--------------------------------------------------------------------------------------------------------------
+   * Matrix Inverse can be solved using elementary row operations.
+   *
+   *  Gauss-Jordan Method:
+   *
+   *     1. First combine the identity matrix and the input matrix separated by a bar to form an
+   *        augmented matrix as follows:
+   *              _                  _         _         _
+   *             |  a11  a12 | 1   0  |       |  X11 X12  |
+   *             |           |        |   =   |           |
+   *             |_ a21  a22 | 0   1 _|       |_ X21 X22 _|
+   *
+   *    2. In our implementation, pDst Matrix is used as identity matrix.
+   *
+   *    3. Begin with the first row. Let i = 1.
+   *
+   *    4. Check to see if the pivot for row i is zero.
+   *       The pivot is the element of the main diagonal that is on the current row.
+   *       For instance, if working with row i, then the pivot element is aii.
+   *       If the pivot is zero, exchange that row with a row below it that does not
+   *       contain a zero in column i. If this is not possible, then an inverse
+   *       to that matrix does not exist.
+   *
+   *      5. Divide every element of row i by the pivot.
+   *
+   *      6. For every other row (above and below row i), replace that row with the sum of that row and
+   *       a multiple of row i so that each new element in column i outside row i is zero.
+   *
+   *      7. Move to the next row and column and repeat steps 4 through 6 until you have zeros
+   *       for every element below and above the main diagonal.
+   *
+   *    8. Now an identity matrix is formed to the left of the bar(input matrix, pSrc).
+   *       Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
+   *----------------------------------------------------------------------------------------------------------------*/
+
+    /* Working pointer for destination matrix */
+    pOutT1 = pOut;
+
+    /* Loop over the number of rows */
+    rowCnt = numRows;
+
+    /* Making the destination matrix as identity matrix */
+    while (rowCnt > 0U)
+    {
+      /* Writing all zeroes in lower triangle of the destination matrix */
+      j = numRows - rowCnt;
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Writing all ones in the diagonal of the destination matrix */
+      *pOutT1++ = 1.0f;
+
+      /* Writing all zeroes in upper triangle of the destination matrix */
+      j = rowCnt - 1U;
+
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Decrement the loop counter */
+      rowCnt--;
+    }
+
+    /* Loop over the number of columns of the input matrix.
+       All the elements in each column are processed by the row operations */
+    loopCnt = numCols;
+
+    /* Index modifier to navigate through the columns */
+    l = 0U;
+
+    while (loopCnt > 0U)
+    {
+      /* Check if the pivot element is zero..
+       * If it is zero then interchange the row with non zero row below.
+       * If there is no non zero element to replace in the rows below,
+       * then the matrix is Singular. */
+
+      /* Working pointer for the input matrix that points
+       * to the pivot element of the particular row  */
+      pInT1 = pIn + (l * numCols);
+
+      /* Working pointer for the destination matrix that points
+       * to the start of the same row of the destination  */
+      pOutT1 = pOut + (l * numCols);
+
+      /* Temporary variable to hold the pivot value */
+      in = *pInT1;
+      flag = 0U;   /* exchange flag is per column: a stale 1 from an earlier swap would hide a zero pivot here */
+      /* Check if the pivot element is zero */
+      if (*pInT1 == 0.0f)
+      {
+        /* Loop over the number rows present below */
+        for (i = 1U; i < numRows - l; i++)
+        {
+          /* Update the input and destination pointers */
+          pInT2 = pInT1 + (numCols * i);
+          pOutT2 = pOutT1 + (numCols * i);
+
+          /* Check if there is a non zero pivot element to
+           * replace in the rows below */
+          if (*pInT2 != 0.0f)
+          {
+            /* Loop over number of columns
+             * to the right of the pivot element */
+            j = numCols - l;
+
+            while (j > 0U)
+            {
+              /* Exchange the row elements of the input matrix */
+              Xchg = *pInT2;
+              *pInT2++ = *pInT1;
+              *pInT1++ = Xchg;
+
+              /* Decrement the loop counter */
+              j--;
+            }
+
+            /* Loop over number of columns of the destination matrix */
+            j = numCols;
+
+            while (j > 0U)
+            {
+              /* Exchange the row elements of the destination matrix */
+              Xchg = *pOutT2;
+              *pOutT2++ = *pOutT1;
+              *pOutT1++ = Xchg;
+
+              /* Decrement the loop counter */
+              j--;
+            }
+
+            /* Flag to indicate whether exchange is done or not */
+            flag = 1U;
+
+            /* Break after exchange is done */
+            break;
+          }
+
+
+        }
+      }
+
+      /* Zero pivot and no row below could replace it: the matrix is singular */
+      if ((flag != 1U) && (in == 0.0f))
+      {
+        return ARM_MATH_SINGULAR;
+      }
+
+      /* Points to the pivot row of input and destination matrices */
+      pPivotRowIn = pIn + (l * numCols);
+      pPivotRowDst = pOut + (l * numCols);
+
+      /* Temporary pointers to the pivot row pointers */
+      pInT1 = pPivotRowIn;
+      pInT2 = pPivotRowDst;
+
+      /* Pivot element of the row */
+      in = *pPivotRowIn;
+      tmpV = vdupq_n_f32(1.0f/in);
+
+      /* Loop over number of columns
+       * to the right of the pivot element */
+      j = (numCols - l) >> 2;
+
+      while (j > 0U)
+      {
+        /* Scale four elements of the input pivot row
+         * by the reciprocal of the pivot element */
+        vec1 = vld1q_f32(pInT1);
+
+        vec1 = vmulq_f32(vec1, tmpV);
+        vst1q_f32(pInT1, vec1);
+        pInT1 += 4;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Tail */
+      j = (numCols - l) & 3;
+
+      while (j > 0U)
+      {
+        /* Divide each element of the row of the input matrix
+         * by the pivot element */
+        in1 = *pInT1;
+        *pInT1++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Loop over number of columns of the destination matrix */
+      j = numCols >> 2;
+
+      while (j > 0U)
+      {
+        /* Scale four elements of the destination pivot row
+         * by the reciprocal of the pivot element */
+        vec1 = vld1q_f32(pInT2);
+
+        vec1 = vmulq_f32(vec1, tmpV);
+        vst1q_f32(pInT2, vec1);
+        pInT2 += 4;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Tail */
+      j = numCols & 3;
+
+      while (j > 0U)
+      {
+        /* Divide each element of the row of the destination matrix
+         * by the pivot element */
+        in1 = *pInT2;
+        *pInT2++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Replace every other row with the sum of that row and a multiple of the pivot row
+       * so that each element in the pivot column outside the pivot row is zero.*/
+
+      /* Temporary pointers for input and destination matrices */
+      pInT1 = pIn;
+      pInT2 = pOut;
+
+      /* index used to check for pivot element */
+      i = 0U;
+
+      /* Loop over number of rows */
+      /*  to be replaced by the sum of that row and a multiple of row i */
+      k = numRows;
+
+      while (k > 0U)
+      {
+        /* Check for the pivot element */
+        if (i == l)
+        {
+          /* The pivot row itself is left unchanged:
+             only step both pointers past it */
+          pInT1 += numCols - l;
+
+          pInT2 += numCols;
+        }
+        else
+        {
+          /* Pivot-column entry of this row: the multiple of the pivot row to subtract */
+          in = *pInT1;
+          tmpV = vdupq_n_f32(in);
+
+          /* Working pointers for input and destination pivot rows */
+          pPRT_in = pPivotRowIn;
+          pPRT_pDst = pPivotRowDst;
+
+          /* Loop over the number of columns to the right of the pivot element,
+             to replace the elements in the input matrix */
+          j = (numCols - l) >> 2;
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            vec1 = vld1q_f32(pInT1);
+            vec2 = vld1q_f32(pPRT_in);
+            vec1 = vmlsq_f32(vec1, tmpV, vec2);
+            vst1q_f32(pInT1, vec1);
+            pPRT_in += 4;
+            pInT1 += 4;
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+          /* Tail */
+          j = (numCols - l) & 3;
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT1;
+            *pInT1++ = in1 - (in * *pPRT_in++);
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+          /* Loop over the number of columns to
+             replace the elements in the destination matrix */
+          j = numCols >> 2;
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            vec1 = vld1q_f32(pInT2);
+            vec2 = vld1q_f32(pPRT_pDst);
+            vec1 = vmlsq_f32(vec1, tmpV, vec2);
+            vst1q_f32(pInT2, vec1);
+            pPRT_pDst += 4;
+            pInT2 += 4;
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+          /* Tail */
+          j = numCols & 3;
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT2;
+            *pInT2++ = in1 - (in * *pPRT_pDst++);
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+        }
+
+        /* Skip the already-processed columns to reach column l of the next row */
+        pInT1 = pInT1 + l;
+
+        /* Decrement the loop counter */
+        k--;
+
+        /* Increment the pivot index */
+        i++;
+      }
+
+      /* Increment the input pointer */
+      pIn++;
+
+      /* Decrement the loop counter */
+      loopCnt--;
+
+      /* Increment the index modifier */
+      l++;
+    }
+
+    /* Set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+    /* Fallback: with no exchange and a zero last multiplier, report singular if no source element is non-zero */
+    if ((flag != 1U) && (in == 0.0f))
+    {
+      pIn = pSrc->pData;
+      for (i = 0; i < numRows * numCols; i++)
+      {
+        if (pIn[i] != 0.0f)
+            break;
+      }
+
+      if (i == numRows * numCols)
+        status = ARM_MATH_SINGULAR;
+    }
+  }
+  /* Return to application */
+  return (status);
+}
+#else
+/*
+ * Scalar implementation of the floating-point matrix inverse (Gauss-Jordan method).
+ *
+ * pSrc  input matrix; it is reduced in place, so the data it points to is overwritten.
+ * pDst  output matrix; it is first set to the identity and ends up holding the inverse.
+ *
+ * Returns ARM_MATH_SUCCESS, ARM_MATH_SINGULAR when a zero pivot cannot be replaced by a
+ * row exchange, or ARM_MATH_SIZE_MISMATCH (only if ARM_MATH_MATRIX_CHECK is defined).
+ */
+arm_status arm_mat_inverse_f32(
+  const arm_matrix_instance_f32 * pSrc,
+        arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
+  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
+  float32_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
+  float32_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
+  float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
+  uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
+
+#if defined (ARM_MATH_DSP)
+
+  float32_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
+  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l;      /* loop counters */
+  arm_status status;                             /* status of matrix inverse */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pSrc->numCols) ||
+      (pDst->numRows != pDst->numCols) ||
+      (pSrc->numRows != pDst->numRows)   )
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+
+#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
+
+  {
+
+    /*--------------------------------------------------------------------------------------------------------------
+     * Matrix Inverse can be solved using elementary row operations.
+     *
+     *  Gauss-Jordan Method:
+     *
+     *      1. First combine the identity matrix and the input matrix separated by a bar to form an
+     *        augmented matrix as follows:
+     *                      _                  _         _         _
+     *                     |  a11  a12 | 1   0  |       |  X11 X12  |
+     *                     |           |        |   =   |           |
+     *                     |_ a21  a22 | 0   1 _|       |_ X21 X22 _|
+     *
+     *      2. In our implementation, pDst Matrix is used as identity matrix.
+     *
+     *      3. Begin with the first row. Let i = 1.
+     *
+     *      4. Check to see if the pivot for row i is zero.
+     *         The pivot is the element of the main diagonal that is on the current row.
+     *         For instance, if working with row i, then the pivot element is aii.
+     *         If the pivot is zero, exchange that row with a row below it that does not
+     *         contain a zero in column i. If this is not possible, then an inverse
+     *         to that matrix does not exist.
+     *
+     *      5. Divide every element of row i by the pivot.
+     *
+     *      6. For every row above and below row i, replace that row with the sum of that row and
+     *         a multiple of row i so that each new element in column i outside row i is zero.
+     *
+     *      7. Move to the next row and column and repeat steps 4 through 6 until you have zeros
+     *         for every element below and above the main diagonal.
+     *
+     *      8. Now an identity matrix is formed to the left of the bar (input matrix, pSrc).
+     *         Therefore, the matrix to the right of the bar is our solution (output matrix, pDst).
+     *----------------------------------------------------------------------------------------------------------------*/
+
+    /* Working pointer for destination matrix */
+    pOutT1 = pOut;
+
+    /* Loop over the number of rows */
+    rowCnt = numRows;
+
+    /* Making the destination matrix as identity matrix */
+    while (rowCnt > 0U)
+    {
+      /* Writing all zeroes in lower triangle of the destination matrix */
+      j = numRows - rowCnt;
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Writing all ones in the diagonal of the destination matrix */
+      *pOutT1++ = 1.0f;
+
+      /* Writing all zeroes in upper triangle of the destination matrix */
+      j = rowCnt - 1U;
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Decrement loop counter */
+      rowCnt--;
+    }
+
+    /* Loop over the number of columns of the input matrix.
+       All the elements in each column are processed by the row operations */
+    loopCnt = numCols;
+
+    /* Index modifier to navigate through the columns */
+    l = 0U;
+
+    while (loopCnt > 0U)
+    {
+      /* Check if the pivot element is zero..
+       * If it is zero then interchange the row with non zero row below.
+       * If there is no non zero element to replace in the rows below,
+       * then the matrix is Singular. */
+
+      /* Working pointer to the pivot element of the current input row */
+      pInT1 = pIn + (l * numCols);
+
+      /* Working pointer to the start of the matching destination row */
+      pOutT1 = pOut + (l * numCols);
+
+      /* Temporary variable to hold the pivot value */
+      in = *pInT1;
+
+      /* Clear the exchange flag so that it only reports an exchange made for this column */
+      flag = 0U;
+
+      /* Check if the pivot element is zero */
+      if (*pInT1 == 0.0f)
+      {
+        /* Loop over the rows present below the pivot row */
+        for (i = 1U; i < numRows - l; i++)
+        {
+          /* Update the input and destination pointers */
+          pInT2 = pInT1 + (numCols * i);
+          pOutT2 = pOutT1 + (numCols * i);
+
+          /* A row below with a non zero element in the pivot column can take over */
+          if (*pInT2 != 0.0f)
+          {
+            /* Loop over number of columns
+             * to the right of the pivot element */
+            j = numCols - l;
+
+            while (j > 0U)
+            {
+              /* Exchange the row elements of the input matrix */
+              Xchg = *pInT2;
+              *pInT2++ = *pInT1;
+              *pInT1++ = Xchg;
+
+              /* Decrement the loop counter */
+              j--;
+            }
+
+            /* Loop over number of columns of the destination matrix */
+            j = numCols;
+
+            while (j > 0U)
+            {
+              /* Exchange the row elements of the destination matrix */
+              Xchg = *pOutT2;
+              *pOutT2++ = *pOutT1;
+              *pOutT1++ = Xchg;
+
+              /* Decrement loop counter */
+              j--;
+            }
+
+            /* Flag to indicate whether exchange is done or not */
+            flag = 1U;
+
+            /* Break after exchange is done */
+            break;
+          }
+        }
+      }
+
+      /* Zero pivot and no exchange for this column: the matrix is singular */
+      if ((flag != 1U) && (in == 0.0f))
+      {
+        return ARM_MATH_SINGULAR;
+      }
+
+      /* Points to the pivot row of input and destination matrices */
+      pPivotRowIn = pIn + (l * numCols);
+      pPivotRowDst = pOut + (l * numCols);
+
+      /* Temporary pointers to the pivot row pointers */
+      pInT1 = pPivotRowIn;
+      pInT2 = pPivotRowDst;
+
+      /* Pivot element of the row (re-read, the row may have been exchanged) */
+      in = *pPivotRowIn;
+
+      /* Loop over number of columns
+       * to the right of the pivot element */
+      j = (numCols - l);
+
+      while (j > 0U)
+      {
+        /* Divide each element of the row of the input matrix
+         * by the pivot element */
+        in1 = *pInT1;
+        *pInT1++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Loop over number of columns of the destination matrix */
+      j = numCols;
+
+      while (j > 0U)
+      {
+        /* Divide each element of the row of the destination matrix
+         * by the pivot element */
+        in1 = *pInT2;
+        *pInT2++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Replace every other row with the sum of that row and a multiple of the pivot row
+       * so that its element in the pivot column becomes zero. */
+
+      /* Temporary pointers for input and destination matrices */
+      pInT1 = pIn;
+      pInT2 = pOut;
+
+      /* index used to check for pivot element */
+      i = 0U;
+
+      /* Loop over number of rows */
+      /*  to be replaced by the sum of that row and a multiple of the pivot row */
+      k = numRows;
+
+      while (k > 0U)
+      {
+        /* Check for the pivot element */
+        if (i == l)
+        {
+          /* The pivot row itself is not modified:
+             step both working pointers over it */
+          pInT1 += numCols - l;
+
+          pInT2 += numCols;
+        }
+        else
+        {
+          /* Element of the current row in the pivot column, used as multiplier */
+          in = *pInT1;
+
+          /* Working pointers for input and destination pivot rows */
+          pPRT_in = pPivotRowIn;
+          pPRT_pDst = pPivotRowDst;
+
+          /* Loop over the number of columns to the right of the pivot element,
+             to replace the elements in the input matrix */
+          j = (numCols - l);
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT1;
+            *pInT1++ = in1 - (in * *pPRT_in++);
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+          /* Loop over the number of columns to
+             replace the elements in the destination matrix */
+          j = numCols;
+
+          while (j > 0U)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT2;
+            *pInT2++ = in1 - (in * *pPRT_pDst++);
+
+            /* Decrement loop counter */
+            j--;
+          }
+
+        }
+
+        /* Skip the l leading columns of the next input row to land on its pivot column */
+        pInT1 = pInT1 + l;
+
+        /* Decrement loop counter */
+        k--;
+
+        /* Increment pivot index */
+        i++;
+      }
+
+      /* Move the input base pointer to the next column */
+      pIn++;
+
+      /* Decrement the loop counter */
+      loopCnt--;
+
+      /* Increment the index modifier */
+      l++;
+    }
+
+#else
+
+  float32_t Xchg, in = 0.0f;                     /* Temporary input values  */
+  uint32_t i, rowCnt, flag = 0U, j, loopCnt, l;      /* loop counters */
+  arm_status status;                             /* status of matrix inverse */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pSrc->numCols) ||
+      (pDst->numRows != pDst->numCols) ||
+      (pSrc->numRows != pDst->numRows)   )
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+
+#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
+
+  {
+
+    /*--------------------------------------------------------------------------------------------------------------
+     * Matrix Inverse can be solved using elementary row operations.
+     *
+     *  Gauss-Jordan Method:
+     *
+     *      1. First combine the identity matrix and the input matrix separated by a bar to form an
+     *        augmented matrix as follows:
+     *                      _  _          _     _      _   _         _         _
+     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
+     *                     |  |            | | |        |   |   =   |           |
+     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X22 _|
+     *
+     *      2. In our implementation, pDst Matrix is used as identity matrix.
+     *
+     *      3. Begin with the first row. Let i = 1.
+     *
+     *      4. Check to see if the pivot for row i is zero.
+     *         The pivot is the element of the main diagonal that is on the current row.
+     *         For instance, if working with row i, then the pivot element is aii.
+     *         If the pivot is zero, exchange that row with a row below it that does not
+     *         contain a zero in column i. If this is not possible, then an inverse
+     *         to that matrix does not exist.
+     *
+     *      5. Divide every element of row i by the pivot.
+     *
+     *      6. For every row above and below row i, replace that row with the sum of that row and
+     *         a multiple of row i so that each new element in column i outside row i is zero.
+     *
+     *      7. Move to the next row and column and repeat steps 4 through 6 until you have zeros
+     *         for every element below and above the main diagonal.
+     *
+     *      8. Now an identity matrix is formed to the left of the bar (input matrix, pSrc).
+     *         Therefore, the matrix to the right of the bar is our solution (output matrix, pDst).
+     *----------------------------------------------------------------------------------------------------------------*/
+
+    /* Working pointer for destination matrix */
+    pOutT1 = pOut;
+
+    /* Loop over the number of rows */
+    rowCnt = numRows;
+
+    /* Making the destination matrix as identity matrix */
+    while (rowCnt > 0U)
+    {
+      /* Writing all zeroes in lower triangle of the destination matrix */
+      j = numRows - rowCnt;
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Writing all ones in the diagonal of the destination matrix */
+      *pOutT1++ = 1.0f;
+
+      /* Writing all zeroes in upper triangle of the destination matrix */
+      j = rowCnt - 1U;
+      while (j > 0U)
+      {
+        *pOutT1++ = 0.0f;
+        j--;
+      }
+
+      /* Decrement loop counter */
+      rowCnt--;
+    }
+
+    /* Loop over the number of columns of the input matrix.
+       All the elements in each column are processed by the row operations */
+    loopCnt = numCols;
+
+    /* Index modifier to navigate through the columns */
+    l = 0U;
+
+    while (loopCnt > 0U)
+    {
+      /* Check if the pivot element is zero..
+       * If it is zero then interchange the row with non zero row below.
+       * If there is no non zero element to replace in the rows below,
+       * then the matrix is Singular. */
+
+      /* Working pointer to the pivot element of the current input row */
+      pInT1 = pIn + (l * numCols);
+
+      /* Working pointer to the start of the matching destination row */
+      pOutT1 = pOut + (l * numCols);
+
+      /* Temporary variable to hold the pivot value */
+      in = *pInT1;
+
+      /* Clear the exchange flag so that it only reports an exchange made for this column */
+      flag = 0U;
+
+      /* Check if the pivot element is zero */
+      if (*pInT1 == 0.0f)
+      {
+        /* Loop over the rows present below the pivot row */
+        for (i = 1U; i < numRows-l; i++)
+        {
+          /* Update the input and destination pointers */
+          pInT2 = pInT1 + (numCols * i);
+          pOutT2 = pOutT1 + (numCols * i);
+
+          /* A row below with a non zero element in the pivot column can take over */
+          if (*pInT2 != 0.0f)
+          {
+            /* Loop over number of columns to the right of the pivot element */
+            for (j = 0U; j < (numCols - l); j++)
+            {
+              /* Exchange the row elements of the input matrix */
+              Xchg = *pInT2;
+              *pInT2++ = *pInT1;
+              *pInT1++ = Xchg;
+            }
+
+            for (j = 0U; j < numCols; j++)
+            {
+              Xchg = *pOutT2;
+              *pOutT2++ = *pOutT1;
+              *pOutT1++ = Xchg;
+            }
+
+            /* Flag to indicate whether exchange is done or not */
+            flag = 1U;
+
+            /* Break after exchange is done */
+            break;
+          }
+        }
+      }
+
+      /* Zero pivot and no exchange for this column: the matrix is singular */
+      if ((flag != 1U) && (in == 0.0f))
+      {
+        return ARM_MATH_SINGULAR;
+      }
+
+      /* Points to the pivot row of input and destination matrices */
+      pPivotRowIn = pIn + (l * numCols);
+      pPivotRowDst = pOut + (l * numCols);
+
+      /* Temporary pointers to the pivot row pointers */
+      pInT1 = pPivotRowIn;
+      pOutT1 = pPivotRowDst;
+
+      /* Pivot element of the row (re-read, the row may have been exchanged) */
+      in = *(pIn + (l * numCols));
+
+      /* Loop over number of columns
+       * to the right of the pivot element */
+      for (j = 0U; j < (numCols - l); j++)
+      {
+        /* Divide each element of the row of the input matrix
+         * by the pivot element */
+        *pInT1 = *pInT1 / in;
+        pInT1++;
+      }
+      for (j = 0U; j < numCols; j++)
+      {
+        /* Divide each element of the row of the destination matrix
+         * by the pivot element */
+        *pOutT1 = *pOutT1 / in;
+        pOutT1++;
+      }
+
+      /* Replace every other row with the sum of that row and a multiple of the pivot row
+       * so that its element in the pivot column becomes zero. */
+
+      /* Temporary pointers for input and destination matrices */
+      pInT1 = pIn;
+      pOutT1 = pOut;
+
+      for (i = 0U; i < numRows; i++)
+      {
+        /* Check for the pivot element */
+        if (i == l)
+        {
+          /* The pivot row itself is not modified:
+             step both working pointers over it */
+          pInT1 += numCols - l;
+          pOutT1 += numCols;
+        }
+        else
+        {
+          /* Element of the current row in the pivot column, used as multiplier */
+          in = *pInT1;
+
+          /* Working pointers for input and destination pivot rows */
+          pPRT_in = pPivotRowIn;
+          pPRT_pDst = pPivotRowDst;
+
+          /* Loop over the number of columns to the right of the pivot element,
+             to replace the elements in the input matrix */
+          for (j = 0U; j < (numCols - l); j++)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            *pInT1 = *pInT1 - (in * *pPRT_in++);
+            pInT1++;
+          }
+
+          /* Loop over the number of columns to
+             replace the elements in the destination matrix */
+          for (j = 0U; j < numCols; j++)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            *pOutT1 = *pOutT1 - (in * *pPRT_pDst++);
+            pOutT1++;
+          }
+
+        }
+
+        /* Skip the l leading columns of the next input row to land on its pivot column */
+        pInT1 = pInT1 + l;
+      }
+
+      /* Move the input base pointer to the next column */
+      pIn++;
+
+      /* Decrement the loop counter */
+      loopCnt--;
+
+      /* Increment the index modifier */
+      l++;
+    }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+    /* Set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+    /* Final guard: no exchange flagged, zero multiplier and an all-zero source => singular */
+    if ((flag != 1U) && (in == 0.0f))
+    {
+      pIn = pSrc->pData;
+      for (i = 0; i < numRows * numCols; i++)
+      {
+        if (pIn[i] != 0.0f)
+            break;
+      }
+
+      if (i == numRows * numCols)
+        status = ARM_MATH_SINGULAR;
+    }
+  }
+
+  /* Return to application */
+  return (status);
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of MatrixInv group
+ */