CMSIS-NN: Add elementwise add/mul int16 and unit tests (#1419) * CMSIS-NN: Add elementwise add/mul int16 and unit tests Zero out offsets for add/mul s16 Co-authored-by: Annie Tallund <annie.tallund@arm.com> and Måns Nilsson <mans.nilsson@arm.com>

commit: 773cdffff47e6ffca2e9e4d5d422c2cd2570f26f [log] [tgz]
author: Måns Nilsson <mans.nilsson@arm.com> Thu Feb 17 15:11:54 2022 +0100
committer: GitHub <noreply@github.com> Thu Feb 17 16:11:54 2022 +0200
tree: 31b6c80ac1e17dd38753dbe747442dd989230acf
parent: addd45db29184b4da1efe90c1613fe830e3da921 [diff]
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index c1c9994..8a2af8c 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc

@@ -18,6 +18,7 @@
        - Support dilation for int8 depthwise convolution
        - Support for int16 depthwise conv for reference implementation including dilation
        - Support for int16 average and max pooling for reference implementation
+       - Support for elementwise add and mul int16 scalar version
       CMSIS-RTOS2:
         - RTX 5.5.4 (see revision history for details)
     </release>
@@ -2849,7 +2850,9 @@
         <file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c"/>
         <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c"/>
         <file category="source" name="CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c"/>

diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index 4d13c2f..c3d8f97 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        7 February 2022
- * $Revision:    V.8.0.0
+ * $Date:        14 February 2022
+ * $Revision:    V.8.0.1
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -1603,27 +1603,27 @@
 /**
  * @defgroup BasicMath Basic math functions
  *
- * Element wise add and multiplication functions.
+ * Elementwise add and multiplication functions.
  *
  */
 
 /**
- * @brief s8 element wise add of two vectors
+ * @brief s8 elementwise add of two vectors
  * @param[in]       input_1_vect            pointer to input vector 1
  * @param[in]       input_2_vect            pointer to input vector 2
- * @param[in]       input_1_offset          offset for input 1. Range: Range: -127 to 128
+ * @param[in]       input_1_offset          offset for input 1. Range: -127 to 128
  * @param[in]       input_1_mult            multiplier for input 1
  * @param[in]       input_1_shift           shift for input 1
- * @param[in]       input_2_offset          offset for input 2. Range: Range: -127 to 128
+ * @param[in]       input_2_offset          offset for input 2. Range: -127 to 128
  * @param[in]       input_2_mult            multiplier for input 2
  * @param[in]       input_2_shift           shift for input 2
  * @param[in]       left_shift              input left shift
  * @param[in,out]   output                  pointer to output vector
- * @param[in]       out_offset              output offset
+ * @param[in]       out_offset              output offset. Range: -128 to 127
  * @param[in]       out_mult                output multiplier
  * @param[in]       out_shift               output shift
- * @param[in]       out_activation_min      minimum value to clamp output to
- * @param[in]       out_activation_max      maximum value to clamp output to
+ * @param[in]       out_activation_min      minimum value to clamp output to. Min: -128
+ * @param[in]       out_activation_max      maximum value to clamp output to. Max: 127
  * @param[in]       block_size              number of samples
  * @return          The function returns    ARM_MATH_SUCCESS
  */
@@ -1642,20 +1642,57 @@
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
-                                  const uint32_t block_size);
+                                  const int32_t block_size);
 
 /**
- * @brief s8 element wise multiplication
+ * @brief s16 elementwise add of two vectors
  * @param[in]       input_1_vect            pointer to input vector 1
  * @param[in]       input_2_vect            pointer to input vector 2
- * @param[in]       input_1_offset          offset for input 1. Range: Range: -127 to 128
- * @param[in]       input_2_offset          offset for input 2. Range: Range: -127 to 128
+ * @param[in]       input_1_offset          offset for input 1. Not used.
+ * @param[in]       input_1_mult            multiplier for input 1
+ * @param[in]       input_1_shift           shift for input 1
+ * @param[in]       input_2_offset          offset for input 2. Not used.
+ * @param[in]       input_2_mult            multiplier for input 2
+ * @param[in]       input_2_shift           shift for input 2
+ * @param[in]       left_shift              input left shift
  * @param[in,out]   output                  pointer to output vector
- * @param[in]       out_offset              output offset
+ * @param[in]       out_offset              output offset. Not used.
  * @param[in]       out_mult                output multiplier
  * @param[in]       out_shift               output shift
- * @param[in]       out_activation_min      minimum value to clamp output to
- * @param[in]       out_activation_max      maximum value to clamp output to
+ * @param[in]       out_activation_min      minimum value to clamp output to. Min: -32768
+ * @param[in]       out_activation_max      maximum value to clamp output to. Max: 32767
+ * @param[in]       block_size              number of samples
+ * @return          The function returns    ARM_MATH_SUCCESS
+ */
+arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+                                   const int16_t *input_2_vect,
+                                   const int32_t input_1_offset,
+                                   const int32_t input_1_mult,
+                                   const int32_t input_1_shift,
+                                   const int32_t input_2_offset,
+                                   const int32_t input_2_mult,
+                                   const int32_t input_2_shift,
+                                   const int32_t left_shift,
+                                   int16_t *output,
+                                   const int32_t out_offset,
+                                   const int32_t out_mult,
+                                   const int32_t out_shift,
+                                   const int32_t out_activation_min,
+                                   const int32_t out_activation_max,
+                                   const int32_t block_size);
+
+/**
+ * @brief s8 elementwise multiplication
+ * @param[in]       input_1_vect            pointer to input vector 1
+ * @param[in]       input_2_vect            pointer to input vector 2
+ * @param[in]       input_1_offset          offset for input 1. Range: -127 to 128
+ * @param[in]       input_2_offset          offset for input 2. Range: -127 to 128
+ * @param[in,out]   output                  pointer to output vector
+ * @param[in]       out_offset              output offset. Range: -128 to 127
+ * @param[in]       out_mult                output multiplier
+ * @param[in]       out_shift               output shift
+ * @param[in]       out_activation_min      minimum value to clamp output to. Min: -128
+ * @param[in]       out_activation_max      maximum value to clamp output to. Max: 127
  * @param[in]       block_size              number of samples
  * @return          The function returns    ARM_MATH_SUCCESS
  *
@@ -1671,7 +1708,37 @@
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
-                                  const uint32_t block_size);
+                                  const int32_t block_size);
+
+/**
+ * @brief s16 elementwise multiplication
+ * @param[in]       input_1_vect            pointer to input vector 1
+ * @param[in]       input_2_vect            pointer to input vector 2
+ * @param[in]       input_1_offset          offset for input 1. Not used.
+ * @param[in]       input_2_offset          offset for input 2. Not used.
+ * @param[in,out]   output                  pointer to output vector
+ * @param[in]       out_offset              output offset. Not used.
+ * @param[in]       out_mult                output multiplier
+ * @param[in]       out_shift               output shift
+ * @param[in]       out_activation_min      minimum value to clamp output to. Min: -32768
+ * @param[in]       out_activation_max      maximum value to clamp output to. Max: 32767
+ * @param[in]       block_size              number of samples
+ * @return          The function returns    ARM_MATH_SUCCESS
+ *
+ * @details   Supported framework: TensorFlow Lite micro
+ */
+arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+                                   const int16_t *input_2_vect,
+                                   const int32_t input_1_offset,
+                                   const int32_t input_2_offset,
+                                   int16_t *output,
+                                   const int32_t out_offset,
+                                   const int32_t out_mult,
+                                   const int32_t out_shift,
+                                   const int32_t out_activation_min,
+                                   const int32_t out_activation_max,
+                                   const int32_t block_size);
+
 /**
  * @defgroup Acti Activation Functions
  *

diff --git a/CMSIS/NN/README.md b/CMSIS/NN/README.md
index 9ac6593..d8534bd 100644
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md

@@ -52,7 +52,9 @@
 |[Misc](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| |  ||
 ||arm_reshape_s8()| SOFTMAX | None | None | No | No | |
 ||arm_elementwise_add_s8()| ELEMENTWISE ADD | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_add_s16()| ELEMENTWISE ADD | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
 ||arm_elementwise_mul_s8()| ELEMENTWISE MUL | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_mul_s16()| ELEMENTWISE MUL | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
 ||arm_relu_q7() | RELU | None | None | Yes| No|
 ||arm_relu6_s8() | RELU | None | None | Yes| No|
 |[Concat](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| |  ||

diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c
new file mode 100644
index 0000000..6b1366d
--- /dev/null
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c

@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_elementwise_add_s16
+ * Description:  Elementwise add
+ *
+ * $Date:        14 February 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/*
+ * s16 elementwise add: each input sample is left-shifted and requantized, the
+ * two results are summed, and the sum is requantized, clamped and stored.
+ * Refer to the header file for details.
+ *
+ */
+
+/* Note: __SHIFT is expected to be <=0. NOTE(review): no __SHIFT exists in this file; presumably the *_shift arguments are meant - confirm. */
+
+arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+                                   const int16_t *input_2_vect,
+                                   const int32_t input_1_offset,
+                                   const int32_t input_1_mult,
+                                   const int32_t input_1_shift,
+                                   const int32_t input_2_offset,
+                                   const int32_t input_2_mult,
+                                   const int32_t input_2_shift,
+                                   const int32_t left_shift,
+                                   int16_t *output,
+                                   const int32_t out_offset,
+                                   const int32_t out_mult,
+                                   const int32_t out_shift,
+                                   const int32_t out_activation_min,
+                                   const int32_t out_activation_max,
+                                   const int32_t block_size)
+{
+    (void)input_1_offset; /* the three offsets are never read in this function */
+    (void)input_2_offset;
+    (void)out_offset;
+    int32_t loop_count;
+    int32_t input_1;
+    int32_t input_2;
+    int32_t sum;
+
+    loop_count = block_size; /* one element per iteration; the loop is skipped when block_size <= 0 */
+
+    while (loop_count > 0)
+    {
+        /* C = A + B: read one sample from each input and apply the common left shift */
+        input_1 = *input_1_vect++ << left_shift;
+        input_2 = *input_2_vect++ << left_shift;
+
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift); /* per-input multiplier and shift */
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift); /* output multiplier and shift; out_offset is not added */
+
+        sum = MAX(sum, out_activation_min); /* clamp to [out_activation_min, out_activation_max] */
+        sum = MIN(sum, out_activation_max);
+
+        *output++ = (int16_t)sum; /* store the clamped result as int16 */
+
+        /* One element done: decrement the remaining-element counter */
+        loop_count--;
+    }
+
+    return (ARM_MATH_SUCCESS); /* the only return path in this function */
+}
+
+/**
+ * @} end of BasicMath group
+ */

diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
index 6bade7b..13b6bb3 100644
--- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -19,10 +19,10 @@
 /* ----------------------------------------------------------------------
  * Project:      CMSIS NN Library
  * Title:        arm_elementwise_add_s8
- * Description:  Element wise add
+ * Description:  Elementwise add
  *
- * $Date:        20. July 2021
- * $Revision:    V.2.5.4
+ * $Date:        3 February 2022
+ * $Revision:    V.2.6.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -31,21 +31,6 @@
 #include "arm_nnfunctions.h"
 #include "arm_nnsupportfunctions.h"
 
-#if defined(ARM_MATH_MVEI)
-#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT)                                                                     \
-    __INPUT_V = arm_doubling_high_mult_mve(__INPUT_V, __MULT);                                                         \
-    __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT);
-#endif
-
-/**
- * @note The *_no_sat API does not mean that the input not saturated, Since
- *       __MULT is a positive integer, it is saturated. The API definition
- *       has more info about it.
- */
-#define SAT_INPUT(__INPUT, __MULT, __SHIFT)                                                                            \
-    __INPUT = arm_nn_doubling_high_mult_no_sat(__INPUT, __MULT);                                                       \
-    __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT);
-
 /**
  *  @ingroup groupNN
  */
@@ -56,7 +41,7 @@
  */
 
 /*
- * s8 element wise add
+ * s8 elementwise add
  *
  * Refer header file for details.
  *
@@ -79,10 +64,10 @@
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
-                                  const uint32_t block_size)
+                                  const int32_t block_size)
 {
 #if defined(ARM_MATH_MVEI)
-    int32_t count = (int32_t)block_size;
+    int32_t count = block_size;
 
     while (count > 0)
     {
@@ -100,11 +85,11 @@
         vect_1 = vshlq_r_s32(vect_1, left_shift);
         vect_2 = vshlq_r_s32(vect_2, left_shift);
 
-        SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift);
-        SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift);
+        vect_1 = arm_requantize_mve(vect_1, input_1_mult, input_1_shift);
+        vect_2 = arm_requantize_mve(vect_2, input_2_mult, input_2_shift);
 
         vect_1 = vaddq_s32(vect_1, vect_2);
-        SAT_INPUT_VECT(vect_1, out_mult, out_shift);
+        vect_1 = arm_requantize_mve(vect_1, out_mult, out_shift);
 
         vect_1 = vaddq_n_s32(vect_1, out_offset);
 
@@ -119,7 +104,7 @@
         count -= 4;
     }
 #else
-    uint32_t loop_count;
+    int32_t loop_count;
     int32_t input_1;
     int32_t input_2;
     int32_t sum;
@@ -136,7 +121,7 @@
 
     loop_count = block_size >> 2;
 
-    while (loop_count > 0U)
+    while (loop_count > 0)
     {
         /* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension
            intrinsic */
@@ -152,13 +137,13 @@
         /* Sum 1 */
         input_1 = (b_1 & 0x0FFFF) << left_shift;
 
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
         input_2 = (b_2 & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
@@ -166,13 +151,13 @@
 
         /* Sum 3 */
         input_1 = ((b_1 >> 16) & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
         input_2 = ((b_2 >> 16) & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
@@ -180,13 +165,13 @@
 
         /* Sum 2 */
         input_1 = (a_1 & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
         input_2 = (a_2 & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
@@ -194,13 +179,13 @@
 
         /* Sum 4 */
         input_1 = ((a_1 >> 16) & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
         input_2 = ((a_2 >> 16) & 0x0FFFF) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
@@ -216,21 +201,18 @@
     loop_count = block_size;
 #endif
 
-    while (loop_count > 0U)
+    while (loop_count > 0)
     {
         /* C = A + B */
 
         input_1 = (*input_1_vect++ + input_1_offset) << left_shift;
         input_2 = (*input_2_vect++ + input_2_offset) << left_shift;
 
-        input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult);
-        input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift);
-
-        input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult);
-        input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
 
         sum = MAX(sum, out_activation_min);

diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..4e25574
--- /dev/null
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c

@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_elementwise_mul_s16
+ * Description:  Elementwise multiplication
+ *
+ * $Date:        14 February 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/**
+ * @brief s16 elementwise multiplication of two vectors
+ *
+ * @note   Refer to the header file for details. The offset arguments are not
+ *         read; each product is requantized and clamped before it is stored.
+ */
+arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+                                   const int16_t *input_2_vect,
+                                   const int32_t input_1_offset,
+                                   const int32_t input_2_offset,
+                                   int16_t *output,
+                                   const int32_t out_offset,
+                                   const int32_t out_mult,
+                                   const int32_t out_shift,
+                                   const int32_t out_activation_min,
+                                   const int32_t out_activation_max,
+                                   const int32_t block_size)
+{
+    (void)input_1_offset; /* the three offsets are never read in this function */
+    (void)input_2_offset;
+    (void)out_offset;
+    int32_t loop_count;
+    int32_t input_1;
+    int32_t input_2;
+    int32_t mul_res;
+
+    loop_count = block_size; /* one element per iteration; the loop is skipped when block_size <= 0 */
+
+    while (loop_count > 0)
+    {
+        /* C = A * B: read one sample from each input into 32-bit locals */
+
+        input_1 = *input_1_vect++;
+        input_2 = *input_2_vect++;
+
+        mul_res = input_1 * input_2; /* product is formed in int32 */
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift); /* output multiplier and shift; out_offset is not added */
+
+        mul_res = MAX(mul_res, out_activation_min); /* clamp to [out_activation_min, out_activation_max] */
+        mul_res = MIN(mul_res, out_activation_max);
+
+        *output++ = (int16_t)mul_res; /* store the clamped result as int16 */
+
+        /* One element done: decrement the remaining-element counter */
+        loop_count--;
+    }
+
+    return ARM_MATH_SUCCESS; /* the only return path in this function */
+}
+
+/**
+ * @} end of BasicMath group
+ */

diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
index 3e3a63b..ff04cbf 100644
--- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_elementwise_mul_s8
  * Description:  Element wise multiplication
  *
- * $Date:        July 20, 2021
- * $Revision:    V.1.0.6
+ * $Date:        3 February 2022
+ * $Revision:    V.1.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -57,7 +57,7 @@
                                   const int32_t out_shift,
                                   const int32_t out_activation_min,
                                   const int32_t out_activation_max,
-                                  const uint32_t block_size)
+                                  const int32_t block_size)
 {
 
     int32_t loop_count;

diff --git a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
index 707f46e..164d515 100644
--- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt

@@ -66,20 +66,24 @@
     set_property(GLOBAL PROPERTY cmsis_nn_unit_test_executables "${tmp}")
 endfunction(add_cmsis_nn_unit_test_executable)
 
-add_subdirectory(TestCases/test_arm_avgpool_s8)
 add_subdirectory(TestCases/test_arm_avgpool_s16)
+add_subdirectory(TestCases/test_arm_avgpool_s8)
 add_subdirectory(TestCases/test_arm_convolve_1x1_s8_fast)
-add_subdirectory(TestCases/test_arm_convolve_s8)
-add_subdirectory(TestCases/test_arm_convolve_s16)
 add_subdirectory(TestCases/test_arm_convolve_fast_s16)
+add_subdirectory(TestCases/test_arm_convolve_s16)
+add_subdirectory(TestCases/test_arm_convolve_s8)
 add_subdirectory(TestCases/test_arm_depthwise_conv_3x3_s8)
+add_subdirectory(TestCases/test_arm_depthwise_conv_s16)
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8)
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8_opt)
-add_subdirectory(TestCases/test_arm_depthwise_conv_s16)
-add_subdirectory(TestCases/test_arm_fully_connected_s8)
+add_subdirectory(TestCases/test_arm_elementwise_add_s16)
+add_subdirectory(TestCases/test_arm_elementwise_add_s8)
+add_subdirectory(TestCases/test_arm_elementwise_mul_s16)
+add_subdirectory(TestCases/test_arm_elementwise_mul_s8)
 add_subdirectory(TestCases/test_arm_fully_connected_s16)
-add_subdirectory(TestCases/test_arm_max_pool_s8)
+add_subdirectory(TestCases/test_arm_fully_connected_s8)
 add_subdirectory(TestCases/test_arm_max_pool_s16)
+add_subdirectory(TestCases/test_arm_max_pool_s8)
 add_subdirectory(TestCases/test_arm_softmax_s8)
 add_subdirectory(TestCases/test_arm_svdf_s8)
 

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt
new file mode 100644
index 0000000..b36be40
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/input.txt

@@ -0,0 +1,17 @@
+# 1,4,4,8
+-9.295782470703125000e+01,-1.086945724487304688e+02,6.923994445800781250e+01,-1.057811355590820312e+02,3.317919921875000000e+01,9.666183471679687500e+00,1.256012420654296875e+02,-1.729664611816406250e+01
+-1.201002273559570312e+02,8.354003906250000000e+00,1.095191192626953125e+02,-5.217841339111328125e+01,6.855964660644531250e+01,-1.136402664184570312e+02,-9.744009399414062500e+01,9.253901672363281250e+01
+7.336495971679687500e+01,-5.832271575927734375e+01,7.275946044921875000e+01,5.621168518066406250e+01,-2.811369323730468750e+01,-1.064083709716796875e+02,-5.671269226074218750e+01,1.013197326660156250e+02
+-5.051823425292968750e+01,-5.165785789489746094e+00,-8.920204162597656250e+01,-2.471372985839843750e+01,5.408367919921875000e+01,-3.100613403320312500e+01,1.517187500000000000e+01,1.175676574707031250e+02
+-6.835147094726562500e+01,3.722920227050781250e+01,7.254766845703125000e+01,4.179951477050781250e+01,-6.894918823242187500e+01,1.138137664794921875e+02,-1.040448760986328125e+01,8.314089965820312500e+01
+3.977279901504516602e+00,6.227731323242187500e+01,1.250961456298828125e+02,5.788429260253906250e+01,1.245972595214843750e+02,-1.194153518676757812e+02,-6.506634521484375000e+01,9.341912841796875000e+01
+-1.228842010498046875e+02,1.207274169921875000e+02,1.059566955566406250e+02,-8.747183227539062500e+01,1.015738677978515625e+02,-7.528302764892578125e+01,6.476969909667968750e+01,-8.886234283447265625e+01
+-6.244555664062500000e+01,1.225449371337890625e+02,-2.085887908935546875e+01,6.471858215332031250e+01,-4.115431976318359375e+01,-5.191817474365234375e+01,-9.805821228027343750e+01,2.599142456054687500e+01
+3.284560918807983398e+00,8.422149658203125000e+01,1.475296020507812500e+01,-3.306756591796875000e+01,1.119789123535156250e+02,-3.768267822265625000e+01,-7.441608428955078125e+01,-1.155044555664062500e+01
+3.421226501464843750e+01,1.218995819091796875e+02,3.512068176269531250e+01,3.007461547851562500e+01,-4.178165435791015625e+01,-7.703953552246093750e+01,-4.402140808105468750e+01,-1.098309249877929688e+02
+-1.149062347412109375e+01,3.958906555175781250e+01,3.185077095031738281e+01,6.083290100097656250e+01,1.213119812011718750e+02,4.139105224609375000e+01,-4.911738586425781250e+01,5.071537780761718750e+01
+3.887196350097656250e+01,-2.090080261230468750e+01,-1.239508743286132812e+02,3.305541992187500000e+01,1.023658294677734375e+02,8.029883575439453125e+01,1.267021331787109375e+02,1.110096435546875000e+02
+-7.935501098632812500e+00,4.101362609863281250e+01,1.688294982910156250e+01,1.209116363525390625e+02,6.502104187011718750e+01,-6.779858398437500000e+01,1.091404724121093750e+01,-8.844725036621093750e+01
+-1.136138763427734375e+02,1.071553039550781250e+02,6.892318725585937500e+01,-1.203222808837890625e+02,-7.168304443359375000e+01,-1.028485412597656250e+02,1.059083099365234375e+02,9.927880859375000000e+01
+-9.445242309570312500e+01,1.253308563232421875e+02,-1.191015472412109375e+02,9.833372497558593750e+01,-4.784445953369140625e+01,4.664404296875000000e+01,-2.272537231445312500e+01,-1.256185607910156250e+02
+-1.015063018798828125e+02,9.903826904296875000e+01,-2.011074829101562500e+01,4.990921020507812500e+00,-1.195664048194885254e+00,-1.579566955566406250e+00,6.589109802246093750e+01,-1.080894927978515625e+02

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt
new file mode 100644
index 0000000..3961796
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add/kernel.txt

@@ -0,0 +1,17 @@
+# 1,4,4,8
+7.206729125976562500e+01,7.762304687500000000e+01,-7.801148986816406250e+01,-3.279899597167968750e+01,-3.281203460693359375e+01,4.526036071777343750e+01,1.080221099853515625e+02,-4.926947021484375000e+01
+1.214063110351562500e+02,4.310248184204101562e+01,-2.534054565429687500e+01,-4.714800262451171875e+01,3.566201782226562500e+01,1.475189208984375000e+01,8.790599060058593750e+01,3.113238525390625000e+01
+2.757206726074218750e+01,9.667665100097656250e+01,-1.555439758300781250e+01,7.675241088867187500e+01,-3.137766265869140625e+01,7.716320800781250000e+01,1.087682495117187500e+02,1.146668701171875000e+02
+9.014279174804687500e+01,2.376081848144531250e+01,3.309667968750000000e+01,1.097281341552734375e+02,-7.206906890869140625e+01,1.277008056640625000e+01,1.199069671630859375e+02,-4.403179931640625000e+01
+-1.938705444335937500e+01,-5.568097686767578125e+01,-7.269144439697265625e+01,9.460415649414062500e+01,-2.219662475585937500e+01,6.548899841308593750e+01,1.033119506835937500e+02,4.380880737304687500e+01
+-9.079716491699218750e+01,-7.181563568115234375e+01,5.841765880584716797e+00,1.155065612792968750e+02,-5.521842956542968750e+00,-5.773645019531250000e+01,5.913287353515625000e+01,7.079254150390625000e+01
+5.248806762695312500e+01,7.888586425781250000e+01,7.365135192871093750e+01,4.194131469726562500e+01,1.048916320800781250e+02,-6.166264343261718750e+01,8.936982727050781250e+01,4.464817810058593750e+01
+3.714846801757812500e+01,7.748014831542968750e+01,9.133015441894531250e+01,-2.577056884765625000e+00,7.319335937500000000e+00,-1.151926803588867188e+02,4.999467468261718750e+01,1.210927124023437500e+02
+-4.962999725341796875e+01,1.540466308593750000e+01,7.622338867187500000e+01,-1.218276443481445312e+02,1.004155273437500000e+02,1.104693603515625000e+01,-3.938659667968750000e+01,-4.596747589111328125e+01
+8.642079162597656250e+01,-3.542127990722656250e+01,1.208909606933593750e+02,-6.531406402587890625e+01,-9.495910644531250000e+00,-1.270257949829101562e+02,2.849827575683593750e+01,1.113162689208984375e+02
+2.672088623046875000e+01,1.042509613037109375e+02,1.162126464843750000e+02,8.939810180664062500e+01,3.087474060058593750e+01,9.776967620849609375e+01,-1.378107452392578125e+01,-1.244079132080078125e+02
+-1.016086807250976562e+02,9.447311401367187500e+01,3.661959838867187500e+01,-8.664979553222656250e+01,-7.529193115234375000e+01,4.999642944335937500e+01,6.983352184295654297e+00,4.055065917968750000e+01
+9.580595397949218750e+01,9.963572692871093750e+01,-7.723803710937500000e+01,-6.269092559814453125e+01,-7.430888366699218750e+01,-8.858144378662109375e+01,-1.248384170532226562e+02,9.718597412109375000e+01
+-3.876348876953125000e+01,1.225116882324218750e+02,-5.063705921173095703e+00,3.252441883087158203e+00,6.319960021972656250e+01,-9.489593505859375000e+01,2.160478210449218750e+01,1.838577270507812500e+01
+1.143972625732421875e+02,-6.992826843261718750e+01,2.026805114746093750e+01,1.951538085937500000e+01,-1.215406799316406250e+02,3.057399988174438477e-01,-5.957047271728515625e+01,-6.521572875976562500e+01
+-6.931716918945312500e+01,-7.156071472167968750e+01,-2.351660919189453125e+01,5.988192749023437500e+01,1.015448608398437500e+02,-5.560777282714843750e+01,-8.696453857421875000e+01,-6.869393920898437500e+01

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt
new file mode 100644
index 0000000..f2902eb
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/input.txt

@@ -0,0 +1,17 @@
+# 1,4,4,8
+-9.079525390625000000e+03,2.064835546875000000e+04,-2.259560156250000000e+04,2.079294921875000000e+04,-2.136208789062500000e+04,-1.937610156250000000e+04,-3.269643359375000000e+04,-5.105984375000000000e+03
+-2.980064648437500000e+04,-2.159199218750000000e+04,4.030812500000000000e+03,-6.140292968750000000e+02,-2.234023828125000000e+04,-1.626913476562500000e+04,-1.295953710937500000e+04,1.518757812500000000e+03
+2.180444921875000000e+04,1.776383203125000000e+04,-1.697196093750000000e+04,-2.542497851562500000e+04,2.223205078125000000e+04,3.293941406250000000e+03,-1.414907226562500000e+04,1.225985937500000000e+04
+2.947183984375000000e+04,-1.136223242187500000e+04,2.756738281250000000e+03,-1.572518945312500000e+04,2.270083984375000000e+04,4.338339843750000000e+03,-9.487113281250000000e+03,-8.780263671875000000e+03
+9.803382812500000000e+03,1.046065234375000000e+04,1.408824609375000000e+04,-2.808096093750000000e+04,-1.423136914062500000e+04,2.328106640625000000e+04,-1.637939843750000000e+04,2.065651953125000000e+04
+-8.372460937500000000e+01,-5.054376953125000000e+03,-2.159832812500000000e+04,-8.347935546875000000e+03,-2.524527148437500000e+04,-1.336928906250000000e+04,7.721703125000000000e+03,-3.313300781250000000e+02
+3.246500390625000000e+04,-5.503087890625000000e+03,-1.652899023437500000e+04,3.107313671875000000e+04,5.627023437500000000e+03,1.666355859375000000e+04,2.707944531250000000e+04,2.323585546875000000e+04
+2.862367968750000000e+04,-5.446712890625000000e+03,-1.377110156250000000e+04,2.801305078125000000e+04,-6.471230468750000000e+02,2.540804687500000000e+03,2.545875000000000000e+03,-9.142298828125000000e+03
+2.996301953125000000e+04,2.687830859375000000e+04,-2.485566796875000000e+04,2.173520703125000000e+04,2.579554296875000000e+04,1.922597265625000000e+04,-2.404469531250000000e+04,3.844480468750000000e+03
+1.797230468750000000e+04,-1.486748437500000000e+04,-1.068930468750000000e+04,2.421027539062500000e+04,2.109536718750000000e+04,1.265168359375000000e+04,-1.248634765625000000e+04,-5.862738281250000000e+03
+-8.676210937500000000e+03,-5.523666015625000000e+03,-1.855750585937500000e+04,6.297933593750000000e+03,9.717687500000000000e+03,-7.208500000000000000e+03,-4.163576171875000000e+03,-1.354507421875000000e+04
+1.848515625000000000e+04,7.531996093750000000e+03,2.911646093750000000e+04,-1.695522656250000000e+04,-2.668137500000000000e+04,-1.033607617187500000e+04,1.683922656250000000e+04,2.731626953125000000e+04
+-1.629326757812500000e+04,-5.270310546875000000e+03,1.882841406250000000e+04,1.289742968750000000e+04,2.894542578125000000e+04,9.207265625000000000e+02,4.623945312500000000e+03,-1.920060546875000000e+04
+-2.503443750000000000e+04,-1.970680078125000000e+04,-1.017802343750000000e+04,-1.756417773437500000e+04,-3.641914062500000000e+03,1.925619921875000000e+04,9.496691406250000000e+03,2.083481640625000000e+04
+1.777793359375000000e+04,-6.373394531250000000e+03,1.066639062500000000e+04,-2.271009765625000000e+04,6.487472656250000000e+03,-2.858315820312500000e+04,2.349269531250000000e+03,-3.056509570312500000e+04
+-2.536204296875000000e+04,-2.154826562500000000e+04,3.162791406250000000e+04,-8.802865234375000000e+03,-3.153237890625000000e+04,1.109749609375000000e+04,-7.745958984375000000e+03,-1.160646484375000000e+03

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt
new file mode 100644
index 0000000..e01cab9
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/add_s16/kernel.txt

@@ -0,0 +1,17 @@
+# 1,4,4,8
+-2.510621875000000000e+04,-8.289482421875000000e+03,-1.004274609375000000e+04,1.211134765625000000e+04,2.726040625000000000e+04,4.021187500000000000e+03,-1.736442968750000000e+04,3.379667968750000000e+03
+5.150546875000000000e+02,-1.722215234375000000e+04,6.376589843750000000e+03,8.065445312500000000e+03,-1.617891796875000000e+04,-2.077441015625000000e+04,-2.789935546875000000e+04,-2.434612109375000000e+04
+2.390995703125000000e+04,-1.405164453125000000e+04,4.294128906250000000e+03,2.591995703125000000e+04,-2.936564648437500000e+04,1.356523046875000000e+04,-1.808466015625000000e+04,8.300250000000000000e+03
+1.956511718750000000e+04,-3.172256250000000000e+04,1.644530468750000000e+04,-2.475911328125000000e+04,-1.322433593750000000e+03,2.335518359375000000e+04,3.142789062500000000e+03,3.228850781250000000e+04
+-1.646988281250000000e+04,-1.530034375000000000e+04,8.058761718750000000e+03,2.689437890625000000e+04,-2.077601953125000000e+04,-1.135917773437500000e+04,-2.272833984375000000e+04,1.609108984375000000e+04
+-3.111657226562500000e+04,3.138480859375000000e+04,-2.444791015625000000e+03,-2.954947851562500000e+04,-1.540992187500000000e+04,-1.874251953125000000e+04,-3.189831835937500000e+04,-3.276591406250000000e+04
+2.404804687500000000e+02,-8.225742187500000000e+02,1.046314453125000000e+04,1.268813281250000000e+04,-2.960290039062500000e+04,-3.183902929687500000e+04,2.186680078125000000e+04,-3.173025781250000000e+04
+-4.092718750000000000e+03,2.163746484375000000e+04,-2.644751953125000000e+04,-3.250920703125000000e+04,2.340273828125000000e+04,2.503908593750000000e+04,3.030681250000000000e+04,-3.101775390625000000e+04
+3.055777734375000000e+04,6.341121093750000000e+03,-5.359464843750000000e+03,2.628112890625000000e+04,-1.890782812500000000e+04,-1.919164453125000000e+04,-6.493166015625000000e+03,2.822343750000000000e+04
+5.650789062500000000e+03,2.273074218750000000e+03,3.781004150390625000e+03,4.078617187500000000e+03,-3.150773828125000000e+04,2.127483203125000000e+04,-2.997192578125000000e+04,-2.671396679687500000e+04
+-4.630195312500000000e+03,-6.975160156250000000e+03,-8.764160156250000000e+02,-2.242787695312500000e+04,-2.371823046875000000e+04,-3.140927148437500000e+04,-3.152499414062500000e+04,-1.458019140625000000e+04
+1.876931640625000000e+04,1.524257812500000000e+03,2.807634375000000000e+04,-2.026568359375000000e+04,1.982835937500000000e+03,-3.155298828125000000e+04,2.618632812500000000e+03,1.168980468750000000e+03
+-1.691392187500000000e+04,3.104346484375000000e+04,1.788054687500000000e+04,-2.306708593750000000e+04,2.814215625000000000e+04,1.514268359375000000e+04,-2.964392822265625000e+03,2.067519921875000000e+04
+3.525765625000000000e+03,-3.185146679687500000e+04,5.391292968750000000e+03,6.632703125000000000e+03,-2.413116406250000000e+04,1.372278125000000000e+04,1.524700000000000000e+04,2.658232031250000000e+04
+-2.227673828125000000e+04,-6.355027343750000000e+03,1.518551953125000000e+04,1.752178125000000000e+04,2.853054296875000000e+04,-1.196411523437500000e+04,-6.326130859375000000e+03,-1.093160742187500000e+04
+3.191719140625000000e+04,2.933204296875000000e+04,-1.924806640625000000e+04,-6.136812500000000000e+03,2.344466406250000000e+04,2.127325390625000000e+04,1.861765234375000000e+04,6.950957031250000000e+03

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt
new file mode 100644
index 0000000..3e8ed64
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/input.txt

@@ -0,0 +1,21 @@
+# 1,5,4,8
+6.087265014648437500e+00,4.634196472167968750e+01,2.089966058731079102e+00,-1.591230773925781250e+01,-6.946073913574218750e+01,1.575343322753906250e+01,1.107122802734375000e+02,1.175067749023437500e+02
+-4.076422119140625000e+01,-4.779721832275390625e+01,-9.678024291992187500e+00,-1.763525390625000000e+01,1.265813293457031250e+02,-1.080362701416015625e+02,8.487304687500000000e+00,-4.212655639648437500e+01
+-9.519744873046875000e+00,-1.702813720703125000e+01,-6.921460032463073730e-01,-1.264048156738281250e+02,1.116436996459960938e+02,1.142576599121093750e+01,1.058777313232421875e+02,9.793907165527343750e+01
+-5.287223815917968750e+00,-1.031986083984375000e+02,4.567663574218750000e+01,-2.745009613037109375e+01,6.937374877929687500e+01,-1.074696197509765625e+02,1.659455871582031250e+01,-5.155500030517578125e+01
+1.225355682373046875e+02,3.871371459960937500e+01,-7.985780334472656250e+01,8.569786071777343750e+01,-4.570547485351562500e+01,2.623266601562500000e+01,6.994493103027343750e+01,3.592803955078125000e+00
+7.499845886230468750e+01,-3.264853954315185547e+00,-9.696961975097656250e+01,-8.154697418212890625e+01,1.242909317016601562e+02,7.681944274902343750e+01,1.141309051513671875e+02,-1.198213195800781250e+02
+1.118294219970703125e+02,1.540080261230468750e+01,3.581042480468750000e+01,-7.329165649414062500e+01,-1.510517120361328125e+01,-2.156777191162109375e+01,3.607449340820312500e+01,-7.490602111816406250e+01
+-2.859937286376953125e+01,3.708404064178466797e+00,5.488592529296875000e+01,-5.864614868164062500e+01,8.717007446289062500e+01,-8.396488952636718750e+01,1.024245910644531250e+02,4.356243896484375000e+01
+-1.126940026879310608e-01,-8.146022033691406250e+01,5.181336975097656250e+01,-9.854276275634765625e+01,-9.841476440429687500e+00,-5.197341918945312500e+00,7.240777587890625000e+01,1.021828842163085938e+02
+6.944224548339843750e+01,7.731994152069091797e+00,-3.599164581298828125e+01,-4.081647491455078125e+01,-1.188762054443359375e+02,-6.660906982421875000e+01,2.065304565429687500e+01,1.003328552246093750e+02
+-7.838453674316406250e+01,5.980604553222656250e+01,6.418049621582031250e+01,-9.981350708007812500e+01,-2.877880859375000000e+01,3.104080200195312500e+01,6.499343872070312500e+01,8.385769653320312500e+01
+-4.646399974822998047e+00,1.077828216552734375e+02,-6.764379882812500000e+01,1.130384826660156250e+02,-2.629809570312500000e+01,1.142396392822265625e+02,3.708171081542968750e+01,-5.075689697265625000e+01
+-2.185798645019531250e+01,-1.192111968994140625e+01,-1.044805679321289062e+02,-5.409135437011718750e+01,1.236510467529296875e+02,-9.662140655517578125e+01,-1.060680007934570312e+02,-5.500813293457031250e+01
+-1.237120437622070312e+02,1.238904876708984375e+02,7.293537902832031250e+01,-5.333233642578125000e+01,8.378683471679687500e+01,-1.104943389892578125e+02,9.256475830078125000e+01,-1.205067749023437500e+02
+-7.697004699707031250e+01,-2.728594970703125000e+01,5.983824157714843750e+01,9.902799987792968750e+01,-4.200209975242614746e-01,7.821754455566406250e+01,9.404795837402343750e+01,1.249092864990234375e+02
+-1.036167678833007812e+02,-5.795334625244140625e+01,-1.503917694091796875e+01,-7.366282653808593750e+01,1.146158523559570312e+02,4.718167114257812500e+01,-2.744146728515625000e+01,-7.087094879150390625e+01
+1.061934661865234375e+02,9.054618835449218750e+01,-6.263049840927124023e-01,-9.296089172363281250e+01,-4.776609039306640625e+01,1.142856140136718750e+02,6.502186584472656250e+01,-1.208296051025390625e+02
+6.298899841308593750e+01,1.053328094482421875e+02,-1.000544433593750000e+02,-3.328774261474609375e+01,4.162991333007812500e+01,-1.166616821289062500e+02,-1.814725494384765625e+01,5.189924621582031250e+01
+-8.104315185546875000e+01,-1.097107315063476562e+02,-7.609074401855468750e+01,7.721604919433593750e+01,3.400953674316406250e+01,-2.119123077392578125e+01,1.236600341796875000e+02,5.729858398437500000e+01
+-4.588986968994140625e+01,6.198348617553710938e+01,-1.117596435546875000e+01,7.594029235839843750e+01,-7.848075866699218750e+00,-9.412262725830078125e+01,-5.613299560546875000e+01,-4.313046264648437500e+01

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt
new file mode 100644
index 0000000..416626e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul/kernel.txt

@@ -0,0 +1,21 @@
+# 1,5,4,8
+3.217739868164062500e+01,-4.575654983520507812e+01,-1.250932769775390625e+02,1.135259246826171875e+02,1.870326232910156250e+01,4.811854553222656250e+01,4.280308532714843750e+01,-1.320864868164062500e+01
+8.949476623535156250e+01,-3.795236968994140625e+01,2.203830033540725708e-01,-1.026309814453125000e+02,-1.138934860229492188e+02,1.262885894775390625e+02,-1.092149658203125000e+02,-8.937467956542968750e+01
+5.905468750000000000e+01,1.048391265869140625e+02,-1.242777557373046875e+02,-7.588436889648437500e+01,7.236830139160156250e+01,-9.981600189208984375e+01,8.246325683593750000e+01,9.635452270507812500e+01
+-7.473950195312500000e+01,9.809680175781250000e+01,9.104861450195312500e+01,-4.299919128417968750e+00,2.549674987792968750e+01,1.863145446777343750e+01,-1.093729705810546875e+02,-2.607852172851562500e+01
+-4.541124725341796875e+01,-4.975766754150390625e+01,-5.976505279541015625e+01,1.020122070312500000e+02,-6.062745666503906250e+01,-1.135064544677734375e+02,-1.030823974609375000e+02,3.759553527832031250e+01
+6.611544799804687500e+01,4.390705871582031250e+01,8.441696166992187500e+01,6.809455871582031250e+01,6.209817504882812500e+01,-6.231644153594970703e+00,-9.188059997558593750e+01,4.051831054687500000e+01
+-5.956570434570312500e+01,1.831413269042968750e+01,-1.913550567626953125e+01,-1.693329620361328125e+01,6.862281799316406250e+01,5.921721935272216797e+00,-3.916234588623046875e+01,-1.090182266235351562e+02
+-3.356804656982421875e+01,1.147797088623046875e+02,-1.196589279174804688e+02,4.634918212890625000e+00,-3.748310852050781250e+01,-6.430694103240966797e+00,3.020614624023437500e+01,-6.272321319580078125e+01
+-9.786624908447265625e+01,-1.989810943603515625e+01,-1.861423492431640625e+01,-4.597468566894531250e+01,2.524142456054687500e+01,-1.041804504394531250e+01,-5.897865295410156250e+01,9.512591552734375000e+01
+4.394152832031250000e+01,-6.969282531738281250e+01,-3.005688476562500000e+01,-9.578435516357421875e+01,-5.227052307128906250e+01,1.192088317871093750e+02,-1.004997482299804688e+02,-5.553445434570312500e+01
+-3.174073791503906250e+01,-1.032137145996093750e+02,-1.198178863525390625e+02,2.824850463867187500e+01,-2.728929138183593750e+01,-8.447923278808593750e+01,8.743931579589843750e+01,-3.851726531982421875e+01
+7.478394031524658203e+00,-8.185469818115234375e+01,-4.803730010986328125e+01,-3.908844757080078125e+01,-1.059177703857421875e+02,2.568099975585937500e+00,1.097543029785156250e+02,-1.218703536987304688e+02
+-6.512222290039062500e+01,-4.518241882324218750e+00,-7.977529907226562500e+01,-4.495685577392578125e+01,-9.700138854980468750e+01,-9.754153442382812500e+01,1.235915832519531250e+02,6.547801208496093750e+01
+-2.471759033203125000e+01,-1.922925567626953125e+01,1.023207855224609375e+02,2.611119079589843750e+01,-1.144902954101562500e+02,-8.321784973144531250e+01,2.100730705261230469e+01,4.209094238281250000e+01
+-6.505221557617187500e+01,-1.287672424316406250e+01,-9.540896606445312500e+01,-7.592190551757812500e+01,6.253384399414062500e+01,-2.609190368652343750e+01,9.872793579101562500e+01,-8.309507751464843750e+01
+1.092861785888671875e+02,1.240314178466796875e+02,-4.161978149414062500e+01,-3.673020172119140625e+01,-7.146414184570312500e+01,3.408689880371093750e+01,4.019821166992187500e+01,5.090278625488281250e+01
+1.034992065429687500e+02,4.370433044433593750e+01,1.106397247314453125e+02,2.916382074356079102e+00,-1.993814849853515625e+01,-1.844695281982421875e+01,1.091971893310546875e+02,6.092024230957031250e+01
+6.083335876464843750e+01,-1.325509643554687500e+01,-3.023486328125000000e+01,5.027542114257812500e+00,5.767028808593750000e+01,1.196073608398437500e+02,-4.072114562988281250e+01,-9.284222412109375000e+01
+1.542956542968750000e+01,2.754859924316406250e+01,8.004570770263671875e+01,-1.157010345458984375e+02,9.206977844238281250e+01,8.940296936035156250e+01,-1.523937988281250000e+01,7.872830200195312500e+01
+-3.596887969970703125e+01,-5.636372375488281250e+01,2.250534057617187500e+01,-6.072342681884765625e+01,1.269192047119140625e+02,2.537686157226562500e+01,1.922425842285156250e+01,4.299319458007812500e+01

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt
new file mode 100644
index 0000000..e85694d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/input.txt

@@ -0,0 +1,21 @@
+# 1,4,5,8
+-1.425103515625000000e+03,2.724908593750000000e+04,-7.046689453125000000e+03,3.719925781250000000e+03,-1.967012890625000000e+04,-2.690407421875000000e+04,-2.594553320312500000e+04,2.021718750000000000e+03
+1.392343750000000000e+04,2.192769531250000000e+04,-3.343753906250000000e+03,-1.949542187500000000e+04,-8.453089843750000000e+03,-1.351201171875000000e+04,9.689523437500000000e+03,8.383769531250000000e+03
+2.571485937500000000e+04,3.158718750000000000e+03,6.739875000000000000e+03,7.599640625000000000e+03,1.506513671875000000e+04,1.666317578125000000e+04,-2.332417578125000000e+04,2.980700781250000000e+04
+4.226511718750000000e+03,-1.883247851562500000e+04,-1.854366992187500000e+04,9.583074218750000000e+03,-1.749661718750000000e+04,-5.655492187500000000e+03,3.274915625000000000e+04,2.420670703125000000e+04
+2.057914843750000000e+04,2.856615429687500000e+04,-3.074000781250000000e+04,6.501425781250000000e+03,1.871196484375000000e+04,1.566367187500000000e+03,-2.764669531250000000e+04,-1.365868164062500000e+04
+-1.518945507812500000e+04,-3.163897070312500000e+04,-5.636765625000000000e+03,1.310699609375000000e+04,2.548141406250000000e+04,8.109695312500000000e+03,1.609133984375000000e+04,3.034515234375000000e+04
+2.585478515625000000e+04,-1.406016015625000000e+04,-2.045400976562500000e+04,7.801171875000000000e+03,1.260697265625000000e+04,8.043085937500000000e+02,2.842230859375000000e+04,2.205817187500000000e+04
+-4.720560546875000000e+03,2.435938671875000000e+04,2.020263671875000000e+04,-2.363878906250000000e+04,2.352510937500000000e+04,2.559359375000000000e+04,1.056709765625000000e+04,-7.187898437500000000e+03
+-2.620789648437500000e+04,-2.513364843750000000e+04,-2.550296289062500000e+04,-2.147159375000000000e+04,1.157153515625000000e+04,-8.240796875000000000e+03,-3.169450000000000000e+04,1.099613671875000000e+04
+-4.015791015625000000e+03,2.448114843750000000e+04,-1.993103906250000000e+04,-3.094414062500000000e+02,-9.149316406250000000e+02,-3.018190625000000000e+04,2.310589062500000000e+04,-1.069234375000000000e+04
+-6.850175781250000000e+03,-5.861050781250000000e+03,3.156734765625000000e+04,-9.949003906250000000e+03,-2.072949609375000000e+04,1.264927734375000000e+04,-1.933557226562500000e+04,-7.511267089843750000e+03
+6.989882812500000000e+02,-2.016032421875000000e+04,-2.656232812500000000e+04,2.920930468750000000e+04,1.385375781250000000e+04,1.086737500000000000e+04,2.469232031250000000e+04,-2.150264843750000000e+04
+-3.044261328125000000e+04,2.828403125000000000e+04,-1.451576367187500000e+04,-1.543800585937500000e+04,-2.524538085937500000e+04,2.741479296875000000e+04,-4.386527343750000000e+03,-2.135996289062500000e+04
+-1.337448437500000000e+04,-6.476855468750000000e+02,8.589453125000000000e+03,-1.510911328125000000e+04,2.095779687500000000e+04,1.100132421875000000e+04,4.636781250000000000e+03,-3.099037109375000000e+04
+5.427675781250000000e+03,3.042142578125000000e+04,-4.869300781250000000e+03,1.268807031250000000e+04,1.068471093750000000e+04,-3.059081445312500000e+04,-3.049953515625000000e+04,-2.132600390625000000e+04
+-1.625230664062500000e+04,-2.833001171875000000e+04,-1.370808789062500000e+04,2.979532031250000000e+04,-1.282239843750000000e+04,-3.746115234375000000e+03,2.388173046875000000e+04,-3.151986328125000000e+04
+-2.632854296875000000e+04,5.485324218750000000e+03,1.691496093750000000e+04,-4.177771484375000000e+03,-5.471197265625000000e+03,-3.216432031250000000e+04,1.795335937500000000e+04,6.862597656250000000e+03
+-1.270100976562500000e+04,1.277389843750000000e+04,-2.745739453125000000e+04,-9.879919921875000000e+03,-2.054106835937500000e+04,1.300287890625000000e+04,1.573573046875000000e+04,-1.924201171875000000e+04
+-1.955396679687500000e+04,2.350113476562500000e+04,1.203774609375000000e+04,-2.216824804687500000e+04,2.762613867187500000e+04,1.861830078125000000e+04,2.143993554687500000e+04,-6.484220703125000000e+03
+2.451112500000000000e+04,6.074679687500000000e+03,-1.673687695312500000e+04,-5.000000000000000000e-01,-1.350646484375000000e+04,2.484089453125000000e+04,3.024713281250000000e+04,-1.898581250000000000e+04

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt
new file mode 100644
index 0000000..44cb8cb
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/mul_s16/kernel.txt

@@ -0,0 +1,21 @@
+# 1,4,5,8
+-4.777974609375000000e+03,-3.022644726562500000e+04,3.051593750000000000e+03,4.274800781250000000e+03,3.067806250000000000e+04,3.673921875000000000e+03,1.718764453125000000e+04,-3.046211328125000000e+04
+7.490152343750000000e+03,1.482325000000000000e+04,-9.932605468750000000e+03,-2.892601953125000000e+04,-7.409101562500000000e+02,7.841324218750000000e+03,-1.220482226562500000e+04,5.028820312500000000e+03
+-2.685716796875000000e+03,-3.229366406250000000e+04,2.755517187500000000e+04,-2.232371484375000000e+04,3.135405468750000000e+04,-2.100906250000000000e+03,1.976414453125000000e+04,1.086859375000000000e+04
+-2.691685546875000000e+04,-2.816652343750000000e+04,-3.250991015625000000e+04,1.354970703125000000e+04,2.720939843750000000e+04,-1.721851953125000000e+04,3.098962109375000000e+04,-1.985275976562500000e+04
+-1.329346875000000000e+04,-1.365750976562500000e+04,2.165435546875000000e+04,-3.271517968750000000e+04,5.469042968750000000e+03,5.795738281250000000e+03,-2.592745703125000000e+04,-2.421553710937500000e+04
+2.024035546875000000e+04,1.745229101562500000e+04,-2.459751953125000000e+03,-1.473367187500000000e+03,-5.458197265625000000e+03,1.650673437500000000e+04,-1.516569921875000000e+04,2.526665234375000000e+04
+-1.614822265625000000e+04,3.262424609375000000e+04,1.546023437500000000e+03,2.737675390625000000e+04,2.449608593750000000e+04,2.759233593750000000e+04,1.447634765625000000e+04,-3.042522265625000000e+04
+3.148691796875000000e+04,2.878721093750000000e+04,6.741296875000000000e+03,3.191499609375000000e+04,5.728968750000000000e+03,2.654271875000000000e+04,4.854316406250000000e+03,2.049345312500000000e+04
+-8.639166015625000000e+03,2.560458593750000000e+04,-1.394436523437500000e+04,-2.895786914062500000e+04,3.090774609375000000e+04,-1.290558593750000000e+03,3.628796875000000000e+03,1.451579296875000000e+04
+2.470129687500000000e+04,-1.155723925781250000e+04,2.789051953125000000e+04,-2.129849609375000000e+04,-9.051681640625000000e+03,1.586535937500000000e+04,7.718070312500000000e+03,-7.519947265625000000e+03
+-1.088936523437500000e+04,2.536461328125000000e+04,2.352063281250000000e+04,-2.222727734375000000e+04,-9.340599609375000000e+03,-3.074339843750000000e+04,3.072170312500000000e+04,2.295575000000000000e+04
+-2.855066406250000000e+03,-9.062822265625000000e+03,-1.171989843750000000e+04,4.407031250000000000e+01,-1.095287890625000000e+04,4.721859375000000000e+03,-1.592970312500000000e+04,-2.097570312500000000e+03
+-2.296621093750000000e+03,-2.095112695312500000e+04,7.745070312500000000e+03,1.284000781250000000e+04,-1.214282910156250000e+04,-3.136181054687500000e+04,-2.446296289062500000e+04,1.189535156250000000e+03
+7.879519531250000000e+03,-1.398351367187500000e+04,3.205658984375000000e+04,1.532867968750000000e+04,-4.250740234375000000e+03,6.809074218750000000e+03,-6.571464843750000000e+02,1.408371484375000000e+04
+-4.874878906250000000e+03,1.943416406250000000e+04,-1.250314453125000000e+04,2.886201953125000000e+04,6.276917968750000000e+03,-3.115597851562500000e+04,1.462097265625000000e+04,1.351455859375000000e+04
+-3.782425781250000000e+03,-3.008590039062500000e+04,-3.033045898437500000e+04,1.503886328125000000e+04,2.230409765625000000e+04,-1.056054101562500000e+04,1.364308984375000000e+04,-9.474941406250000000e+03
+-3.764076171875000000e+03,-2.037296289062500000e+04,1.263115234375000000e+04,1.373478906250000000e+04,-1.410587109375000000e+04,2.712390625000000000e+04,1.946813281250000000e+04,2.658819531250000000e+04
+8.712945312500000000e+03,2.198334375000000000e+04,-1.218856445312500000e+04,6.357152343750000000e+03,-7.507048828125000000e+03,2.581895703125000000e+04,-1.851900781250000000e+04,-3.242390625000000000e+04
+2.034826171875000000e+04,-2.546758007812500000e+04,-2.071638671875000000e+04,-1.694490625000000000e+04,-1.941093750000000000e+04,-2.470357617187500000e+04,2.161869531250000000e+04,-1.076089062500000000e+04
+1.648766406250000000e+04,-1.072070312500000000e+04,-1.696540625000000000e+04,9.039542968750000000e+03,-2.013055859375000000e+04,2.089603125000000000e+04,-2.788040234375000000e+04,1.449826171875000000e+04

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h
new file mode 100644
index 0000000..3e77310
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/config_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define ADD_DST_SIZE 128
+#define ADD_OUT_ACTIVATION_MIN -128
+#define ADD_OUT_ACTIVATION_MAX 127
+#define ADD_INPUT1_OFFSET 128
+#define ADD_INPUT2_OFFSET 128
+#define ADD_OUTPUT_MULT 1073741824
+#define ADD_OUTPUT_SHIFT -19
+#define ADD_OUTPUT_OFFSET -128
+#define ADD_LEFT_SHIFT 20
+#define ADD_INPUT1_SHIFT 0
+#define ADD_INPUT2_SHIFT 0
+#define ADD_INPUT1_MULT 1073741824
+#define ADD_INPUT2_MULT 1073741824

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h
new file mode 100644
index 0000000..efa875a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input1_data.h

@@ -0,0 +1,12 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input1[128] = {
+    -92,  -108, 69,   -105, 33,  9,    125,  -17,  -120, 8,    109,  -52, 68,   -113, -97,  92,  73,  -58, 72,
+    56,   -28,  -106, -56,  101, -50,  -5,   -89,  -24,  54,   -31,  15,  117,  -68,  37,   72,  41,  -68, 113,
+    -10,  83,   3,    62,   125, 57,   124,  -119, -65,  93,   -122, 120, 105,  -87,  101,  -75, 64,  -88, -62,
+    122,  -20,  64,   -41,  -51, -98,  25,   3,    84,   14,   -33,  111, -37,  -74,  -11,  34,  121, 35,  30,
+    -41,  -77,  -44,  -109, -11, 39,   31,   60,   121,  41,   -49,  50,  38,   -20,  -123, 33,  102, 80,  126,
+    111,  -7,   41,   16,   120, 65,   -67,  10,   -88,  -113, 107,  68,  -120, -71,  -102, 105, 99,  -94, 125,
+    -119, 98,   -47,  46,   -22, -125, -101, 99,   -20,  4,    -1,   -1,  65,   -108};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h
new file mode 100644
index 0000000..7ac5e2e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input2_data.h

@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input2[128] = {
+    72,   77,   -78, -32, -32, 45,  108,  -49, 121, 43,  -25, -47,  35,  14,  87,   31,  27,  96,   -15, 76,  -31, 77,
+    108,  114,  90,  23,  33,  109, -72,  12,  119, -44, -19, -55,  -72, 94,  -22,  65,  103, 43,   -90, -71, 5,   115,
+    -5,   -57,  59,  70,  52,  78,  73,   41,  104, -61, 89,  44,   37,  77,  91,   -2,  7,   -115, 49,  121, -49, 15,
+    76,   -121, 100, 11,  -39, -45, 86,   -35, 120, -65, -9,  -127, 28,  111, 26,   104, 116, 89,   30,  97,  -13, -124,
+    -101, 94,   36,  -86, -75, 49,  6,    40,  95,  99,  -77, -62,  -74, -88, -124, 97,  -38, 122,  -5,  3,   63,  -94,
+    21,   18,   114, -69, 20,  19,  -121, 0,   -59, -65, -69, -71,  -23, 59,  101,  -55, -86, -68};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h
new file mode 100644
index 0000000..fd82139
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/input_data.h

@@ -0,0 +1,12 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_input[128] = {-19, 39,   -65,  -80,  29,  69,   98,  -96,  -124, -88, -50, -86, -89,  -123, 81,   44,
+                               71,  -17,  -114, -121, -11, 88,   -4,  -46,  -36,  75,  32,  -74, 8,    -57,  -79,  94,
+                               -81, -112, 62,   77,   1,   -73,  51,  -23,  -112, -69, -80, -40, -42,  -77,  -96,  118,
+                               -63, -71,  -108, -16,  -73, -119, 3,   -104, 6,    51,  16,  -98, -100, 68,   -125, -97,
+                               122, -92,  2,    85,   -47, 93,   108, -31,  -27,  123, 86,  16,  38,   62,   28,   7,
+                               95,  71,   116,  -65,  87,  -109, -73, -44,  85,   -9,  99,  21,  51,   -9,   -16,  -7,
+                               24,  109,  -52,  111,  -83, -17,  5,   -37,  25,   -62, -20, 84,  109,  -120, -54,  42,
+                               13,  71,   28,   -112, -1,  116,  -86, -58,  115,  -89, 38,  -98, -113, -114, 106,  24};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h
new file mode 100644
index 0000000..e942c3a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/output_ref_data.h

@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t add_output_ref[128] = {
+    -10, -15, -4,  -68, 1,   27,  117, -33, 1,   26,  42,  -49,  52,  -49, -5,  62,  50,  19,  29,  66,  -29, -14,
+    26,  108, 20,  9,   -28, 43,  -9,  -9,  67,  37,  -43, -9,   0,   68,  -45, 89,  47,  63,  -43, -4,  65,  86,
+    60,  -88, -3,  82,  -35, 99,  89,  -23, 103, -68, 77,  -22,  -12, 100, 36,  31,  -17, -83, -24, 73,  -23, 50,
+    45,  -77, 106, -13, -56, -28, 60,  43,  78,  -17, -25, -102, -8,  1,   8,   72,  74,  75,  76,  69,  -31, -37,
+    -31, 37,  -43, -26, 14,  65,  66,  76,  44,  70,  -30, 29,   -4,  -77, -57, 5,   -75, 115, 32,  -58, -4,  -98,
+    63,  59,  10,  28,  -49, 59,  -84, 23,  -40, -95, -85, 14,   -21, 32,  50,  -28, -10, -88};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h
new file mode 100644
index 0000000..f651ac3
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/config_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define ADD_S16_DST_SIZE 128
+#define ADD_S16_OUT_ACTIVATION_MIN -32768
+#define ADD_S16_OUT_ACTIVATION_MAX 32767
+#define ADD_S16_INPUT1_OFFSET 0
+#define ADD_S16_INPUT2_OFFSET 0
+#define ADD_S16_OUTPUT_MULT 1073741824
+#define ADD_S16_OUTPUT_SHIFT -14
+#define ADD_S16_OUTPUT_OFFSET 0
+#define ADD_S16_LEFT_SHIFT 15
+#define ADD_S16_INPUT1_SHIFT 0
+#define ADD_S16_INPUT2_SHIFT 0
+#define ADD_S16_INPUT1_MULT 1073741824
+#define ADD_S16_INPUT2_MULT 1073741824

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h
new file mode 100644
index 0000000..efae5da
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input1_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_input1[128] = {
+    -9079,  20648,  -22595, 20792,  -21362, -19376, -32696, -5105,  -29800, -21591, 4030,   -614,   -22340,
+    -16269, -12959, 1518,   21804,  17763,  -16971, -25424, 22232,  3293,   -14149, 12259,  29471,  -11362,
+    2756,   -15725, 22700,  4338,   -9487,  -8780,  9803,   10460,  14088,  -28080, -14231, 23281,  -16379,
+    20656,  -83,    -5054,  -21598, -8347,  -25245, -13369, 7721,   -331,   32465,  -5503,  -16528, 31073,
+    5627,   16663,  27079,  23235,  28623,  -5446,  -13771, 28013,  -647,   2540,   2545,   -9142,  29963,
+    26878,  -24855, 21735,  25795,  19225,  -24044, 3844,   17972,  -14867, -10689, 24210,  21095,  12651,
+    -12486, -5862,  -8676,  -5523,  -18557, 6297,   9717,   -7208,  -4163,  -13545, 18485,  7531,   29116,
+    -16955, -26681, -10336, 16839,  27316,  -16293, -5270,  18828,  12897,  28945,  920,    4623,   -19200,
+    -25034, -19706, -10178, -17564, -3641,  19256,  9496,   20834,  17777,  -6373,  10666,  -22710, 6487,
+    -28583, 2349,   -30565, -25362, -21548, 31627,  -8802,  -31532, 11097,  -7745,  -1160};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h
new file mode 100644
index 0000000..fe0ba1d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/input2_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_input2[128] = {
+    -25106, -8289,  -10042, 12111,  27260,  4021,   -17364, 3379,   515,    -17222, 6376,   8065,   -16178,
+    -20774, -27899, -24346, 23909,  -14051, 4294,   25919,  -29365, 13565,  -18084, 8300,   19565,  -31722,
+    16445,  -24759, -1322,  23355,  3142,   32288,  -16469, -15300, 8058,   26894,  -20776, -11359, -22728,
+    16091,  -31116, 31384,  -2444,  -29549, -15409, -18742, -31898, -32765, 240,    -822,   10463,  12688,
+    -29602, -31839, 21866,  -31730, -4092,  21637,  -26447, -32509, 23402,  25039,  30306,  -31017, 30557,
+    6341,   -5359,  26281,  -18907, -19191, -6493,  28223,  5650,   2273,   3781,   4078,   -31507, 21274,
+    -29971, -26713, -4630,  -6975,  -876,   -22427, -23718, -31409, -31524, -14580, 18769,  1524,   28076,
+    -20265, 1982,   -31552, 2618,   1168,   -16913, 31043,  17880,  -23067, 28142,  15142,  -2964,  20675,
+    3525,   -31851, 5391,   6632,   -24131, 13722,  15247,  26582,  -22276, -6355,  15185,  17521,  28530,
+    -11964, -6326,  -10931, 31917,  29332,  -19248, -6136,  23444,  21273,  18617,  6950};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h
new file mode 100644
index 0000000..eacff7b
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/output_ref_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_output_ref[128] = {
+    -17093, 6180,   -16319, 16452,  2949,   -7678,  -25030, -863,   -14643, -19407, 5203,   3726,   -19259,
+    -18522, -20429, -11414, 22857,  1856,   -6339,  248,    -3567,  8429,   -16117, 10280,  24518,  -21542,
+    9601,   -20242, 10689,  13847,  -3173,  11754,  -3333,  -2420,  11073,  -593,   -17504, 5961,   -19554,
+    18374,  -15600, 13165,  -12021, -18948, -20327, -16056, -12089, -16548, 16353,  -3163,  -3033,  21881,
+    -11988, -7588,  24473,  -4248,  12266,  8096,   -20109, -2248,  11378,  13790,  16426,  -20080, 30260,
+    16610,  -15107, 24008,  3444,   17,     -15269, 16034,  11811,  -6297,  -3454,  14144,  -5206,  16963,
+    -21229, -16288, -6653,  -6249,  -9717,  -8065,  -7001,  -19309, -17844, -14063, 18627,  4528,   28596,
+    -18610, -12350, -20944, 9729,   14242,  -16603, 12887,  18354,  -5085,  28544,  8031,   830,    738,
+    -10755, -25779, -2394,  -5466,  -13886, 16489,  12372,  23708,  -2250,  -6364,  12926,  -2595,  17509,
+    -20274, -1989,  -20748, 3278,   3892,   6190,   -7469,  -4044,  16185,  5436,   2895};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/add_s16/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h
new file mode 100644
index 0000000..d0fd23e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/config_data.h

@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define MUL_DST_SIZE 160
+#define MUL_OUT_ACTIVATION_MIN -128
+#define MUL_OUT_ACTIVATION_MAX 127
+#define MUL_INPUT1_OFFSET 128
+#define MUL_INPUT2_OFFSET 128
+#define MUL_OUTPUT_MULT 1077952640
+#define MUL_OUTPUT_SHIFT -7
+#define MUL_OUTPUT_OFFSET -128

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h
new file mode 100644
index 0000000..831a687
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input1_data.h

@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_input1[160] = {
+    6,    46,   2,    -15, -69,  15,   110, 117,  -40, -47,  -9,  -17,  126, -108, 8,   -42,  -9,   -17, 0,    -126,
+    111,  11,   105,  97,  -5,   -103, 45,  -27,  69,  -107, 16,  -51,  122, 38,   -79, 85,   -45,  26,  69,   3,
+    74,   -3,   -96,  -81, 124,  76,   114, -119, 111, 15,   35,  -73,  -15, -21,  36,  -74,  -28,  3,   54,   -58,
+    87,   -83,  102,  43,  0,    -81,  51,  -98,  -9,  -5,   72,  102,  69,  7,    -35, -40,  -118, -66, 20,   100,
+    -78,  59,   64,   -99, -28,  31,   64,  83,   -4,  107,  -67, 113,  -26, 114,  37,  -50,  -21,  -11, -104, -54,
+    123,  -96,  -106, -55, -123, 123,  72,  -53,  83,  -110, 92,  -120, -76, -27,  59,  99,   0,    78,  94,   124,
+    -103, -57,  -15,  -73, 114,  47,   -27, -70,  106, 90,   0,   -92,  -47, 114,  65,  -120, 62,   105, -100, -33,
+    41,   -116, -18,  51,  -81,  -109, -76, 77,   34,  -21,  123, 57,   -45, 61,   -11, 75,   -7,   -94, -56,  -43};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h
new file mode 100644
index 0000000..be6665f
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/input2_data.h

@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_input2[160] = {
+    32,  -45,  -125, 113, 18,  48,  42,  -13,  89,   -37, 0,    -102, -113, 126, -109, -89,  59,  104,  -124, -75,
+    72,  -99,  82,   96,  -74, 98,  91,  -4,   25,   18,  -109, -26,  -45,  -49, -59,  102,  -60, -113, -103, 37,
+    66,  43,   84,   68,  62,  -6,  -91, 40,   -59,  18,  -19,  -16,  68,   5,   -39,  -109, -33, 114,  -119, 4,
+    -37, -6,   30,   -62, -97, -19, -18, -45,  25,   -10, -58,  95,   43,   -69, -30,  -95,  -52, 119,  -100, -55,
+    -31, -103, -119, 28,  -27, -84, 87,  -38,  7,    -81, -48,  -39,  -105, 2,   109,  -121, -65, -4,   -79,  -44,
+    -97, -97,  123,  65,  -24, -19, 102, 26,   -114, -83, 21,   42,   -65,  -12, -95,  -75,  62,  -26,  98,   -83,
+    109, 124,  -41,  -36, -71, 34,  40,  50,   103,  43,  110,  2,    -19,  -18, 109,  60,   60,  -13,  -30,  5,
+    57,  119,  -40,  -92, 15,  27,  80,  -115, 92,   89,  -15,  78,   -35,  -56, 22,   -60,  126, 25,   19,   42};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h
new file mode 100644
index 0000000..81a63d7
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/output_ref_data.h

@@ -0,0 +1,14 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int8_t mul_output_ref[160] = {
+    -44,  -71,  -126, -21,  -94,  -29,  31,   -18,  -53,  -99,  -68,  -117, -113, -108, -118, -115, -41,  -27,
+    -126, -128, 59,   -112, 64,   70,   -102, -106, 21,   -79,  -10,  -116, -117, -97,  -47,  -77,  -115, 64,
+    -106, -119, -109, -43,  26,   -44,  -101, -92,  60,   -30,  -93,  -122, -63,  -46,  -58,  -104, -41,  -72,
+    -71,  -124, -91,  -4,   -122, -92,  -51,  -106, 15,   -84,  -112, -108, -51,  -118, -57,  -71,  -73,  73,
+    4,    -97,  -92,  -117, -125, -68,  -112, -63,  -109, -110, -121, -110, -88,  -101, 34,   -54,  -62,  -85,
+    -109, -44,  -119, -5,   25,   -126, -102, -71,  -123, -104, -97,  -124, -106, -73,  -126, -21,  52,   -83,
+    -116, -125, 1,    -123, -115, -82,  -104, -81,  -33,  -46,  69,   -84,  -105, -58,  -89,  -108, -74,  -17,
+    -61,  -88,  84,   18,   -9,   -110, -93,  -24,  51,   -122, 12,   -23,  -117, -78,  -5,   -116, -90,  -103,
+    -102, -116, -86,  -118, 12,   -37,  -17,  21,   -98,  -75,  -59,  -74,  -7,   -108, -86,  -71};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h
new file mode 100644
index 0000000..6b32b0e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/config_data.h

@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define MUL_S16_DST_SIZE 160
+#define MUL_S16_OUT_ACTIVATION_MIN -32768
+#define MUL_S16_OUT_ACTIVATION_MAX 32767
+#define MUL_S16_INPUT1_OFFSET 0
+#define MUL_S16_INPUT2_OFFSET 0
+#define MUL_S16_OUTPUT_MULT 1073774592
+#define MUL_S16_OUTPUT_SHIFT -14
+#define MUL_S16_OUTPUT_OFFSET 0

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h
new file mode 100644
index 0000000..36a89b2
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input1_data.h

@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_input1[160] = {
+    -1425,  27249,  -7046,  3719,   -19670, -26904, -25945, 2021,   13923,  21927,  -3343,  -19495, -8453,  -13512,
+    9689,   8383,   25714,  3158,   6739,   7599,   15065,  16663,  -23324, 29807,  4226,   -18832, -18543, 9583,
+    -17496, -5655,  32749,  24206,  20579,  28566,  -30740, 6501,   18711,  1566,   -27646, -13658, -15189, -31638,
+    -5636,  13106,  25481,  8109,   16091,  30345,  25854,  -14060, -20454, 7801,   12606,  804,    28422,  22058,
+    -4720,  24359,  20202,  -23638, 23525,  25593,  10567,  -7187,  -26207, -25133, -25502, -21471, 11571,  -8240,
+    -31694, 10996,  -4015,  24481,  -19931, -309,   -914,   -30181, 23105,  -10692, -6850,  -5861,  31567,  -9949,
+    -20729, 12649,  -19335, -7511,  698,    -20160, -26562, 29209,  13853,  10867,  24692,  -21502, -30442, 28284,
+    -14515, -15438, -25245, 27414,  -4386,  -21359, -13374, -647,   8589,   -15109, 20957,  11001,  4636,   -30990,
+    5427,   30421,  -4869,  12688,  10684,  -30590, -30499, -21326, -16252, -28330, -13708, 29795,  -12822, -3746,
+    23881,  -31519, -26328, 5485,   16914,  -4177,  -5471,  -32164, 17953,  6862,   -12701, 12773,  -27457, -9879,
+    -20541, 13002,  15735,  -19242, -19553, 23501,  12037,  -22168, 27626,  18618,  21439,  -6484,  24511,  6074,
+    -16736, 0,      -13506, 24840,  30247,  -18985};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h
new file mode 100644
index 0000000..7cc4cf6
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/input2_data.h

@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_input2[160] = {
+    -4777,  -30226, 3051,   4274,   30678,  3673,   17187,  -30462, 7490,   14823,  -9932,  -28926, -740,   7841,
+    -12204, 5028,   -2685,  -32293, 27555,  -22323, 31354,  -2100,  19764,  10868,  -26916, -28166, -32509, 13549,
+    27209,  -17218, 30989,  -19852, -13293, -13657, 21654,  -32715, 5469,   5795,   -25927, -24215, 20240,  17452,
+    -2459,  -1473,  -5458,  16506,  -15165, 25266,  -16148, 32624,  1546,   27376,  24496,  27592,  14476,  -30425,
+    31486,  28787,  6741,   31914,  5728,   26542,  4854,   20493,  -8639,  25604,  -13944, -28957, 30907,  -1290,
+    3628,   14515,  24701,  -11557, 27890,  -21298, -9051,  15865,  7718,   -7519,  -10889, 25364,  23520,  -22227,
+    -9340,  -30743, 30721,  22955,  -2855,  -9062,  -11719, 44,     -10952, 4721,   -15929, -2097,  -2296,  -20951,
+    7745,   12840,  -12142, -31361, -24462, 1189,   7879,   -13983, 32056,  15328,  -4250,  6809,   -657,   14083,
+    -4874,  19434,  -12503, 28862,  6276,   -31155, 14620,  13514,  -3782,  -30085, -30330, 15038,  22304,  -10560,
+    13643,  -9474,  -3764,  -20372, 12631,  13734,  -14105, 27123,  19468,  26588,  8712,   21983,  -12188, 6357,
+    -7507,  25818,  -18519, -32423, 20348,  -25467, -20716, -16944, -19410, -24703, 21618,  -10760, 16487,  -10720,
+    -16965, 9039,   -20130, 20896,  -27880, 14498};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h
new file mode 100644
index 0000000..379788d
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/output_ref_data.h

@@ -0,0 +1,17 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_output_ref[160] = {
+    208,    -25136, -656,   485,    -18416, -3016,  -13609, -1879,  3183,   9919,   1013,   17210, 191,   -3233,
+    -3609,  1286,   -2107,  -3112,  5667,   -5177,  14415,  -1068,  -14068, 9886,   -3471,  16188, 18397, 3963,
+    -14528, 2972,   30972,  -14665, -8349,  -11906, -20314, -6491,  3123,   277,    21875,  10093, -9382, -16851,
+    423,    -589,   -4244,  4085,   -7447,  23398,  -12741, -13999, -965,   6518,   9424,   677,   12556, -20481,
+    -4535,  21400,  4156,   -23023, 4112,   20731,  1565,   -4495,  6909,   -19639, 10852,  18974, 10914, 324,
+    -3509,  4871,   -3027,  -8635,  -16964, 201,    252,    -14613, 5442,   2453,   2276,   -4537, 22659, 6749,
+    5909,   -11868, -18128, -5262,  -61,    5575,   9500,   39,     -4630,  1566,   -12004, 1376,  2133,  -18085,
+    -3431,  -6049,  9355,   -26238, 3274,   -775,   -3216,  276,    8403,   -7068,  -2718,  2286,  -93,   -13319,
+    -807,   18043,  1858,   11176,  2046,   29085,  -13608, -8795,  1876,   26011,  12688,  13674, -8728, 1207,
+    9943,   9113,   3024,   -3410,  6520,   -1751,  2355,   -26624, 10666,  5568,   -3377,  8569,  10213, -1917,
+    4706,   10245,  -8893,  19040,  -12142, -18265, -7610,  11463,  -16365, -14036, 14144,  2129,  12333, -1987,
+    8665,   0,      8297,   15841,  -25736, -8400};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h
new file mode 100644
index 0000000..a649371
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/mul_s16/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt
new file mode 100644
index 0000000..6442420
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_add_s16)
+
+target_sources(test_arm_elementwise_add_s16 PRIVATE
+    Unity/unity_test_arm_elementwise_add_s16.c
+    Unity/TestRunner/unity_test_arm_elementwise_add_s16_runner.c)

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c
new file mode 100644
index 0000000..51709c7
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_add_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Per-test fixture hook. The autogenerated runner file calls it by this
+ * exact name, so it must not be renamed.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* executed ahead of every single test case */
+#endif
+}
+
+/* Per-test hook called from the autogenerated runner file; the name must
+ * be exactly tearDown. The body is empty.
+ */
+void tearDown(void) {}
+
+/* Unity entry point: forwards to the add_s16 case from the included test source. */
+void test_add_s16_arm_elementwise_add_s16(void)
+{
+    add_s16_arm_elementwise_add_s16();
+}

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c
new file mode 100644
index 0000000..a9af146
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c

@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/add_s16/test_data.h"
+#include "../Utils/validate.h"
+
+/* Runs arm_elementwise_add_s16 on the generated add_s16 data set and checks
+ * both the returned status and the produced output.
+ */
+void add_s16_arm_elementwise_add_s16(void)
+{
+    /* Zero-initialised destination buffer of ADD_S16_DST_SIZE elements. */
+    int16_t output[ADD_S16_DST_SIZE] = {0};
+
+    /* Operands and every quantisation parameter are taken directly from the
+     * generated test data headers; the argument order follows the kernel's
+     * parameter list. */
+    const arm_status status = arm_elementwise_add_s16(add_s16_input1,
+                                                      add_s16_input2,
+                                                      ADD_S16_INPUT1_OFFSET,
+                                                      ADD_S16_INPUT1_MULT,
+                                                      ADD_S16_INPUT1_SHIFT,
+                                                      ADD_S16_INPUT2_OFFSET,
+                                                      ADD_S16_INPUT2_MULT,
+                                                      ADD_S16_INPUT2_SHIFT,
+                                                      ADD_S16_LEFT_SHIFT,
+                                                      output,
+                                                      ADD_S16_OUTPUT_OFFSET,
+                                                      ADD_S16_OUTPUT_MULT,
+                                                      ADD_S16_OUTPUT_SHIFT,
+                                                      ADD_S16_OUT_ACTIVATION_MIN,
+                                                      ADD_S16_OUT_ACTIVATION_MAX,
+                                                      ADD_S16_DST_SIZE);
+
+    /* Expect a success status and an output that validate_s16 accepts
+     * against add_s16_output_ref. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate_s16(output, add_s16_output_ref, ADD_S16_DST_SIZE));
+}

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt
new file mode 100644
index 0000000..a87c4df
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_add_s8)
+
+target_sources(test_arm_elementwise_add_s8 PRIVATE
+    Unity/unity_test_arm_elementwise_add_s8.c
+    Unity/TestRunner/unity_test_arm_elementwise_add_s8_runner.c)

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c
new file mode 100644
index 0000000..2971660
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/Unity/unity_test_arm_elementwise_add_s8.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_add_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Per-test fixture hook. The autogenerated runner file calls it by this
+ * exact name, so it must not be renamed.
+ */
+void setUp(void)
+{
+#if defined(USING_FVP_CORSTONE_300)
+    uart_init(); /* executed ahead of every single test case */
+#endif
+}
+
+/* Per-test hook called from the autogenerated runner file; the name must
+ * be exactly tearDown. The body is empty.
+ */
+void tearDown(void) {}
+
+/* Unity entry point: forwards to the add (s8) case from the included test source. */
+void test_add_arm_elementwise_add_s8(void)
+{
+    add_arm_elementwise_add_s8();
+}

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c
new file mode 100644
index 0000000..d62a567
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_add_s8/test_arm_elementwise_add_s8.c

@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/add/test_data.h"
+#include "../Utils/validate.h"
+
+/* Runs arm_elementwise_add_s8 on the generated add data set and checks
+ * both the returned status and the produced output.
+ */
+void add_arm_elementwise_add_s8(void)
+{
+    /* Zero-initialised destination buffer of ADD_DST_SIZE elements. */
+    int8_t output[ADD_DST_SIZE] = {0};
+
+    /* Operands and every quantisation parameter are taken directly from the
+     * generated test data headers; the argument order follows the kernel's
+     * parameter list. */
+    const arm_status status = arm_elementwise_add_s8(add_input1,
+                                                     add_input2,
+                                                     ADD_INPUT1_OFFSET,
+                                                     ADD_INPUT1_MULT,
+                                                     ADD_INPUT1_SHIFT,
+                                                     ADD_INPUT2_OFFSET,
+                                                     ADD_INPUT2_MULT,
+                                                     ADD_INPUT2_SHIFT,
+                                                     ADD_LEFT_SHIFT,
+                                                     output,
+                                                     ADD_OUTPUT_OFFSET,
+                                                     ADD_OUTPUT_MULT,
+                                                     ADD_OUTPUT_SHIFT,
+                                                     ADD_OUT_ACTIVATION_MIN,
+                                                     ADD_OUT_ACTIVATION_MAX,
+                                                     ADD_DST_SIZE);
+
+    /* Expect a success status and an output that validate accepts against
+     * add_output_ref. */
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, status);
+    TEST_ASSERT_TRUE(validate(output, add_output_ref, ADD_DST_SIZE));
+}

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt
new file mode 100644
index 0000000..f3f5bbc
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_mul_s16)
+
+target_sources(test_arm_elementwise_mul_s16 PRIVATE
+    Unity/unity_test_arm_elementwise_mul_s16.c # Unity fixture file; it #includes the test body
+    Unity/TestRunner/unity_test_arm_elementwise_mul_s16_runner.c) # presumably the autogenerated runner

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..940dd59
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_mul_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Unity fixture hook. It is called from the autogenerated file,
+ * so the name must be exactly setUp.
+ */
+void setUp(void)
+{ /* This is run before EACH TEST; the UART is initialised only when USING_FVP_CORSTONE_300 is defined */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* Unity fixture hook. It is called from the autogenerated file,
+ * so the name must be exactly tearDown. The body is empty.
+ */
+void tearDown(void) {}
+
+void test_mul_s16_arm_elementwise_mul_s16(void) { mul_s16_arm_elementwise_mul_s16(); } /* Unity test entry point */

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..4b71dbc
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c

@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/mul_s16/test_data.h"
+#include "../Utils/validate.h"
+
+void mul_s16_arm_elementwise_mul_s16(void)
+{ /* Calls arm_elementwise_mul_s16 once and checks its status and output against the reference */
+    const arm_status expected = ARM_MATH_SUCCESS;
+    int16_t output[MUL_S16_DST_SIZE] = {0}; /* zero-initialised result buffer */
+
+    const int16_t *input_data1 = mul_s16_input1; /* test vectors, presumably from the included test_data.h */
+    const int16_t *input_data2 = mul_s16_input2;
+
+    const int32_t input_1_offset = MUL_S16_INPUT1_OFFSET;
+    const int32_t input_2_offset = MUL_S16_INPUT2_OFFSET;
+
+    const int32_t out_offset = MUL_S16_OUTPUT_OFFSET;
+    const int32_t out_mult = MUL_S16_OUTPUT_MULT;
+    const int32_t out_shift = MUL_S16_OUTPUT_SHIFT;
+
+    const int32_t out_activation_min = MUL_S16_OUT_ACTIVATION_MIN;
+    const int32_t out_activation_max = MUL_S16_OUT_ACTIVATION_MAX;
+
+    arm_status result = arm_elementwise_mul_s16(input_data1,
+                                                input_data2,
+                                                input_1_offset,
+                                                input_2_offset,
+                                                output,
+                                                out_offset,
+                                                out_mult,
+                                                out_shift,
+                                                out_activation_min,
+                                                out_activation_max,
+                                                MUL_S16_DST_SIZE);
+
+    TEST_ASSERT_EQUAL(expected, result); /* the kernel must report success */
+    TEST_ASSERT_TRUE(validate_s16(output, mul_s16_output_ref, MUL_S16_DST_SIZE));
+}

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt
new file mode 100644
index 0000000..438f261
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_elementwise_mul_s8)
+
+target_sources(test_arm_elementwise_mul_s8 PRIVATE
+    Unity/unity_test_arm_elementwise_mul_s8.c # Unity fixture file; it #includes the test body
+    Unity/TestRunner/unity_test_arm_elementwise_mul_s8_runner.c) # presumably the autogenerated runner

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c
new file mode 100644
index 0000000..26cdbff
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/Unity/unity_test_arm_elementwise_mul_s8.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_elementwise_mul_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST; the UART is initialised only when USING_FVP_CORSTONE_300 is defined */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* Unity fixture hook. It is called from the autogenerated file,
+ * so the name must be exactly tearDown. The body is empty.
+ */
+void tearDown(void) {}
+
+void test_mul_arm_elementwise_mul_s8(void) { mul_arm_elementwise_mul_s8(); } /* Unity test entry point */

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c
new file mode 100644
index 0000000..dac9e81
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_elementwise_mul_s8/test_arm_elementwise_mul_s8.c

@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_nnfunctions.h"
+#include "unity.h"
+
+#include "../TestData/mul/test_data.h"
+#include "../Utils/validate.h"
+
+void mul_arm_elementwise_mul_s8(void)
+{ /* Calls arm_elementwise_mul_s8 once and checks its status and output against the reference */
+    const arm_status expected = ARM_MATH_SUCCESS;
+    int8_t output[MUL_DST_SIZE] = {0}; /* zero-initialised result buffer */
+
+    const int8_t *input_data1 = mul_input1; /* test vectors, presumably from the included test_data.h */
+    const int8_t *input_data2 = mul_input2;
+
+    const int32_t input_1_offset = MUL_INPUT1_OFFSET;
+    const int32_t input_2_offset = MUL_INPUT2_OFFSET;
+
+    const int32_t out_offset = MUL_OUTPUT_OFFSET;
+    const int32_t out_mult = MUL_OUTPUT_MULT;
+    const int32_t out_shift = MUL_OUTPUT_SHIFT;
+
+    const int32_t out_activation_min = MUL_OUT_ACTIVATION_MIN;
+    const int32_t out_activation_max = MUL_OUT_ACTIVATION_MAX;
+
+    arm_status result = arm_elementwise_mul_s8(input_data1,
+                                               input_data2,
+                                               input_1_offset,
+                                               input_2_offset,
+                                               output,
+                                               out_offset,
+                                               out_mult,
+                                               out_shift,
+                                               out_activation_min,
+                                               out_activation_max,
+                                               MUL_DST_SIZE);
+
+    TEST_ASSERT_EQUAL(expected, result); /* the kernel must report success */
+    TEST_ASSERT_TRUE(validate(output, mul_output_ref, MUL_DST_SIZE));
+}

diff --git a/CMSIS/NN/Tests/UnitTest/generate_test_data.py b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
index e018c47..9202b25 100755
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py

@@ -57,8 +57,8 @@
     parser.add_argument('-a', '--regenerate-all', action='store_true', help="Regenerate and store all data.")
     parser.add_argument('-t', '--testtype', type=str, default=None, choices=['conv', 'depthwise_conv', 'avgpool',
                                                                              'maxpool', 'fully_connected', 'softmax',
-                                                                             'svdf'],
-                        help='Type of test.')
+                                                                             'svdf', 'add', 'mul'],
+                        help='Type of test. These are the operators that have unit tests.')
     parser.add_argument('--run-all-testsets', action='store_true', help="Run the script for all existing test "
                         "sets. Regenerate all, partially all or no input data (output may still change, depending on"
                         " changes in script) depending on regenerate flags. If used together with the -t flag, only"
@@ -363,24 +363,31 @@
         significand_q31 = round(significand * (1 << 31))
         return significand_q31, shift
 
-    def get_convolving_calib_data_func(self):
+    def get_convolving_calib_data_func(self, n_inputs):  # n_inputs: number of all-ones input tensors per sample
         def representative_data_gen():
-            # testset = np.random.rand(self.batches, self.y_input, self.x_input, self.input_ch).astype('float32')
-            testset = np.ones((self.batches, self.y_input, self.x_input, self.input_ch), dtype=np.float32)
-            yield [testset]
+            representative_testsets = []
+            if n_inputs > 0:
+                for i in range(n_inputs):
+                    representative_testsets.append(np.ones((self.batches, self.y_input, self.x_input, self.input_ch),
+                                                           dtype=np.float32))
+                yield representative_testsets
+            else:
+                raise RuntimeError("Invalid number of representative test sets: {}. Must be more than 0".
+                                   format(n_inputs))  # report the offending count, not the test type
         return representative_data_gen
 
-    def convert_and_interpret(self, model, input_data, inttype):
+    def convert_and_interpret(self, model, inttype, input_data=None):
         """
         Compile and convert a model to Tflite format, run interpreter and allocate tensors.
         """
         model.compile(loss=tf.keras.losses.categorical_crossentropy,
                       optimizer=tf.keras.optimizers.Adam(),
                       metrics=['accuracy'])
+        n_inputs = len(model.inputs)
 
         converter = tf.lite.TFLiteConverter.from_keras_model(model)
         converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = self.get_convolving_calib_data_func()
+        converter.representative_dataset = self.get_convolving_calib_data_func(n_inputs)
         if self.is_int16xint8:
             converter.target_spec.supported_ops = [
                 tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
@@ -398,14 +405,15 @@
             model_path=str(self.model_path_tflite), experimental_op_resolver_type=OpResolverType.BUILTIN_REF)
         interpreter.allocate_tensors()
 
-        input_details = interpreter.get_input_details()
         output_details = interpreter.get_output_details()
-
-        (self.input_scale, self.input_zero_point) = input_details[0]['quantization']
         (self.output_scale, self.output_zero_point) = output_details[0]['quantization']
 
-        # Set input tensors
-        interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data, inttype))
+        if input_data is not None:
+            input_details = interpreter.get_input_details()
+            (self.input_scale, self.input_zero_point) = input_details[0]['quantization']
+
+            # Set input tensors
+            interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data, inttype))
 
         return interpreter
 
@@ -511,8 +519,7 @@
                 input_shape=input_shape[1:], dilation_rate=(self.dilation_y, self.dilation_x))
             model.add(depthwise_layer)
             depthwise_layer.set_weights([weights, biases])
-
-        interpreter = self.convert_and_interpret(model, input_data, inttype)
+        interpreter = self.convert_and_interpret(model, inttype, input_data)
 
         all_layers_details = interpreter.get_tensor_details()
         filter_layer = all_layers_details[1]
@@ -580,7 +587,7 @@
         else:
             raise RuntimeError("Wrong test type")
 
-        interpreter = self.convert_and_interpret(model, input_data, inttype)
+        interpreter = self.convert_and_interpret(model, inttype, input_data)
 
         output_details = interpreter.get_output_details()
         self.set_output_dims_and_padding(output_details[0]['shape'][2], output_details[0]['shape'][1])
@@ -671,7 +678,7 @@
         model.add(fully_connected_layer)
         fully_connected_layer.set_weights([weights, biases])
 
-        interpreter = self.convert_and_interpret(model, input_data, inttype)
+        interpreter = self.convert_and_interpret(model, inttype, input_data)
 
         all_layers_details = interpreter.get_tensor_details()
         if self.is_int16xint8:
@@ -766,7 +773,7 @@
         input_shape = (self.y_input, self.x_input)
         model.add(tf.keras.layers.Softmax(input_shape=input_shape[1:]))
 
-        interpreter = self.convert_and_interpret(model, input_data, tf.int8)
+        interpreter = self.convert_and_interpret(model, tf.int8, input_data)
 
         self.calc_softmax_params()
 
@@ -989,6 +996,110 @@
         return generated_json_file
 
 
+class AddMulSettings(TestSettings):
+    """Settings and test-data generator for elementwise add/mul; int16 data when int16xint8 is set, else int8."""
+    def __init__(self, dataset, testtype, args, channels=1, x_in=4, y_in=4, decimal_input=6, randmin=INT8_MIN,
+                 randmax=INT8_MAX, out_activation_min=INT8_MIN, out_activation_max=INT8_MAX, int16xint8=False):
+        super().__init__(dataset, testtype, args, in_ch=channels, out_ch=channels, x_in=x_in, y_in=y_in, w_x=1, w_y=1,
+                         stride_x=1, stride_y=1, pad=False, randmin=randmin, randmax=randmax, batches=1,
+                         generate_bias=False, relu6=False, out_activation_min=out_activation_min,
+                         out_activation_max=out_activation_max, int16xint8=int16xint8)
+
+        self.x_input = self.x_output = x_in
+        self.y_input = self.y_output = y_in
+        self.decimal_input = decimal_input
+
+        self.left_shift = 15 if self.is_int16xint8 else 20  # written to the config header for 'add' only
+
+    def generate_data(self, input_data1=None, input_data2=None):  # NOTE: both arguments are overwritten below
+        input_shape = (1, self.y_input, self.x_input, self.input_ch)
+
+        input_data1 = self.get_randomized_data(list(input_shape),
+                                               self.inputs_table_file,
+                                               regenerate=self.regenerate_new_input,
+                                               decimals=self.decimal_input)
+        input_data2 = self.get_randomized_data(list(input_shape),
+                                               self.kernel_table_file,
+                                               regenerate=self.regenerate_new_weights,
+                                               decimals=self.decimal_input)
+
+        if self.is_int16xint8:
+            inttype = "int16_t"
+            inttype_tf = tf.int16
+        else:
+            inttype = "int8_t"
+            inttype_tf = tf.int8
+
+        # Create a one-layer functional Keras model as add/mul cannot use a sequential Keras model.
+        input1 = tf.keras.layers.Input(shape=input_shape[1:])
+        input2 = tf.keras.layers.Input(shape=input_shape[1:])
+        if self.test_type == 'add':
+            layer = tf.keras.layers.Add()([input1, input2])
+        elif self.test_type == 'mul':
+            layer = tf.keras.layers.Multiply()([input1, input2])
+        else:
+            raise RuntimeError("Wrong test type")
+        out = tf.keras.layers.Lambda(function=lambda x: x)(layer)
+        model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
+
+        interpreter = self.convert_and_interpret(model, inttype_tf)  # no input_data: both tensors are set below
+
+        input_details = interpreter.get_input_details()
+        interpreter.set_tensor(input_details[0]["index"], tf.cast(input_data1, inttype_tf))
+        interpreter.set_tensor(input_details[1]["index"], tf.cast(input_data2, inttype_tf))
+
+        # Calculate multipliers, shifts and offsets.
+        (input1_scale, self.input1_zero_point) = input_details[0]['quantization']
+        (input2_scale, self.input2_zero_point) = input_details[1]['quantization']
+        self.input1_zero_point = -self.input1_zero_point  # the C input offsets are the negated zero points
+        self.input2_zero_point = -self.input2_zero_point
+        double_max_input_scale = max(input1_scale, input2_scale) * 2
+        (self.input1_mult, self.input1_shift) = self.quantize_scale(input1_scale/double_max_input_scale)
+        (self.input2_mult, self.input2_shift) = self.quantize_scale(input2_scale/double_max_input_scale)
+
+        if self.test_type == 'add':
+            actual_output_scale = double_max_input_scale / ((1 << self.left_shift) * self.output_scale)
+        elif self.test_type == 'mul':
+            actual_output_scale = input1_scale * input2_scale / self.output_scale
+        (self.output_mult, self.output_shift) = self.quantize_scale(actual_output_scale)
+
+        # Generate reference.
+        interpreter.invoke()
+        output_details = interpreter.get_output_details()
+        output_data = interpreter.get_tensor(output_details[0]["index"])
+        self.generate_c_array("input1", input_data1, datatype=inttype)
+        self.generate_c_array("input2", input_data2, datatype=inttype)
+        self.generate_c_array("output_ref", np.clip(output_data, self.out_activation_min, self.out_activation_max),
+                              datatype=inttype)
+
+        self.write_c_config_header()
+        self.write_c_header_wrapper()
+
+    def write_c_config_header(self):
+        super().write_c_config_header(write_common_parameters=False)
+
+        filename = self.config_data
+        filepath = self.headers_dir + filename
+        prefix = self.testdataset.upper()
+
+        with open(filepath, "a") as f:  # append mode; presumably extends the header started by the super() call
+            f.write("#define {}_DST_SIZE {}\n".format(prefix,
+                                                      self.batches * self.y_input * self.x_input * self.input_ch))
+            f.write("#define {}_OUT_ACTIVATION_MIN {}\n".format(prefix, self.out_activation_min))
+            f.write("#define {}_OUT_ACTIVATION_MAX {}\n".format(prefix, self.out_activation_max))
+            f.write("#define {}_INPUT1_OFFSET {}\n".format(prefix, self.input1_zero_point))
+            f.write("#define {}_INPUT2_OFFSET {}\n".format(prefix, self.input2_zero_point))
+            f.write("#define {}_OUTPUT_MULT {}\n".format(prefix, self.output_mult))
+            f.write("#define {}_OUTPUT_SHIFT {}\n".format(prefix, self.output_shift))
+            f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
+            if self.test_type == 'add':  # input scaling parameters are only emitted for add
+                f.write("#define {}_LEFT_SHIFT {}\n".format(prefix, self.left_shift))
+                f.write("#define {}_INPUT1_SHIFT {}\n".format(prefix, self.input1_shift))
+                f.write("#define {}_INPUT2_SHIFT {}\n".format(prefix, self.input2_shift))
+                f.write("#define {}_INPUT1_MULT {}\n".format(prefix, self.input1_mult))
+                f.write("#define {}_INPUT2_MULT {}\n".format(prefix, self.input2_mult))
+
 def load_all_testdatasets():
     """
     Add all new testdata sets here
@@ -1235,21 +1346,34 @@
     dataset = 'svdf'
     ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args,  batches=2, number_inputs=2, rank=8,
                                               memory_size=8, input_size=3, number_units=3)
-    type_of_test = 'svdf'
     dataset = 'svdf_1'
     ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args,  batches=3, number_inputs=2, rank=1,
                                               memory_size=2, input_size=7, number_units=5)
-
-    type_of_test = 'svdf'
     dataset = 'svdf_2'
     ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args,  batches=3, number_inputs=2, rank=2,
                                               memory_size=2, input_size=7, number_units=5, generate_bias=False)
-
-    type_of_test = 'svdf'
     dataset = 'svdf_3'
     ALL_TESTDATA_SETS[dataset] = SVDFSettings(dataset, type_of_test, args,  batches=1, number_inputs=2, rank=1,
                                               memory_size=2, input_size=20, number_units=12, generate_bias=False)
 
+    type_of_test = 'add'
+    dataset = 'add'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=4,
+                                                randmin=INT8_MIN, randmax=INT8_MAX)
+    dataset = 'add_s16'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=4,
+                                                randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
+                                                out_activation_max=INT16_MAX, int16xint8=True)
+
+    type_of_test = 'mul'
+    dataset = 'mul'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=5,
+                                                randmin=INT8_MIN, randmax=INT8_MAX)
+    dataset = 'mul_s16'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=5, y_in=4,
+                                                randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
+                                                out_activation_max=INT16_MAX, int16xint8=True)
+
 
 if __name__ == '__main__':
     if version.parse(tf.__version__) < REQUIRED_MINIMUM_TENSORFLOW_VERSION:
@@ -1295,6 +1419,8 @@
                 generator = SoftmaxSettings(testdataset, test_type, args)
             elif args.testtype == 'svdf':
                 generator = SVDFSettings(testdataset, test_type, args)
+            elif args.testtype == 'add' or args.testtype == 'mul':
+                generator = AddMulSettings(testdataset, test_type, args)
             else:
                 raise RuntimeError("Please specify type of test with -t")
         generator.generate_data()
commit	773cdffff47e6ffca2e9e4d5d422c2cd2570f26f	[log] [tgz]
author	Måns Nilsson <mans.nilsson@arm.com>	Thu Feb 17 15:11:54 2022 +0100
committer	GitHub <noreply@github.com>	Thu Feb 17 16:11:54 2022 +0200
tree	31b6c80ac1e17dd38753dbe747442dd989230acf
parent	addd45db29184b4da1efe90c1613fe830e3da921 [diff]