CMSIS-NN: Add int16 conv kernel support

* Adds test_arm_convolve_wrapper_s16 and test_arm_convolve_s16 with
  reference implementation.
* This requires a new requantization function, arm_nn_requantize_s64,
  which is added along with it.
* Adds unit tests for this.

Change-Id: I730458996472d6dbb51d239bc7530dd53d21130f
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index 0153a50..137f663 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -11,6 +11,8 @@
     <release version="5.8.1">
       Active development ...
       CMSIS-DSP: 1.10.0 (see revision history for details)
+      CMSIS-NN: 3.0.1 (see revision history for details)
+       - Support for int16
     </release>
     <release version="5.8.0" date="2021-06-24">
       CMSIS-Core(M): 5.5.0 (see revision history for details)
@@ -2815,12 +2817,14 @@
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c"/>
         <file category="source" name="CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c"/>
diff --git a/CMSIS/DoxyGen/NN/src/history.txt b/CMSIS/DoxyGen/NN/src/history.txt
index a800671..45d04f3 100644
--- a/CMSIS/DoxyGen/NN/src/history.txt
+++ b/CMSIS/DoxyGen/NN/src/history.txt
@@ -7,6 +7,18 @@
     <th>Description</th>
   </tr>
   <tr>
+    <td>V3.0.1</td>
+    <td>
+    <ul>
+      <li>Added arm_convolve_s16 and arm_convolve_wrapper_s16<br>
+      with support for int16.<br></li>
+      <li> Added conv int16 reference implementation </li>
+      <li> Added new requantization function for int16 </li>
+      <li> Added unit tests for int16 conv kernel </li>
+      </ul>
+    </td>
+  </tr>
+  <tr>
     <td>V3.0.0</td>
     <td>
     <ul>
diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index fc78242..3387ce2 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        19 March 2021
- * $Revision:    V.7.0.0
+ * $Date:        14 June 2021
+ * $Revision:    V.7.1.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -223,6 +223,64 @@
                                                 const cmsis_nn_dims *output_dims);
 
 /**
+ * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ cmsis-nn
+ *        to perform the convolution.
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+                                  arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns either
+ *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail, or
+ *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
+ *
+ */
+arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q15_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int64_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_wrapper_s16
+ *
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
+ *                                filter dimensions
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ *
+ * @return         The function returns the required buffer size (bytes)
+ *
+ */
+int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                 const cmsis_nn_dims *input_dims,
+                                                 const cmsis_nn_dims *filter_dims,
+                                                 const cmsis_nn_dims *output_dims);
+
+/**
  * @brief Basic s8 convolution function
  * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
                                   arm_convolve_s8_get_buffer_size will return the buffer_size if required
@@ -273,6 +331,56 @@
 int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
 
 /**
+ * @brief Basic s16 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+                                  arm_convolve_s16_get_buffer_size will return the buffer_size if required
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *
+ */
+arm_status arm_convolve_s16(const cmsis_nn_context *ctx,
+                            const cmsis_nn_conv_params *conv_params,
+                            const cmsis_nn_per_channel_quant_params *quant_params,
+                            const cmsis_nn_dims *input_dims,
+                            const q15_t *input_data,
+                            const cmsis_nn_dims *filter_dims,
+                            const q7_t *filter_data,
+                            const cmsis_nn_dims *bias_dims,
+                            const int64_t *bias_data,
+                            const cmsis_nn_dims *output_dims,
+                            q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for s16 convolution function
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ * are the spatial filter dimensions
+ * @return          The function returns the required buffer size (bytes)
+ *
+ */
+int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
  * @brief Basic Q7 convolution function
  * @param[in]       Im_in       pointer to input tensor
  * @param[in]       dim_im_in   input tensor dimension
diff --git a/CMSIS/NN/Include/arm_nnsupportfunctions.h b/CMSIS/NN/Include/arm_nnsupportfunctions.h
index 74888b5..669419f 100644
--- a/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        15. April 2021
- * $Revision:    V.5.5.0
+ * $Date:        5. July 2021
+ * $Revision:    V.5.6.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -46,6 +46,7 @@
 #define MAX(A, B) ((A) > (B) ? (A) : (B))
 #define MIN(A, B) ((A) < (B) ? (A) : (B))
 #define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
+#define REDUCE_MULTIPLIER(_mult) ((_mult < 0x7FFF0000) ? ((_mult + (1 << 15)) >> 16) : 0x7FFF)
 
 /**
  * @brief Union for SIMD access of q31/q15/q7 types
@@ -799,6 +800,26 @@
 }
 
 /**
+ * @brief           Requantize a given 64 bit value.
+ * @param[in]       val                 Value to be requantized
+ * @param[in]       reduced_multiplier  Reduced multiplier from range {Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1, Q16_MAX}
+ * @param[in]       shift               left or right shift for 'val * multiplier'
+ *
+ * @return          Returns (val * multiplier)/(2 ^ shift)
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64(const q63_t val, const q31_t reduced_multiplier, const q31_t shift)
+{
+    q31_t result = 0;
+    q63_t new_val = val * reduced_multiplier;
+
+    result = new_val >> (14 - shift); // 64->32 bit reduction
+    result = (result + 1) >> 1;       // Last shift position and insert round
+
+    return result;
+}
+
+/**
  * @brief           memcpy optimized for MVE
  * @param[in, out]  dst         Destination pointer
  * @param[in]       src         Source pointer.
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt b/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt
index 7d82b20..2fa6c28 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt
+++ b/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt
@@ -20,6 +20,7 @@
 
 file(GLOB SRC "./*_s8*.c")
 add_library(CMSISNNConvolutions STATIC ${SRC})
+target_sources(CMSISNNConvolutions PUBLIC arm_convolve_s16.c arm_convolve_wrapper_s16.c)
 
 ### Includes
 target_include_directories(CMSISNNConvolutions PUBLIC "${NN}/Include")
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
new file mode 100644
index 0000000..b8df42b
--- /dev/null
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_convolve_s16.c
+ * Description:  s16 version of convolution using symmetric quantization.
+ *
+ * $Date:        July 5, 2021
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * Basic s16 convolution function.
+ *
+ * Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels
+ * are multiples of 4 or at least greater than 4.
+ *
+ */
+
+arm_status arm_convolve_s16(const cmsis_nn_context *ctx,
+                            const cmsis_nn_conv_params *conv_params,
+                            const cmsis_nn_per_channel_quant_params *quant_params,
+                            const cmsis_nn_dims *input_dims,
+                            const q15_t *input_data,
+                            const cmsis_nn_dims *filter_dims,
+                            const q7_t *filter_data,
+                            const cmsis_nn_dims *bias_dims,
+                            const int64_t *bias_data,
+                            const cmsis_nn_dims *output_dims,
+                            q15_t *output_data)
+{
+    (void)bias_dims;
+    q15_t *buffer_a = (q15_t *)ctx->buf;
+
+    const int32_t input_batches = input_dims->n;
+    const int32_t input_x = input_dims->w;
+    const int32_t input_y = input_dims->h;
+    const int32_t input_ch = input_dims->c;
+    const int32_t kernel_x = filter_dims->w;
+    const int32_t kernel_y = filter_dims->h;
+    const int32_t output_x = output_dims->w;
+    const int32_t output_y = output_dims->h;
+    const int32_t output_ch = output_dims->c;
+
+    const int32_t pad_x = conv_params->padding.w;
+    const int32_t pad_y = conv_params->padding.h;
+    const int32_t stride_x = conv_params->stride.w;
+    const int32_t stride_y = conv_params->stride.h;
+
+    const int32_t out_activation_min = conv_params->activation.min;
+    const int32_t out_activation_max = conv_params->activation.max;
+    int32_t *output_mult = quant_params->multiplier;
+    int32_t *output_shift = quant_params->shift;
+
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+        /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
+        (void)buffer_a;
+
+        for (int32_t i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
+        {
+            q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i_out_ch]);
+
+            for (int32_t base_idx_y = -pad_y, i_out_y = 0; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
+            {
+                for (int32_t base_idx_x = -pad_x, i_out_x = 0; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
+                {
+                    int64_t conv_out_acc = 0;
+
+                    const int32_t ker_y_start = MAX(0, -base_idx_y);
+                    const int32_t ker_x_start = MAX(0, -base_idx_x);
+
+                    const int32_t ker_y_end = MIN(kernel_y, input_y - base_idx_y);
+                    const int32_t ker_x_end = MIN(kernel_x, input_x - base_idx_x);
+
+                    for (int32_t i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
+                    {
+                        for (int32_t i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
+                        {
+                            const int32_t in_row = base_idx_y + i_ker_y;
+                            const int32_t in_col = base_idx_x + i_ker_x;
+                            for (int32_t i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
+                            {
+                                conv_out_acc += input_data[(in_row * input_x + in_col) * input_ch + i_input_ch] *
+                                    filter_data[i_out_ch * input_ch * kernel_y * kernel_x +
+                                                (i_ker_y * kernel_x + i_ker_x) * input_ch + i_input_ch];
+                            }
+                        }
+                    }
+
+                    if (bias_data)
+                    {
+                        conv_out_acc += bias_data[i_out_ch];
+                    }
+
+                    int32_t conv_out = arm_nn_requantize_s64(conv_out_acc, reduced_multiplier, output_shift[i_out_ch]);
+                    conv_out = MAX(conv_out, out_activation_min);
+                    conv_out = MIN(conv_out, out_activation_max);
+                    output_data[i_out_ch + (i_out_y * output_x + i_out_x) * output_ch] = (int16_t)conv_out;
+                }
+            }
+        }
+        /* Advance to the next batch */
+        input_data += (input_x * input_y * input_ch);
+        output_data += (output_x * output_y * output_ch);
+    }
+
+    /* Return to application */
+    return ARM_MATH_SUCCESS;
+}
+
+int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
+{
+    (void)input_dims;
+    (void)filter_dims;
+    return 0;
+}
+
+/**
+ * @} end of NNConv group
+ */
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
index 32b31d0..f298715 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_s8.c
  * Description:  s8 version of convolution using symmetric quantization.
  *
- * $Date:        January 26, 2021
- * $Revision:    V.2.0.4
+ * $Date:        June 23, 2021
+ * $Revision:    V.2.0.5
  *
  * Target Processor:  Cortex-M cores
  *
@@ -63,7 +63,7 @@
     (void)bias_dims;
     q15_t *buffer_a = (q15_t *)ctx->buf;
 
-    const uint16_t input_batches = input_dims->n;
+    const int32_t input_batches = input_dims->n;
     const uint16_t input_x = input_dims->w;
     const uint16_t input_y = input_dims->h;
     const uint16_t input_ch = input_dims->c;
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c
new file mode 100644
index 0000000..33c5d43
--- /dev/null
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_convolve_wrapper_s16.c
+ * Description:  s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ * cmsis-nn to perform the convolution.
+ *
+ * $Date:        14. June 2021
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * Convolution layer
+ *
+ * Refer header file for details.
+ *
+ */
+
+arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q15_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int64_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q15_t *output_data)
+{
+    return arm_convolve_s16(ctx,
+                            conv_params,
+                            quant_params,
+                            input_dims,
+                            input_data,
+                            filter_dims,
+                            filter_data,
+                            bias_dims,
+                            bias_data,
+                            output_dims,
+                            output_data);
+}
+
+int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                 const cmsis_nn_dims *input_dims,
+                                                 const cmsis_nn_dims *filter_dims,
+                                                 const cmsis_nn_dims *output_dims)
+{
+    (void)conv_params;
+    (void)output_dims;
+
+    return arm_convolve_s16_get_buffer_size(input_dims, filter_dims);
+}
+
+/**
+ * @} end of NNConv group
+ */
diff --git a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
index a9f4d8d..2c426bd 100644
--- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
@@ -22,6 +22,12 @@
 
 set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../..")
 
+add_compile_options(-O0
+                    -Werror
+                    -Wimplicit-function-declaration
+                    -Wunused-variable
+                    -Wno-redundant-decls)
+
 option(BUILD_CMSIS_NN_UNIT "If building the unit tests from another project, i.e. \
 platform dependencies need to be provided externally." OFF)
 
@@ -62,6 +68,7 @@
 add_subdirectory(TestCases/test_arm_avgpool_s8)
 add_subdirectory(TestCases/test_arm_convolve_1x1_s8_fast)
 add_subdirectory(TestCases/test_arm_convolve_s8)
+add_subdirectory(TestCases/test_arm_convolve_s16)
 add_subdirectory(TestCases/test_arm_depthwise_conv_3x3_s8)
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8)
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8_opt)
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/bias.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/bias.txt
new file mode 100644
index 0000000..deac25a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/bias.txt
@@ -0,0 +1,2 @@
+# 4
+-1.000000000000000000e+00,-4.000000000000000000e+00,-3.000000000000000000e+00,-2.000000000000000000e+00
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/input.txt
new file mode 100644
index 0000000..719d5c5
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/input.txt
@@ -0,0 +1,57 @@
+# 1,8,7,3
+0.000000000000000000e+00,-2.000000000000000000e+00,2.000000000000000000e+00
+-2.000000000000000000e+00,-2.000000000000000000e+00,-1.000000000000000000e+00
+1.000000000000000000e+00,0.000000000000000000e+00,-4.000000000000000000e+00
+0.000000000000000000e+00,-4.000000000000000000e+00,-2.000000000000000000e+00
+0.000000000000000000e+00,-4.000000000000000000e+00,-2.000000000000000000e+00
+-4.000000000000000000e+00,1.000000000000000000e+00,0.000000000000000000e+00
+0.000000000000000000e+00,-3.000000000000000000e+00,1.000000000000000000e+00
+-1.000000000000000000e+00,-2.000000000000000000e+00,2.000000000000000000e+00
+-4.000000000000000000e+00,1.000000000000000000e+00,-2.000000000000000000e+00
+-1.000000000000000000e+00,1.000000000000000000e+00,-2.000000000000000000e+00
+2.000000000000000000e+00,-1.000000000000000000e+00,2.000000000000000000e+00
+-4.000000000000000000e+00,-3.000000000000000000e+00,-1.000000000000000000e+00
+-3.000000000000000000e+00,-1.000000000000000000e+00,-2.000000000000000000e+00
+-4.000000000000000000e+00,0.000000000000000000e+00,-1.000000000000000000e+00
+-3.000000000000000000e+00,3.000000000000000000e+00,-2.000000000000000000e+00
+2.000000000000000000e+00,3.000000000000000000e+00,1.000000000000000000e+00
+-4.000000000000000000e+00,-3.000000000000000000e+00,-1.000000000000000000e+00
+1.000000000000000000e+00,3.000000000000000000e+00,1.000000000000000000e+00
+-3.000000000000000000e+00,-2.000000000000000000e+00,-4.000000000000000000e+00
+0.000000000000000000e+00,0.000000000000000000e+00,1.000000000000000000e+00
+1.000000000000000000e+00,3.000000000000000000e+00,-1.000000000000000000e+00
+-2.000000000000000000e+00,-2.000000000000000000e+00,-3.000000000000000000e+00
+0.000000000000000000e+00,2.000000000000000000e+00,3.000000000000000000e+00
+-4.000000000000000000e+00,3.000000000000000000e+00,1.000000000000000000e+00
+0.000000000000000000e+00,-4.000000000000000000e+00,-3.000000000000000000e+00
+0.000000000000000000e+00,-1.000000000000000000e+00,-3.000000000000000000e+00
+-2.000000000000000000e+00,-2.000000000000000000e+00,-1.000000000000000000e+00
+2.000000000000000000e+00,1.000000000000000000e+00,-2.000000000000000000e+00
+3.000000000000000000e+00,-3.000000000000000000e+00,-4.000000000000000000e+00
+3.000000000000000000e+00,0.000000000000000000e+00,-4.000000000000000000e+00
+-2.000000000000000000e+00,0.000000000000000000e+00,3.000000000000000000e+00
+2.000000000000000000e+00,0.000000000000000000e+00,-2.000000000000000000e+00
+1.000000000000000000e+00,1.000000000000000000e+00,3.000000000000000000e+00
+2.000000000000000000e+00,1.000000000000000000e+00,-1.000000000000000000e+00
+-3.000000000000000000e+00,-1.000000000000000000e+00,1.000000000000000000e+00
+3.000000000000000000e+00,-3.000000000000000000e+00,2.000000000000000000e+00
+1.000000000000000000e+00,0.000000000000000000e+00,-2.000000000000000000e+00
+-2.000000000000000000e+00,0.000000000000000000e+00,-1.000000000000000000e+00
+-4.000000000000000000e+00,0.000000000000000000e+00,-3.000000000000000000e+00
+-1.000000000000000000e+00,3.000000000000000000e+00,-1.000000000000000000e+00
+1.000000000000000000e+00,-2.000000000000000000e+00,0.000000000000000000e+00
+-2.000000000000000000e+00,-3.000000000000000000e+00,3.000000000000000000e+00
+1.000000000000000000e+00,3.000000000000000000e+00,-3.000000000000000000e+00
+-4.000000000000000000e+00,-3.000000000000000000e+00,0.000000000000000000e+00
+-1.000000000000000000e+00,2.000000000000000000e+00,0.000000000000000000e+00
+-4.000000000000000000e+00,2.000000000000000000e+00,1.000000000000000000e+00
+-3.000000000000000000e+00,1.000000000000000000e+00,0.000000000000000000e+00
+1.000000000000000000e+00,-2.000000000000000000e+00,-4.000000000000000000e+00
+3.000000000000000000e+00,-1.000000000000000000e+00,-4.000000000000000000e+00
+2.000000000000000000e+00,2.000000000000000000e+00,-4.000000000000000000e+00
+0.000000000000000000e+00,1.000000000000000000e+00,1.000000000000000000e+00
+0.000000000000000000e+00,-2.000000000000000000e+00,-4.000000000000000000e+00
+0.000000000000000000e+00,-4.000000000000000000e+00,1.000000000000000000e+00
+-3.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00
+2.000000000000000000e+00,0.000000000000000000e+00,2.000000000000000000e+00
+0.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/kernel.txt
new file mode 100644
index 0000000..86e52ba
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/kernel.txt
@@ -0,0 +1,25 @@
+# 4,2,3,4
+1.000000000000000000e+00,-4.000000000000000000e+00,2.000000000000000000e+00,-3.000000000000000000e+00
+0.000000000000000000e+00,0.000000000000000000e+00,3.000000000000000000e+00,-2.000000000000000000e+00
+-4.000000000000000000e+00,0.000000000000000000e+00,-2.000000000000000000e+00,-2.000000000000000000e+00
+-4.000000000000000000e+00,1.000000000000000000e+00,1.000000000000000000e+00,-1.000000000000000000e+00
+-3.000000000000000000e+00,-3.000000000000000000e+00,2.000000000000000000e+00,2.000000000000000000e+00
+0.000000000000000000e+00,0.000000000000000000e+00,-4.000000000000000000e+00,0.000000000000000000e+00
+-3.000000000000000000e+00,-3.000000000000000000e+00,-4.000000000000000000e+00,-1.000000000000000000e+00
+0.000000000000000000e+00,0.000000000000000000e+00,-3.000000000000000000e+00,0.000000000000000000e+00
+-2.000000000000000000e+00,3.000000000000000000e+00,-4.000000000000000000e+00,-1.000000000000000000e+00
+1.000000000000000000e+00,0.000000000000000000e+00,-2.000000000000000000e+00,-3.000000000000000000e+00
+-4.000000000000000000e+00,-2.000000000000000000e+00,2.000000000000000000e+00,-1.000000000000000000e+00
+-4.000000000000000000e+00,-2.000000000000000000e+00,-2.000000000000000000e+00,0.000000000000000000e+00
+-1.000000000000000000e+00,-3.000000000000000000e+00,0.000000000000000000e+00,-4.000000000000000000e+00
+2.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00,2.000000000000000000e+00
+-3.000000000000000000e+00,1.000000000000000000e+00,-2.000000000000000000e+00,-2.000000000000000000e+00
+1.000000000000000000e+00,-2.000000000000000000e+00,0.000000000000000000e+00,3.000000000000000000e+00
+-4.000000000000000000e+00,-4.000000000000000000e+00,-3.000000000000000000e+00,3.000000000000000000e+00
+-2.000000000000000000e+00,-2.000000000000000000e+00,-2.000000000000000000e+00,-4.000000000000000000e+00
+-2.000000000000000000e+00,3.000000000000000000e+00,1.000000000000000000e+00,3.000000000000000000e+00
+3.000000000000000000e+00,3.000000000000000000e+00,0.000000000000000000e+00,2.000000000000000000e+00
+-2.000000000000000000e+00,-1.000000000000000000e+00,-4.000000000000000000e+00,2.000000000000000000e+00
+-4.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00,3.000000000000000000e+00
+-4.000000000000000000e+00,2.000000000000000000e+00,2.000000000000000000e+00,-3.000000000000000000e+00
+3.000000000000000000e+00,-2.000000000000000000e+00,-3.000000000000000000e+00,3.000000000000000000e+00
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/params.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/params.txt
new file mode 100644
index 0000000..53f7849
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/int16xint8/params.txt
@@ -0,0 +1,12 @@
+3
+4
+7
+8
+2
+4
+2
+3
+0
+1
+1
+1
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/bias.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/bias.txt
new file mode 100644
index 0000000..cd69f41
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/bias.txt
@@ -0,0 +1,2 @@
+# 2
+1.590836480000000000e+09,2.583949568000000000e+09
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/input.txt
new file mode 100644
index 0000000..3a03b01
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/input.txt
@@ -0,0 +1,7 @@
+# 1,2,3,2
+-7.000000000000000000e+00,5.000000000000000000e+00
+-7.000000000000000000e+00,-6.000000000000000000e+00
+-3.000000000000000000e+00,7.000000000000000000e+00
+-3.000000000000000000e+00,-4.000000000000000000e+00
+2.000000000000000000e+00,4.000000000000000000e+00
+5.000000000000000000e+00,-7.000000000000000000e+00
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/kernel.txt
new file mode 100644
index 0000000..8709479
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/kernel.txt
@@ -0,0 +1,9 @@
+# 2,2,2,2
+-4.000000000000000000e+00,1.000000000000000000e+00
+1.000000000000000000e+00,0.000000000000000000e+00
+3.000000000000000000e+00,1.000000000000000000e+00
+3.000000000000000000e+00,-3.000000000000000000e+00
+-2.000000000000000000e+00,-4.000000000000000000e+00
+-6.000000000000000000e+00,-5.000000000000000000e+00
+0.000000000000000000e+00,-3.000000000000000000e+00
+0.000000000000000000e+00,5.000000000000000000e+00
diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/params.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/params.txt
new file mode 100644
index 0000000..2b44c69
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/requantize_s64/params.txt
@@ -0,0 +1,12 @@
+2
+2
+3
+2
+2
+2
+1
+1
+0
+0
+1
+0
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/biases_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/biases_data.h
new file mode 100644
index 0000000..a4a87a4
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t int16xint8_biases[4] = {-260092, -1040368, -780276, -520184};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/config_data.h
new file mode 100644
index 0000000..79f7e52
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/config_data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#define INT16XINT8_OUT_CH 4
+#define INT16XINT8_IN_CH 3
+#define INT16XINT8_INPUT_W 7
+#define INT16XINT8_INPUT_H 8
+#define INT16XINT8_DST_SIZE 48
+#define INT16XINT8_INPUT_SIZE 168
+#define INT16XINT8_OUT_ACTIVATION_MIN -32768
+#define INT16XINT8_OUT_ACTIVATION_MAX 32767
+#define INT16XINT8_INPUT_BATCHES 1
+#define INT16XINT8_INPUT_OFFSET 0
+#define INT16XINT8_OUTPUT_OFFSET 0
+#define INT16XINT8_FILTER_X 2
+#define INT16XINT8_FILTER_Y 4
+#define INT16XINT8_STRIDE_X 2
+#define INT16XINT8_STRIDE_Y 3
+#define INT16XINT8_PAD_X 0
+#define INT16XINT8_PAD_Y 1
+#define INT16XINT8_OUTPUT_W 4
+#define INT16XINT8_OUTPUT_H 3
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/input_data.h
new file mode 100644
index 0000000..abd5bd6
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/input_data.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t int16xint8_input[168] = {
+    0,      -16384, 16384,  -16384, -16384, -8192,  8192,   0,      -32768, 0,      -32768, -16384, 0,      -32768,
+    -16384, -32768, 8192,   0,      0,      -24576, 8192,   -8192,  -16384, 16384,  -32768, 8192,   -16384, -8192,
+    8192,   -16384, 16384,  -8192,  16384,  -32768, -24576, -8192,  -24576, -8192,  -16384, -32768, 0,      -8192,
+    -24576, 24576,  -16384, 16384,  24576,  8192,   -32768, -24576, -8192,  8192,   24576,  8192,   -24576, -16384,
+    -32768, 0,      0,      8192,   8192,   24576,  -8192,  -16384, -16384, -24576, 0,      16384,  24576,  -32768,
+    24576,  8192,   0,      -32768, -24576, 0,      -8192,  -24576, -16384, -16384, -8192,  16384,  8192,   -16384,
+    24576,  -24576, -32768, 24576,  0,      -32768, -16384, 0,      24576,  16384,  0,      -16384, 8192,   8192,
+    24576,  16384,  8192,   -8192,  -24576, -8192,  8192,   24576,  -24576, 16384,  8192,   0,      -16384, -16384,
+    0,      -8192,  -32768, 0,      -24576, -8192,  24576,  -8192,  8192,   -16384, 0,      -16384, -24576, 24576,
+    8192,   24576,  -24576, -32768, -24576, 0,      -8192,  16384,  0,      -32768, 16384,  8192,   -24576, 8192,
+    0,      8192,   -16384, -32768, 24576,  -8192,  -32768, 16384,  16384,  -32768, 0,      8192,   8192,   0,
+    -16384, -32768, 0,      -32768, 8192,   -24576, 8192,   16384,  16384,  0,      16384,  0,      8192,   16384};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h
new file mode 100644
index 0000000..46aa18e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t int16xint8_output_mult[4] = {1082246025, 1082246025, 1082246025, 1082246025};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_ref_data.h
new file mode 100644
index 0000000..a3fc92c
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_ref_data.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t int16xint8_output_ref[48] = {0,   -9, -6, -47, 2,  7,  15, 23,  27,  11, 1,   -13, 24, -5,  -8,  -6,
+                                         -36, 12, -1, 20,  5,  47, 62, 33,  26,  24, 39,  2,   0,  -32, -11, 37,
+                                         14,  -6, 6,  -6,  -3, 14, 20, -10, -11, 0,  -17, 33,  45, -6,  22,  7};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_shift_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_shift_data.h
new file mode 100644
index 0000000..d5e9299
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_shift_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t int16xint8_output_shift[4] = {-17, -17, -17, -17};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/test_data.h
new file mode 100644
index 0000000..1af706c
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/test_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/weights_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/weights_data.h
new file mode 100644
index 0000000..2b6e3a1
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/weights_data.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t int16xint8_weights[96] = {
+    32,   -127, 64,   -95, 0,    0,    95,   -64, -127, 0,   -64, -64,  -127, 32,  32,   -32, -95, -95, 64,  64,
+    0,    0,    -127, 0,   -95,  -95,  -127, -32, 0,    0,   -95, 0,    -64,  95,  -127, -32, 32,  0,   -64, -95,
+    -127, -64,  64,   -32, -127, -64,  -64,  0,   -32,  -95, 0,   -127, 64,   32,  64,   64,  -95, 32,  -64, -64,
+    32,   -64,  0,    95,  -127, -127, -95,  95,  -64,  -64, -64, -127, -64,  95,  32,   95,  95,  95,  0,   64,
+    -64,  -32,  -127, 64,  -127, 32,   64,   95,  -127, 64,  64,  -95,  95,   -64, -95,  95};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/biases_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/biases_data.h
new file mode 100644
index 0000000..b4c6a1e
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/biases_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t requantize_s64_biases[2] = {2147483647, 2147483647};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/config_data.h
new file mode 100644
index 0000000..525b810
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/config_data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#define REQUANTIZE_S64_OUT_CH 2
+#define REQUANTIZE_S64_IN_CH 2
+#define REQUANTIZE_S64_INPUT_W 3
+#define REQUANTIZE_S64_INPUT_H 2
+#define REQUANTIZE_S64_DST_SIZE 4
+#define REQUANTIZE_S64_INPUT_SIZE 12
+#define REQUANTIZE_S64_OUT_ACTIVATION_MIN -32768
+#define REQUANTIZE_S64_OUT_ACTIVATION_MAX 32767
+#define REQUANTIZE_S64_INPUT_BATCHES 1
+#define REQUANTIZE_S64_INPUT_OFFSET 0
+#define REQUANTIZE_S64_OUTPUT_OFFSET 0
+#define REQUANTIZE_S64_FILTER_X 2
+#define REQUANTIZE_S64_FILTER_Y 2
+#define REQUANTIZE_S64_STRIDE_X 1
+#define REQUANTIZE_S64_STRIDE_Y 1
+#define REQUANTIZE_S64_PAD_X 0
+#define REQUANTIZE_S64_PAD_Y 0
+#define REQUANTIZE_S64_OUTPUT_W 2
+#define REQUANTIZE_S64_OUTPUT_H 1
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/input_data.h
new file mode 100644
index 0000000..7a47dba
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/input_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t requantize_s64_input[12] = {-14, 10, -14, -12, -6, 14, -6, -8, 4, 8, 10, -14};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_mult_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_mult_data.h
new file mode 100644
index 0000000..74a3970
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_mult_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t requantize_s64_output_mult[2] = {1082196484, 1623294726};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_ref_data.h
new file mode 100644
index 0000000..3db6f0c
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_ref_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t requantize_s64_output_ref[4] = {32767, 32767, 32767, 32767};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_shift_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_shift_data.h
new file mode 100644
index 0000000..c9332d5
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/output_shift_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const int32_t requantize_s64_output_shift[2] = {-5, -5};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/test_data.h
new file mode 100644
index 0000000..1af706c
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/test_data.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/weights_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/weights_data.h
new file mode 100644
index 0000000..d59cb47
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/requantize_s64/weights_data.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.4.1 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t requantize_s64_weights[16] = {-127, 32, 32, 0, 95, 32, 95, -95, -42, -85, -127, -106, 0, -64, 0, 106};
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf/config_data.h
index 7a2d39a..1f55eec 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf/config_data.h
@@ -24,7 +24,7 @@
 #define SVDF_MULTIPLIER_OUT 1073741824
 #define SVDF_SHIFT_1 -4
 #define SVDF_SHIFT_2 1
-#define SVDF_IN_ACTIVATION_MIN -32767
+#define SVDF_IN_ACTIVATION_MIN -32768
 #define SVDF_IN_ACTIVATION_MAX 32767
 #define SVDF_RANK 8
 #define SVDF_FEATURE_BATCHES 24
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_1/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_1/config_data.h
index 2fd5bc6..2c43a13 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_1/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_1/config_data.h
@@ -24,7 +24,7 @@
 #define SVDF_1_MULTIPLIER_OUT 1073741824
 #define SVDF_1_SHIFT_1 -4
 #define SVDF_1_SHIFT_2 1
-#define SVDF_1_IN_ACTIVATION_MIN -32767
+#define SVDF_1_IN_ACTIVATION_MIN -32768
 #define SVDF_1_IN_ACTIVATION_MAX 32767
 #define SVDF_1_RANK 1
 #define SVDF_1_FEATURE_BATCHES 5
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_2/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_2/config_data.h
index e7e71b8..4a03e4b 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_2/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_2/config_data.h
@@ -24,7 +24,7 @@
 #define SVDF_2_MULTIPLIER_OUT 1073741824
 #define SVDF_2_SHIFT_1 -4
 #define SVDF_2_SHIFT_2 1
-#define SVDF_2_IN_ACTIVATION_MIN -32767
+#define SVDF_2_IN_ACTIVATION_MIN -32768
 #define SVDF_2_IN_ACTIVATION_MAX 32767
 #define SVDF_2_RANK 2
 #define SVDF_2_FEATURE_BATCHES 10
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_3/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_3/config_data.h
index 78d62a6..ead1935 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_3/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/svdf_3/config_data.h
@@ -24,7 +24,7 @@
 #define SVDF_3_MULTIPLIER_OUT 1073741824
 #define SVDF_3_SHIFT_1 -4
 #define SVDF_3_SHIFT_2 1
-#define SVDF_3_IN_ACTIVATION_MIN -32767
+#define SVDF_3_IN_ACTIVATION_MIN -32768
 #define SVDF_3_IN_ACTIVATION_MAX 32767
 #define SVDF_3_RANK 1
 #define SVDF_3_FEATURE_BATCHES 12
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/Utils/validate.h b/CMSIS/NN/Tests/UnitTest/TestCases/Utils/validate.h
index d7ae2d5..c9f3b11 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/Utils/validate.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/Utils/validate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -49,3 +49,32 @@
 
     return test_passed;
 }
+
+static inline int validate_s16(int16_t *act, const int16_t *ref, int size)
+{
+    int test_passed = true;
+    int count = 0;
+    int total = 0;
+
+    for (int i = 0; i < size; ++i)
+    {
+        total++;
+        if (act[i] != ref[i])
+        {
+            count++;
+            printf("ERROR at pos %d: Act: %d Ref: %d\r\n", i, act[i], ref[i]);
+            test_passed = false;
+        }
+        else
+        {
+            // printf("PASS at pos %d: %d\r\n", i, act[i]);
+        }
+    }
+
+    if (!test_passed)
+    {
+        printf("%d of %d failed\r\n", count, total);
+    }
+
+    return test_passed;
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/CMakeLists.txt
new file mode 100644
index 0000000..5cfab1c
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_convolve_s16)
+
+target_sources(test_arm_convolve_s16 PRIVATE
+    Unity/unity_test_arm_convolve_s16.c
+    Unity/TestRunner/unity_test_arm_convolve_s16_runner.c)
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/Unity/unity_test_arm_convolve_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/Unity/unity_test_arm_convolve_s16.c
new file mode 100644
index 0000000..fb6c7db
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/Unity/unity_test_arm_convolve_s16.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_convolve_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_int16xint8_arm_convolve_s16(void) { int16xint8_arm_convolve_s16(); }
+void test_requantize_s64_arm_convolve_s16(void) { requantize_s64_arm_convolve_s16(); }
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/test_arm_convolve_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/test_arm_convolve_s16.c
new file mode 100644
index 0000000..3f3ae35
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_convolve_s16/test_arm_convolve_s16.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+
+#include <arm_nnfunctions.h>
+#include <unity.h>
+
+#include "../TestData/int16xint8/test_data.h"
+#include "../TestData/requantize_s64/test_data.h"
+#include "../Utils/validate.h"
+
+void int16xint8_arm_convolve_s16(void)
+{
+    q15_t output[INT16XINT8_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_conv_params conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = int16xint8_biases;
+    const q7_t *kernel_data = int16xint8_weights;
+    const q15_t *input_data = int16xint8_input;
+    const q15_t *output_ref = int16xint8_output_ref;
+    const int32_t output_ref_size = INT16XINT8_DST_SIZE;
+
+    input_dims.n = INT16XINT8_INPUT_BATCHES;
+    input_dims.w = INT16XINT8_INPUT_W;
+    input_dims.h = INT16XINT8_INPUT_H;
+    input_dims.c = INT16XINT8_IN_CH;
+    filter_dims.w = INT16XINT8_FILTER_X;
+    filter_dims.h = INT16XINT8_FILTER_Y;
+    output_dims.w = INT16XINT8_OUTPUT_W;
+    output_dims.h = INT16XINT8_OUTPUT_H;
+    output_dims.c = INT16XINT8_OUT_CH;
+
+    conv_params.padding.w = INT16XINT8_PAD_X;
+    conv_params.padding.h = INT16XINT8_PAD_Y;
+    conv_params.stride.w = INT16XINT8_STRIDE_X;
+    conv_params.stride.h = INT16XINT8_STRIDE_Y;
+
+    conv_params.input_offset = 0;
+    conv_params.output_offset = 0;
+    conv_params.activation.min = INT16XINT8_OUT_ACTIVATION_MIN;
+    conv_params.activation.max = INT16XINT8_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)int16xint8_output_mult;
+    quant_params.shift = (int32_t *)int16xint8_output_shift;
+
+    int buf_size = arm_convolve_s16_get_buffer_size(&input_dims, &filter_dims);
+    ctx.buf = malloc(buf_size);
+
+    arm_status result = arm_convolve_s16(&ctx,
+                                         &conv_params,
+                                         &quant_params,
+                                         &input_dims,
+                                         input_data,
+                                         &filter_dims,
+                                         kernel_data,
+                                         &bias_dims,
+                                         bias_data,
+                                         &output_dims,
+                                         output);
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, result);
+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
+
+    buf_size = arm_convolve_wrapper_s16_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
+    ctx.buf = malloc(buf_size);
+
+    result = arm_convolve_wrapper_s16(&ctx,
+                                      &conv_params,
+                                      &quant_params,
+                                      &input_dims,
+                                      input_data,
+                                      &filter_dims,
+                                      kernel_data,
+                                      &bias_dims,
+                                      bias_data,
+                                      &output_dims,
+                                      output);
+
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, result);
+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
+}
+
+void requantize_s64_arm_convolve_s16(void)
+{
+    q15_t output[REQUANTIZE_S64_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_conv_params conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const q63_t *bias_data = requantize_s64_biases;
+    const q7_t *kernel_data = requantize_s64_weights;
+    const q15_t *input_data = requantize_s64_input;
+    const q15_t *output_ref = requantize_s64_output_ref;
+    const int32_t output_ref_size = REQUANTIZE_S64_DST_SIZE;
+
+    input_dims.n = REQUANTIZE_S64_INPUT_BATCHES;
+    input_dims.w = REQUANTIZE_S64_INPUT_W;
+    input_dims.h = REQUANTIZE_S64_INPUT_H;
+    input_dims.c = REQUANTIZE_S64_IN_CH;
+    filter_dims.w = REQUANTIZE_S64_FILTER_X;
+    filter_dims.h = REQUANTIZE_S64_FILTER_Y;
+    output_dims.w = REQUANTIZE_S64_OUTPUT_W;
+    output_dims.h = REQUANTIZE_S64_OUTPUT_H;
+    output_dims.c = REQUANTIZE_S64_OUT_CH;
+
+    conv_params.padding.w = REQUANTIZE_S64_PAD_X;
+    conv_params.padding.h = REQUANTIZE_S64_PAD_Y;
+    conv_params.stride.w = REQUANTIZE_S64_STRIDE_X;
+    conv_params.stride.h = REQUANTIZE_S64_STRIDE_Y;
+
+    conv_params.input_offset = REQUANTIZE_S64_INPUT_OFFSET;
+    conv_params.output_offset = REQUANTIZE_S64_OUTPUT_OFFSET;
+    conv_params.activation.min = REQUANTIZE_S64_OUT_ACTIVATION_MIN;
+    conv_params.activation.max = REQUANTIZE_S64_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)requantize_s64_output_mult;
+    quant_params.shift = (int32_t *)requantize_s64_output_shift;
+
+    int buf_size = arm_convolve_s16_get_buffer_size(&input_dims, &filter_dims);
+    ctx.buf = malloc(buf_size);
+
+    arm_status result = arm_convolve_s16(&ctx,
+                                         &conv_params,
+                                         &quant_params,
+                                         &input_dims,
+                                         input_data,
+                                         &filter_dims,
+                                         kernel_data,
+                                         &bias_dims,
+                                         bias_data,
+                                         &output_dims,
+                                         output);
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, result);
+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
+
+    buf_size = arm_convolve_wrapper_s16_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
+    ctx.buf = malloc(buf_size);
+
+    result = arm_convolve_wrapper_s16(&ctx,
+                                      &conv_params,
+                                      &quant_params,
+                                      &input_dims,
+                                      input_data,
+                                      &filter_dims,
+                                      kernel_data,
+                                      &bias_dims,
+                                      bias_data,
+                                      &output_dims,
+                                      output);
+
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(ARM_MATH_SUCCESS, result);
+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
+}
diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c
index dd5b7bb..41a5f78 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_s8/test_arm_svdf_s8.c
@@ -86,7 +86,7 @@
     const int scratch_size_out = SVDF_INPUT_BATCHES * number_units * sizeof(int32_t);
 
     input_ctx.buf = malloc(scratch_size);
-    output_ctx.buf = malloc(scratch_size);
+    output_ctx.buf = malloc(scratch_size_out);
 
     int8_t *input_data = malloc(input_round_size);
     q15_t *state_data = malloc(sizeof(svdf_state));
diff --git a/CMSIS/NN/Tests/UnitTest/generate_test_data.py b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
index d09bdf1..b18196b 100755
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
@@ -57,6 +57,13 @@
  */
 """
 
+INT32_MAX = 2147483647
+INT32_MIN = -2147483648
+INT16_MAX = 32767
+INT16_MIN = -32768
+INT8_MAX = 127
+INT8_MIN = -128
+
 
 def parse_args():
     parser = argparse.ArgumentParser(description="Generate input and refererence output data for unittests."
@@ -85,22 +92,13 @@
 
     # This is input to the data generation. If everything or something is regenerated then it is overwritten.
     # So it always has the same data as the OUTDIR.
-    # The purpose of the pregen is primarily for debugging, as it enables to change a single parameter and see how the
-    # output changes, without regenerating all input data.
+    # The purpose of the pregen is primarily for debugging, as it enables changing a single parameter and seeing how
+    # the output changes (or not), without regenerating all input data.
     # It also convinient when tesing changes in the script, to be able to run all test sets again.
     PREGEN = 'PregeneratedData/'
 
-    INT32_MAX = 2147483647
-    INT32_MIN = -2147483648
-    INT16_MAX = 32767
-    INT16_MIN = -32767
-    INT8_MAX = 127
-    INT8_MIN = -128
-    UINT8_MAX = 255
-    UINT8_MIN = 0
-
     def __init__(self, dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin,
-                 randmax, outminrange=-128, outmaxrange=127, batches=1, generate_bias=True, relu6=False,
+                 randmax, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1, generate_bias=True, relu6=False,
                  out_activation_min=None, out_activation_max=None):
 
         self.tensor_flow_reference_version = ("// Generated by {} using TFL version {} as reference.\n".
@@ -109,6 +107,8 @@
         # Randomization interval
         self.mins = randmin
         self.maxs = randmax
+        self.bias_mins = randmin
+        self.bias_maxs = randmax
 
         self.relu6 = relu6
         self.input_ch = in_ch
@@ -126,11 +126,11 @@
         if out_activation_min:
             self.out_activation_min = out_activation_min
         else:
-            self.out_activation_min = self.INT8_MIN
+            self.out_activation_min = INT8_MIN
         if out_activation_max:
             self.out_activation_max = out_activation_max
         else:
-            self.out_activation_max = self.INT8_MAX
+            self.out_activation_max = INT8_MAX
 
         minrange = randmin - 1
         maxrange = randmax + 1
@@ -174,18 +174,18 @@
     def clamp(self, result, smallest, largest):
         return max(smallest, min(result, largest))
 
-    def quantize_input(self, value):
+    def quantize_input(self, value, quant_min_value=INT8_MIN, quant_max_value=INT8_MAX):
         result = round(value / self.input_scale) + self.input_zero_point
-        return self.clamp(result, self.INT8_MIN, self.INT8_MAX)
+        return self.clamp(result, quant_min_value, quant_max_value)
 
-    def derive_scale_from_min_max(self, minrange, maxrange):
-        scale = (maxrange - minrange) / ((self.INT8_MAX * 1.0) - self.INT8_MIN)
+    def derive_scale_from_min_max(self, minrange, maxrange, quant_min_value=INT8_MIN, quant_max_value=INT8_MAX):
+        scale = (maxrange - minrange) / ((quant_max_value * 1.0) - quant_min_value)
         return scale
 
     def derive_scale_and_zeropoint_from_min_max(self, minrange, maxrange):
         scale = self.derive_scale_from_min_max(minrange, maxrange)
-        zeropoint = self.INT8_MIN + int(-minrange / scale + 0.5)
-        zeropoint = max(self.INT8_MIN, min(zeropoint, -self.INT8_MIN))
+        zeropoint = INT8_MIN + int(-minrange / scale + 0.5)
+        zeropoint = max(INT8_MIN, min(zeropoint, -INT8_MIN))
         return (scale, zeropoint)
 
     def save_multiple_dim_array_in_txt(self, file, data):
@@ -213,15 +213,18 @@
          self.stride_x, self.stride_y, self.pad_x, self.pad_y, self.batches, self.has_padding) = \
             (map(lambda x: x, params))
 
-    def convert_tensor_np(self, tensor_in, converter):
+    def convert_tensor_np(self, tensor_in, converter, *qminmax):
         w = tensor_in.numpy()
         shape = w.shape
         w = w.ravel()
-        fw = converter(w)
+        if len(qminmax) == 2:
+            fw = converter(w, qminmax[0], qminmax[1])
+        else:
+            fw = converter(w)
         fw.shape = shape
         return tf.convert_to_tensor(fw)
 
-    def convert_tensor(self, tensor_in, converter, params=None):
+    def convert_tensor(self, tensor_in, converter, *qminmax):
         w = tensor_in.numpy()
         shape = w.shape
         w = w.ravel()
@@ -229,8 +232,8 @@
         float_normal = []
 
         for i in normal:
-            if params:
-                float_normal.append(converter(i, params))
+            if len(qminmax) == 2:
+                float_normal.append(converter(i, qminmax[0], qminmax[1]))
             else:
                 float_normal.append(converter(i))
 
@@ -249,7 +252,7 @@
             if not os.path.exists(regendir):
                 os.makedirs(regendir)
             if decimals == 0:
-                data = tf.Variable(tf.random.uniform(dims, minval=minrange, maxval=maxrange, dtype=tf.dtypes.int32))
+                data = tf.Variable(tf.random.uniform(dims, minval=minrange, maxval=maxrange, dtype=tf.dtypes.int64))
                 data = tf.cast(data, dtype=tf.float32)
             else:
                 data = tf.Variable(tf.random.uniform(dims, minval=minrange, maxval=maxrange, dtype=tf.dtypes.float32))
@@ -282,7 +285,9 @@
         else:
             biases = self.get_randomized_data([self.output_ch],
                                               self.bias_table_file,
-                                              regenerate=self.regenerate_new_bias)
+                                              regenerate=self.regenerate_new_bias,
+                                              minrange=self.bias_mins,
+                                              maxrange=self.bias_maxs)
         return biases
 
     def format_output_file(self, file):
@@ -457,13 +462,36 @@
 class ConvSettings(TestSettings):
 
     def __init__(self, dataset, testtype, args, in_ch=1, out_ch=1, x_in=7, y_in=7, w_x=3, w_y=3, stride_x=2, stride_y=2,
-                 pad=True, randmin=-7, randmax=7, outminrange=-128, outmaxrange=127, batches=1, generate_bias=True,
-                 relu6=False, out_activation_min=None, out_activation_max=None):
+                 pad=True, randmin=-7, randmax=7, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1,
+                 generate_bias=True, relu6=False, out_activation_min=None, out_activation_max=None,
+                 int16xint8=False, input_scale=None, bias_min=None, bias_max=None):
         super().__init__(dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad,
                          randmin, randmax, outminrange, outmaxrange, batches, generate_bias=generate_bias, relu6=relu6,
                          out_activation_min=out_activation_min, out_activation_max=out_activation_max)
 
         self.scaling_factors = []
+        self.is_int16xint8 = int16xint8
+
+        if bias_min:
+            self.bias_mins = bias_min
+        if bias_max:
+            self.bias_maxs = bias_max
+        if self.is_int16xint8:
+            if input_scale:
+                self.input_scale = input_scale
+            else:
+                self.input_scale = self.derive_scale_from_min_max(self.mins, self.maxs, INT16_MIN, INT16_MAX)
+            self.input_zero_point = 0
+            self.output_scale = self.derive_scale_from_min_max(outminrange, outmaxrange, INT16_MIN, INT16_MAX)
+            self.output_zero_point = 0
+            if out_activation_min:
+                self.out_activation_min = out_activation_min
+            else:
+                self.out_activation_min = INT16_MIN
+            if out_activation_max:
+                self.out_activation_max = out_activation_max
+            else:
+                self.out_activation_max = INT16_MAX
 
         if self.test_type == 'conv':
             self.quantized_dimension = 0
@@ -489,7 +517,7 @@
             if self.test_type == 'depthwise_conv':
                 f.write("#define {}_CH_MULT {}\n".format(prefix, self.channel_multiplier))
 
-    def quantize_bias(self, nparray):
+    def quantize_bias(self, nparray, quant_min_value=INT16_MIN, quant_max_value=INT16_MAX):
         num_channels = self.output_ch
         quantized_values = []
 
@@ -500,10 +528,10 @@
                 print("WARNING: scale is 0")
                 scale = 0.0000001
             quantized = round(value / scale)
-            if quantized > self.INT16_MAX:
-                quantized = self.INT16_MAX
-            elif quantized < self.INT16_MIN:
-                quantized = self.INT16_MIN
+            if quantized > quant_max_value:
+                quantized = quant_max_value
+            elif quantized < quant_min_value:
+                quantized = quant_min_value
             return quantized
 
         for x in range(num_channels):
@@ -539,7 +567,7 @@
                 fmin = min(fmin, values[idx])
                 fmax = max(fmax, values[idx])
 
-            self.scaling_factors.append(max(abs(fmin), abs(fmax)) / self.INT8_MAX)
+            self.scaling_factors.append(max(abs(fmin), abs(fmax)) / INT8_MAX)
 
             for x in range(per_channel_size):
                 chs = channel * channel_stride + x * stride
@@ -591,6 +619,21 @@
         # Tensorflow Lite has a different kernel format compared to Tensorflow
         reshaped_weights = None
 
+        if self.is_int16xint8:
+            quant_max_value = INT16_MAX
+            quant_min_value = INT16_MIN
+            quant_bias_max_value = INT32_MAX
+            quant_bias_min_value = INT32_MIN
+            datatype = "q15_t"
+            bias_datatype = "int64_t"
+        else:
+            quant_bias_max_value = INT16_MAX
+            quant_bias_min_value = INT16_MIN
+            quant_max_value = INT8_MAX
+            quant_min_value = INT8_MIN
+            datatype = "q7_t"
+            bias_datatype = "int32_t"
+
         input_data = self.get_randomized_input_data(input_data)
 
         if self.test_type == 'conv':
@@ -619,11 +662,19 @@
             conv = self.depthwise_conv2d(input_data, reshaped_weights, biases)
 
         # Quantize and write to C headers
-        self.generate_c_array("input", self.convert_tensor(input_data, self.quantize_input))
+        self.generate_c_array("input",
+                              self.convert_tensor(input_data,
+                                                  self.quantize_input,
+                                                  quant_min_value,
+                                                  quant_max_value),
+                              datatype=datatype)
         self.generate_c_array("weights", self.convert_tensor_np(weights, self.quantize_filter))
-        self.generate_c_array("biases", self.convert_tensor_np(biases, self.quantize_bias), "int32_t")
+        self.generate_c_array("biases", self.convert_tensor_np(biases,
+                                                               self.quantize_bias,
+                                                               quant_bias_min_value,
+                                                               quant_bias_max_value), bias_datatype)
         self.generate_quantize_per_channel_multiplier()
-        self.generate_c_array("output_ref", self.convert_tensor(conv, self.quantize_output))
+        self.generate_c_array("output_ref", self.convert_tensor(conv, self.quantize_output), datatype=datatype)
 
         self.write_c_config_header()
         self.write_c_header_wrapper()
@@ -692,7 +743,7 @@
 class FullyConnectedSettings(TestSettings):
 
     def __init__(self, dataset, testtype, args, in_ch=1, out_ch=1, x_in=1, y_in=1, w_x=1, w_y=1, stride_x=1, stride_y=1,
-                 pad=False, randmin=-4, randmax=4, outminrange=-128, outmaxrange=127, batches=1, input_scale=1.0,
+                 pad=False, randmin=-4, randmax=4, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1, input_scale=1.0,
                  input_zero_point=0, weights_scale=1.0, bias_scale=1.0, output_scale=1.0,
                  output_zero_point=0, generate_bias=True, out_activation_min=None, out_activation_max=None):
         super().__init__(dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin,
@@ -733,16 +784,16 @@
 
     def derive_filter_scale_and_zeropoint_from_min_max(self, mini, maxi):
         scale = self.derive_scale_from_min_max(mini, maxi)
-        zero = int(self.INT8_MIN + (-mini/scale + 0.5))
+        zero = int(INT8_MIN + (-mini/scale + 0.5))
         return (scale, zero)
 
     def quantize_bias(self, value):
         result = int(value / self.bias_scale)
-        return self.clamp(result, self.INT32_MIN, self.INT32_MAX)
+        return self.clamp(result, INT32_MIN, INT32_MAX)
 
     def quantize_weights(self, value):
         result = round(value / self.weights_scale)
-        return self.clamp(result, self.INT8_MIN, self.INT8_MAX)
+        return self.clamp(result, INT8_MIN, INT8_MAX)
 
     def generate_data(self, input_data=None, weights=None, biases=None):
         input_data = self.get_randomized_input_data(input_data)
@@ -776,7 +827,7 @@
         super().__init__(dataset, testtype, args, 1, 1, x_in, y_in, 1, 1, 1, 1, False, randmin,
                          randmax)
         self.output_scale = 1 / 256
-        self.output_zero_point = -128
+        self.output_zero_point = INT8_MIN
         self.x_input = self.x_output = x_in
         self.y_input = self.y_output = y_in
 
@@ -859,8 +910,8 @@
         (self.multiplier_in, self.shift_1) = self.quantize_scale(effective_scale_1)
         (self.multiplier_out, self.shift_2) = self.quantize_scale(effective_scale_2)
 
-        self.in_activation_max = self.INT16_MAX
-        self.in_activation_min = self.INT16_MIN
+        self.in_activation_max = INT16_MAX
+        self.in_activation_min = INT16_MIN
 
     def write_c_config_header(self):
         super().write_c_config_header(write_common_parameters=False)
@@ -888,15 +939,15 @@
 
     def quantize_weights_feature(self, value):
         result = round(value / self.weights_feature_scale)
-        return self.clamp(result, self.INT8_MIN, self.INT8_MAX)
+        return self.clamp(result, INT8_MIN, INT8_MAX)
 
     def quantize_weights_time(self, value):
         result = round(value / self.weights_time_scale)
-        return self.clamp(result, self.INT16_MIN, self.INT16_MAX)
+        return self.clamp(result, INT16_MIN, INT16_MAX)
 
     def quantize_state(self, value):
         result = round(value / self.state_scale)
-        return self.clamp(result, self.INT16_MIN, self.INT16_MAX)
+        return self.clamp(result, INT16_MIN, INT16_MAX)
 
     def quantize_bias(self, value):
         result = round(value / self.bias_scale)
@@ -1080,6 +1131,16 @@
     ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=2, out_ch=2, x_in=3, y_in=3, w_x=3,
                                               w_y=3, stride_x=1, stride_y=1, pad=True, randmin=-5, randmax=5,
                                               out_activation_min=-55, out_activation_max=55)
+    dataset = 'int16xint8'
+    ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=3, out_ch=4, x_in=7,
+                                              y_in=8, w_x=2, w_y=4, stride_x=2, stride_y=3, pad=True,
+                                              randmin=-4, randmax=4, outminrange=-32766, outmaxrange=32767,
+                                              int16xint8=True)
+    dataset = 'requantize_s64'
+    ALL_TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=2, out_ch=2, x_in=3,
+                                              y_in=2, w_x=2, w_y=2, stride_x=1, stride_y=1, pad=False,
+                                              randmin=-7, randmax=8, outminrange=INT16_MIN, outmaxrange=INT16_MAX,
+                                              int16xint8=True, input_scale=0.5, bias_min=-0x300, bias_max=0x9fffffff)
 
     type_of_test = 'depthwise_conv'
     dataset = 'depthwise_2'
diff --git a/CMSIS/NN/Tests/UnitTest/unittest_targets.py b/CMSIS/NN/Tests/UnitTest/unittest_targets.py
index 1a036f4..a9db0b0 100755
--- a/CMSIS/NN/Tests/UnitTest/unittest_targets.py
+++ b/CMSIS/NN/Tests/UnitTest/unittest_targets.py
@@ -305,7 +305,7 @@
         verdict = verdict_pass
     else:
         verdict = verdict_fail
-    print("{} Summary: {} tests in total passed on {} targets ({})".
+    print("{} Summary: {} tests in total passed on {} target(s) ({})".
           format(verdict, passed, len(targets), ', '.join([t['name'] for t in targets])))
     print("{} {:.0f}% tests passed, {} tests failed out of {}".format(verdict, total*100, failed, expected))