CMSIS-NN: Adds support for int16 fully connected * Adds arm_fully_connected_s16() and arm_nn_vec_mat_mult_t_s16(). * Adds unit tests for this. Change-Id: Ie9980a6ba8c80d0cf4898777dde90735a722ddd1

commit: 0fe4e1c166dda73df2b3ed6cb7ec42ef97c2c49a [log] [tgz]
author: Måns Nilsson <mans.nilsson@arm.com> Mon Jun 21 15:05:46 2021 +0200
committer: felix-johnny <48442848+felix-johnny@users.noreply.github.com> Wed Aug 18 10:25:27 2021 +0100
tree: 75051188a1952986689292b246d1d2792caa8803
parent: 78ed0ac442ab11662dae9e4995ea1b16f5e4a2cc [diff]
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index eef3f2e..c9d4551 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc

@@ -12,7 +12,7 @@
       Active development ...
       CMSIS-DSP: 1.10.0 (see revision history for details)
       CMSIS-NN: 3.1.0 (see revision history for details)
-       - Support for int16 convolution
+       - Support for int16 convolution and fully connected for reference implementation
        - Support for DSP extension optimization for int16 convolution
     </release>
     <release version="5.8.0" date="2021-06-24">
@@ -2851,6 +2851,7 @@
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c"/>
@@ -2866,6 +2867,7 @@
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c"/>
         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c"/>
         <file category="source" name="CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c"/>
+        <file category="source" name="CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c"/>
         <file category="source" name="CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c"/>
         <file category="source" name="CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c"/>
         <file category="source" name="CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c"/>

diff --git a/CMSIS/DoxyGen/NN/src/history.txt b/CMSIS/DoxyGen/NN/src/history.txt
index a5a0f5d..f95a894 100644
--- a/CMSIS/DoxyGen/NN/src/history.txt
+++ b/CMSIS/DoxyGen/NN/src/history.txt

@@ -14,6 +14,10 @@
       <li> Added arm_nn_mat_mult_kernel_s16 DSP implementation </li>
       <li> Added conv int16 DSP implementation </li>
       <li> Added unit tests for int16 DSP conv kernel </li>
+      <li> Added arm_fully_connected_s16 and arm_fully_connected_wrapper_s16<br>
+      with support for int16.<br></li>
+      <li> Added fully_connected int16 reference implementation </li>
+      <li> Added unit tests for int16 fully_connected kernel </li>
       </ul>
     </td>
   </tr>

diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index f68c497..8a70abc 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        11 August 2021
- * $Revision:    V.7.2.0
+ * $Date:        13 August 2021
+ * $Revision:    V.7.3.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -1281,7 +1281,7 @@
  *                                definition file to see if an additional buffer is required.
  *                                Optional function {API}_get_buffer_size() provides the buffer
  *                                size if an additional buffer is required.
- * @param[in]      fc_params      Fully Connected layer parameters (e.g. strides, dilations, pads,...)
+ * @param[in]      fc_params      Fully Connected layer parameters.
  *                                Range of fc_params->input_offset  : [-127, 128]
  *                                fc_params->filter_offset : 0
  *                                Range of fc_params->output_offset : [-128, 127]
@@ -1331,6 +1331,62 @@
 int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
 
 /**
+ * @brief Basic s16 Fully Connected function.
+ *
+ * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
+ *                                definition file to see if an additional buffer is required.
+ *                                Optional function {API}_get_buffer_size() provides the buffer
+ *                                size if an additional buffer is required.
+ * @param[in]      fc_params      Fully Connected layer parameters.
+ *                                fc_params->input_offset  : 0
+ *                                fc_params->filter_offset : 0
+ *                                fc_params->output_offset : 0
+ * @param[in]      quant_params   Per-tensor quantization info.
+ *                                It contains the multiplier and shift values to be applied to the output tensor.
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                                Input dimension is taken as Nx(H * W * C_IN)
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Two dimensional filter dimensions. Format: [N, C]
+ *                                N : accumulation depth and equals (H * W * C_IN) from input_dims
+ *                                C : output depth and equals C_OUT in output_dims
+ *                                H & W : Not used
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ *                                N, H, W : Not used
+ * @param[in]      bias_data      Bias data pointer. Data type: int64. May be NULL, in which case no bias is added.
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, C_OUT]
+ *                                N : Batches
+ *                                C_OUT : Output depth
+ *                                H & W : Not used.
+ * @param[in, out] output_data    Output data pointer. Data type: int16
+ * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q15 is used as the data type even though the data is s16. This keeps it consistent with existing APIs.
+ */
+arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_fc_params *fc_params,
+                                   const cmsis_nn_per_tensor_quant_params *quant_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q15_t *input_data,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const q7_t *filter_data,
+                                   const cmsis_nn_dims *bias_dims,
+                                   const int64_t *bias_data,
+                                   const cmsis_nn_dims *output_dims,
+                                   q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S16 basic fully-connected and
+ * matrix multiplication layer function for TF Lite
+ * @param[in]      filter_dims             dimension of filter
+ * @return         The function returns    required buffer size in bytes (always 0 for this s16 implementation)
+ *
+ */
+int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims);
+
+/**
  * @brief Q7 opt fully-connected layer function
  * @param[in]       pV          pointer to input vector
  * @param[in]       pM          pointer to matrix weights

diff --git a/CMSIS/NN/Include/arm_nnsupportfunctions.h b/CMSIS/NN/Include/arm_nnsupportfunctions.h
index b9db880..ff2a49e 100644
--- a/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        12 August 2021
- * $Revision:    V.5.8.0
+ * $Date:        13. Aug 2021
+ * $Revision:    V.5.9.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -406,6 +406,34 @@
                                     const int32_t activation_max);
 
 /**
+ * @brief s16 Vector by Matrix (transposed) multiplication
+ *
+ * @param[in]      lhs             Input left-hand side vector (s16, rhs_cols elements)
+ * @param[in]      rhs             Input right-hand side matrix (transposed; s8, rhs_rows rows of rhs_cols elements)
+ * @param[in]      bias            Input bias, one 64-bit value per output. May be NULL (accumulation then starts at 0)
+ * @param[out]     dst             Output vector (s16, rhs_rows elements)
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int16
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int16
+ *
+ * @return         The function returns <code>ARM_MATH_SUCCESS</code>
+ *
+ */
+arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
+                                     const q7_t *rhs,
+                                     const q63_t *bias,
+                                     q15_t *dst,
+                                     const int32_t dst_multiplier,
+                                     const int32_t dst_shift,
+                                     const int32_t rhs_cols,
+                                     const int32_t rhs_rows,
+                                     const int32_t activation_min,
+                                     const int32_t activation_max);
+
+/**
  * @brief s8 Vector by Matrix (transposed) multiplication with s16 output
  *
  * @param[in]      lhs             Input left-hand side vector

diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
index b8df42b..20b77d6 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c

@@ -21,8 +21,8 @@
  * Title:        arm_convolve_s16.c
  * Description:  s16 version of convolution using symmetric quantization.
  *
- * $Date:        July 5, 2021
- * $Revision:    V.1.0.0
+ * $Date:        August 3, 2021
+ * $Revision:    V.1.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -90,7 +90,7 @@
 
         for (int32_t i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
         {
-            q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i_out_ch]);
+            const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i_out_ch]);
 
             for (int32_t base_idx_y = -pad_y, i_out_y = 0; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
             {

diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt b/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt
index e9d4985..d4d2e33 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt

@@ -20,6 +20,7 @@
 
 file(GLOB SRC "./*_s8.c")
 add_library(CMSISNNFullyConnected STATIC ${SRC})
+target_sources(CMSISNNFullyConnected PUBLIC arm_fully_connected_s16.c)
 
 ### Includes
 target_include_directories(CMSISNNFullyConnected PUBLIC "${NN}/Include")

diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c
new file mode 100644
index 0000000..46df578
--- /dev/null
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c

@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_fully_connected_s16
+ * Description:  Fully connected function compatible with TF Lite.
+ *
+ * $Date:        3. August 2021
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M and Cortex-A cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+/*
+ * S16 basic fully-connected and matrix multiplication layer function for TensorFlow Lite.
+ * Calls arm_nn_vec_mat_mult_t_s16() once per batch (input_dims->n), passing the same kernel
+ * and bias each time. ctx and bias_dims are unused; only the activation range of fc_params is read.
+ * Refer header file for details.
+ */
+arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_fc_params *fc_params,
+                                   const cmsis_nn_per_tensor_quant_params *quant_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q15_t *input,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const q7_t *kernel,
+                                   const cmsis_nn_dims *bias_dims,
+                                   const int64_t *bias,
+                                   const cmsis_nn_dims *output_dims,
+                                   q15_t *output)
+{
+    (void)bias_dims;
+    (void)ctx;
+    (void)fc_params->filter_offset; /* only referenced in this void cast; no offset is applied below */
+
+    int32_t batch_cnt = input_dims->n; /* one vector-by-matrix product per batch */
+
+    const q31_t reduced_multiplier = REDUCE_MULTIPLIER(quant_params->multiplier); /* computed once for all batches */
+
+    while (batch_cnt)
+    {
+        arm_nn_vec_mat_mult_t_s16(input,
+                                  kernel,
+                                  bias,
+                                  output,
+                                  reduced_multiplier,
+                                  quant_params->shift,
+                                  filter_dims->n, /* col_dim or accum_depth */
+                                  output_dims->c, /* row_dim or output_depth */
+                                  fc_params->activation.min,
+                                  fc_params->activation.max);
+        input += filter_dims->n;  /* advance to the next batch's input vector */
+        output += output_dims->c; /* advance to the next batch's output vector */
+        batch_cnt--;
+    }
+
+    return (ARM_MATH_SUCCESS); /* the helper's return value is not inspected */
+}
+
+int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims)
+{
+    (void)filter_dims; /* unused: the result does not depend on the filter dimensions */
+    return 0;          /* always reports a buffer size of 0 bytes */
+}
+
+/**
+ * @} end of FC group
+ */

diff --git a/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt b/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt
index 255bce8..fbaef32 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt
+++ b/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt

@@ -20,7 +20,10 @@
 
 file(GLOB SRC "./*_s8.c")
 add_library(CMSISNNSupport STATIC ${SRC})
-target_sources(CMSISNNSupport PUBLIC arm_q7_to_q15_with_offset.c arm_nn_mat_mul_kernel_s16.c)
+
+target_sources(CMSISNNSupport PUBLIC arm_q7_to_q15_with_offset.c
+                                     arm_nn_mat_mul_kernel_s16.c
+                                     arm_nn_vec_mat_mult_t_s16.c)
 
 ### Includes
 target_include_directories(CMSISNNSupport PUBLIC "${NN}/Include")

diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c
new file mode 100644
index 0000000..b5405c5
--- /dev/null
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c

@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_vec_mat_mult_t_s16
+ * Description:  s16 vector by matrix (transposed) multiplication
+ *
+ * $Date:        13. August 2021
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup NNBasicMath
+ * @{
+ */
+
+/*
+ * s16 vector (lhs) by s8 matrix (rhs, transposed) multiplication with 64-bit accumulation.
+ * Each output is: bias (if given) + dot(lhs, one rhs row), passed through arm_nn_requantize_s64(),
+ * clamped to [activation_min, activation_max] and stored as q15_t. Rows are processed two at a
+ * time, with a separate loop for a leftover odd row. Refer header file for details.
+ */
+arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
+                                     const q7_t *rhs,
+                                     const q63_t *bias,
+                                     q15_t *dst,
+                                     const int32_t dst_multiplier,
+                                     const int32_t dst_shift,
+                                     const int32_t rhs_cols,
+                                     const int32_t rhs_rows,
+                                     const int32_t activation_min,
+                                     const int32_t activation_max)
+{
+    int32_t row_loop_cnt = rhs_rows / 2; // number of row pairs
+
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
+    {
+        const q15_t *lhs_ptr = lhs; // lhs is re-read from its start for every row pair
+        const q7_t *rhs_ptr_0 = &rhs[0];
+        const q7_t *rhs_ptr_1 = &rhs[rhs_cols]; // second row of the pair starts rhs_cols elements later
+
+        q63_t res00 = 0;
+        q63_t res01 = 0;
+
+        if (bias) // bias is optional; the accumulators stay 0 when it is NULL
+        {
+            res00 = *bias++;
+            res01 = *bias++;
+        }
+        for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        {
+            const q63_t rhs_value0 = (int8_t)*rhs_ptr_0;
+            const q63_t rhs_value1 = (int8_t)*rhs_ptr_1;
+            const q63_t lhs_value = *lhs_ptr; // one lhs element feeds both row accumulators
+
+            res00 += lhs_value * rhs_value0;
+            res01 += lhs_value * rhs_value1;
+
+            ++rhs_ptr_0;
+            ++rhs_ptr_1;
+            ++lhs_ptr;
+        }
+
+        // Quantize down via arm_nn_requantize_s64() using dst_multiplier and dst_shift
+        res00 = arm_nn_requantize_s64(res00, dst_multiplier, dst_shift);
+        res01 = arm_nn_requantize_s64(res01, dst_multiplier, dst_shift);
+
+        // Clamp the result to [activation_min, activation_max]
+        res00 = MAX(res00, activation_min);
+        res00 = MIN(res00, activation_max);
+        res01 = MAX(res01, activation_min);
+        res01 = MIN(res01, activation_max);
+
+        *dst++ = (q15_t)res00;
+        *dst++ = (q15_t)res01;
+
+        rhs += 2 * rhs_cols; // step past the two rows just consumed
+    }
+
+    const int loop_cnt = rhs_rows % 2; // leftover row when rhs_rows is odd
+
+    for (int i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++)
+    {
+        const q15_t *lhs_ptr = &lhs[0];
+        const q7_t *rhs_ptr = &rhs[0]; // rhs already points at the remaining row
+
+        q63_t res00 = 0;
+        if (bias) // bias is optional; the accumulator stays 0 when it is NULL
+        {
+            res00 = *bias++;
+        }
+
+        for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        {
+            q31_t rhs_value0 = (int8_t)rhs_ptr[0]; // 32-bit operands here, unlike the q63_t operands in the paired loop
+            q31_t lhs_value = (int16_t)lhs_ptr[0];
+
+            res00 += lhs_value * rhs_value0; // product is formed in 32 bits, then added to the 64-bit accumulator
+
+            ++rhs_ptr;
+            ++lhs_ptr;
+        }
+
+        // Quantize down via arm_nn_requantize_s64() using dst_multiplier and dst_shift
+        res00 = arm_nn_requantize_s64(res00, dst_multiplier, dst_shift);
+
+        // Clamp the result to [activation_min, activation_max]
+        res00 = MAX(res00, activation_min);
+        res00 = MIN(res00, activation_max);
+
+        *dst++ = (q15_t)res00;
+        rhs += rhs_cols;
+    }
+
+    return ARM_MATH_SUCCESS;
+}
+
+/**
+ * @} end of NNBasicMath group
+ */

diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
index 9ee2179..8b3e1c4 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c

@@ -21,8 +21,8 @@
  * Title:        arm_nn_vec_mat_mult_t_s8
  * Description:  s8 vector by matrix (transposed) multiplication
  *
- * $Date:        02. May 2021
- * $Revision:    V.2.5.0
+ * $Date:        3. August 2021
+ * $Revision:    V.2.5.1
  *
  * Target Processor:  Cortex-M
  *
@@ -361,7 +361,7 @@
 
         for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
         {
-            q31_t rhs_value0 = (int8_t)rhs_ptr[0] + rhs_offset;
+            q31_t rhs_value0 = (int8_t)rhs_ptr[0];
             q31_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset;
 
             res00 += lhs_value * rhs_value0;

diff --git a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
index b9afb8b..cc9d7d3 100644
--- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt

@@ -75,6 +75,7 @@
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8)
 add_subdirectory(TestCases/test_arm_depthwise_conv_s8_opt)
 add_subdirectory(TestCases/test_arm_fully_connected_s8)
+add_subdirectory(TestCases/test_arm_fully_connected_s16)
 add_subdirectory(TestCases/test_arm_max_pool_s8)
 add_subdirectory(TestCases/test_arm_softmax_s8)
 add_subdirectory(TestCases/test_arm_svdf_s8)

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/bias.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/bias.txt
new file mode 100644
index 0000000..1b80464
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/bias.txt

@@ -0,0 +1,2 @@
+# 11
+-5.000000000000000000e+00,4.500000000000000000e+01,5.300000000000000000e+01,-3.300000000000000000e+01,3.100000000000000000e+01,5.100000000000000000e+01,4.300000000000000000e+01,3.500000000000000000e+01,3.700000000000000000e+01,-1.000000000000000000e+00,4.600000000000000000e+01

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/input.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/input.txt
new file mode 100644
index 0000000..dc32ba9
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/input.txt

@@ -0,0 +1,19 @@
+# 2,3,3,7
+-3.000000000000000000e+00,-3.800000000000000000e+01,2.400000000000000000e+01,8.000000000000000000e+00,-1.100000000000000000e+01,-4.300000000000000000e+01,4.700000000000000000e+01
+4.500000000000000000e+01,6.000000000000000000e+00,-1.000000000000000000e+01,-3.000000000000000000e+00,-5.200000000000000000e+01,-2.300000000000000000e+01,9.000000000000000000e+00
+3.200000000000000000e+01,3.000000000000000000e+00,-7.000000000000000000e+00,-2.000000000000000000e+00,-5.000000000000000000e+00,2.800000000000000000e+01,-4.000000000000000000e+01
+-2.100000000000000000e+01,-3.900000000000000000e+01,6.000000000000000000e+00,-4.500000000000000000e+01,3.500000000000000000e+01,-8.000000000000000000e+00,-1.600000000000000000e+01
+-1.600000000000000000e+01,2.000000000000000000e+00,-9.000000000000000000e+00,-4.200000000000000000e+01,-3.500000000000000000e+01,1.200000000000000000e+01,-3.700000000000000000e+01
+3.300000000000000000e+01,1.400000000000000000e+01,-4.700000000000000000e+01,-3.200000000000000000e+01,-3.800000000000000000e+01,4.000000000000000000e+01,1.300000000000000000e+01
+1.200000000000000000e+01,4.600000000000000000e+01,1.200000000000000000e+01,-4.600000000000000000e+01,2.700000000000000000e+01,-4.200000000000000000e+01,3.300000000000000000e+01
+3.000000000000000000e+00,-4.300000000000000000e+01,5.300000000000000000e+01,3.000000000000000000e+00,-5.000000000000000000e+01,-3.500000000000000000e+01,2.700000000000000000e+01
+-3.000000000000000000e+00,-1.800000000000000000e+01,1.200000000000000000e+01,-3.900000000000000000e+01,-4.700000000000000000e+01,2.800000000000000000e+01,1.000000000000000000e+00
+3.000000000000000000e+01,8.000000000000000000e+00,-5.000000000000000000e+01,1.100000000000000000e+01,-2.300000000000000000e+01,1.000000000000000000e+00,4.000000000000000000e+00
+-3.100000000000000000e+01,1.700000000000000000e+01,2.000000000000000000e+00,-3.500000000000000000e+01,-2.000000000000000000e+01,5.100000000000000000e+01,-3.200000000000000000e+01
+-4.200000000000000000e+01,-3.700000000000000000e+01,-3.700000000000000000e+01,3.600000000000000000e+01,-1.200000000000000000e+01,3.800000000000000000e+01,-1.200000000000000000e+01
+-2.800000000000000000e+01,-4.600000000000000000e+01,-4.900000000000000000e+01,-5.600000000000000000e+01,1.700000000000000000e+01,-3.200000000000000000e+01,6.000000000000000000e+00
+-1.900000000000000000e+01,-4.700000000000000000e+01,5.000000000000000000e+00,-1.700000000000000000e+01,-2.900000000000000000e+01,1.000000000000000000e+00,5.000000000000000000e+01
+-3.900000000000000000e+01,1.000000000000000000e+00,3.900000000000000000e+01,8.000000000000000000e+00,-1.900000000000000000e+01,5.600000000000000000e+01,5.000000000000000000e+00
+-4.400000000000000000e+01,-2.800000000000000000e+01,-4.900000000000000000e+01,-3.600000000000000000e+01,2.300000000000000000e+01,5.100000000000000000e+01,-7.000000000000000000e+00
+-1.000000000000000000e+01,1.500000000000000000e+01,5.300000000000000000e+01,5.000000000000000000e+01,-1.500000000000000000e+01,-5.200000000000000000e+01,-5.200000000000000000e+01
+-1.000000000000000000e+00,-4.600000000000000000e+01,5.000000000000000000e+01,-5.200000000000000000e+01,-4.900000000000000000e+01,3.800000000000000000e+01,9.000000000000000000e+00

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/kernel.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/kernel.txt
new file mode 100644
index 0000000..ebe16e3
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/kernel.txt

@@ -0,0 +1,64 @@
+# 3,3,7,11
+-3.100000000000000000e+01,-1.500000000000000000e+01,-2.900000000000000000e+01,-3.000000000000000000e+01,-4.200000000000000000e+01,4.800000000000000000e+01,4.400000000000000000e+01,-1.800000000000000000e+01,1.700000000000000000e+01,5.000000000000000000e+01,0.000000000000000000e+00
+5.700000000000000000e+01,1.700000000000000000e+01,5.000000000000000000e+01,-1.000000000000000000e+00,-1.000000000000000000e+01,2.200000000000000000e+01,-1.900000000000000000e+01,-2.000000000000000000e+01,-1.300000000000000000e+01,2.100000000000000000e+01,-2.900000000000000000e+01
+1.200000000000000000e+01,3.800000000000000000e+01,2.800000000000000000e+01,1.100000000000000000e+01,3.800000000000000000e+01,-1.200000000000000000e+01,-3.500000000000000000e+01,-1.000000000000000000e+01,-4.700000000000000000e+01,-2.800000000000000000e+01,-5.500000000000000000e+01
+-5.000000000000000000e+01,3.000000000000000000e+00,4.600000000000000000e+01,-4.700000000000000000e+01,-2.000000000000000000e+00,2.600000000000000000e+01,8.000000000000000000e+00,4.600000000000000000e+01,1.500000000000000000e+01,9.000000000000000000e+00,-8.000000000000000000e+00
+1.800000000000000000e+01,2.600000000000000000e+01,2.000000000000000000e+01,4.900000000000000000e+01,-4.300000000000000000e+01,2.000000000000000000e+01,-1.100000000000000000e+01,3.600000000000000000e+01,-5.000000000000000000e+01,1.800000000000000000e+01,-4.700000000000000000e+01
+5.200000000000000000e+01,-5.100000000000000000e+01,3.100000000000000000e+01,-3.800000000000000000e+01,-2.300000000000000000e+01,-1.000000000000000000e+00,-5.000000000000000000e+00,-1.500000000000000000e+01,-5.300000000000000000e+01,-3.900000000000000000e+01,3.500000000000000000e+01
+-2.400000000000000000e+01,-1.700000000000000000e+01,1.000000000000000000e+01,5.000000000000000000e+01,3.200000000000000000e+01,-3.100000000000000000e+01,2.600000000000000000e+01,-3.000000000000000000e+00,2.400000000000000000e+01,1.000000000000000000e+00,5.100000000000000000e+01
+-9.000000000000000000e+00,4.700000000000000000e+01,-1.500000000000000000e+01,-2.600000000000000000e+01,3.500000000000000000e+01,-1.300000000000000000e+01,-3.200000000000000000e+01,-2.000000000000000000e+01,-1.800000000000000000e+01,-1.600000000000000000e+01,1.700000000000000000e+01
+-2.500000000000000000e+01,-3.100000000000000000e+01,-2.900000000000000000e+01,-1.300000000000000000e+01,6.000000000000000000e+00,5.700000000000000000e+01,-5.200000000000000000e+01,2.200000000000000000e+01,-3.800000000000000000e+01,0.000000000000000000e+00,-4.600000000000000000e+01
+2.100000000000000000e+01,-5.300000000000000000e+01,4.500000000000000000e+01,-5.000000000000000000e+00,1.700000000000000000e+01,-8.000000000000000000e+00,2.800000000000000000e+01,1.000000000000000000e+01,-2.500000000000000000e+01,2.700000000000000000e+01,2.500000000000000000e+01
+-1.600000000000000000e+01,2.200000000000000000e+01,-2.700000000000000000e+01,4.800000000000000000e+01,1.200000000000000000e+01,3.000000000000000000e+01,-4.000000000000000000e+00,4.100000000000000000e+01,-4.300000000000000000e+01,4.200000000000000000e+01,5.800000000000000000e+01
+-2.000000000000000000e+01,2.700000000000000000e+01,1.300000000000000000e+01,1.700000000000000000e+01,1.900000000000000000e+01,1.300000000000000000e+01,-4.500000000000000000e+01,2.500000000000000000e+01,5.800000000000000000e+01,7.000000000000000000e+00,1.900000000000000000e+01
+2.900000000000000000e+01,5.000000000000000000e+01,-2.100000000000000000e+01,1.400000000000000000e+01,-4.800000000000000000e+01,5.700000000000000000e+01,-9.000000000000000000e+00,-2.600000000000000000e+01,-4.000000000000000000e+00,-1.500000000000000000e+01,-2.000000000000000000e+01
+1.400000000000000000e+01,3.700000000000000000e+01,-2.100000000000000000e+01,-5.000000000000000000e+01,7.000000000000000000e+00,5.200000000000000000e+01,4.900000000000000000e+01,3.400000000000000000e+01,-1.000000000000000000e+01,2.700000000000000000e+01,-8.000000000000000000e+00
+-1.000000000000000000e+00,-2.800000000000000000e+01,4.700000000000000000e+01,1.400000000000000000e+01,-4.500000000000000000e+01,-1.700000000000000000e+01,-2.000000000000000000e+00,4.200000000000000000e+01,2.200000000000000000e+01,4.000000000000000000e+01,5.100000000000000000e+01
+-3.000000000000000000e+00,-1.000000000000000000e+00,4.900000000000000000e+01,-7.000000000000000000e+00,-3.700000000000000000e+01,3.800000000000000000e+01,-8.000000000000000000e+00,3.600000000000000000e+01,6.000000000000000000e+00,3.000000000000000000e+01,-2.400000000000000000e+01
+4.200000000000000000e+01,4.400000000000000000e+01,-1.300000000000000000e+01,4.000000000000000000e+01,-2.600000000000000000e+01,-1.100000000000000000e+01,-7.000000000000000000e+00,-1.900000000000000000e+01,3.200000000000000000e+01,-3.300000000000000000e+01,5.800000000000000000e+01
+-7.000000000000000000e+00,-2.600000000000000000e+01,2.200000000000000000e+01,4.200000000000000000e+01,-2.000000000000000000e+00,-2.700000000000000000e+01,-4.600000000000000000e+01,9.000000000000000000e+00,5.000000000000000000e+00,-3.400000000000000000e+01,4.100000000000000000e+01
+-5.100000000000000000e+01,2.800000000000000000e+01,-1.700000000000000000e+01,-1.100000000000000000e+01,2.100000000000000000e+01,-2.800000000000000000e+01,-8.000000000000000000e+00,4.500000000000000000e+01,-5.200000000000000000e+01,1.500000000000000000e+01,3.400000000000000000e+01
+5.100000000000000000e+01,-3.200000000000000000e+01,-2.900000000000000000e+01,1.700000000000000000e+01,-3.700000000000000000e+01,-1.700000000000000000e+01,3.900000000000000000e+01,1.300000000000000000e+01,-1.000000000000000000e+00,-3.300000000000000000e+01,-8.000000000000000000e+00
+2.900000000000000000e+01,1.600000000000000000e+01,-4.900000000000000000e+01,-2.000000000000000000e+01,5.500000000000000000e+01,2.100000000000000000e+01,1.000000000000000000e+01,-4.700000000000000000e+01,-3.100000000000000000e+01,-5.100000000000000000e+01,1.100000000000000000e+01
+-4.400000000000000000e+01,4.500000000000000000e+01,3.900000000000000000e+01,8.000000000000000000e+00,-3.700000000000000000e+01,-4.700000000000000000e+01,1.600000000000000000e+01,2.200000000000000000e+01,7.000000000000000000e+00,6.000000000000000000e+00,3.800000000000000000e+01
+3.500000000000000000e+01,1.700000000000000000e+01,1.500000000000000000e+01,-3.900000000000000000e+01,-3.200000000000000000e+01,-3.700000000000000000e+01,-5.400000000000000000e+01,3.300000000000000000e+01,-8.000000000000000000e+00,1.700000000000000000e+01,-2.300000000000000000e+01
+4.900000000000000000e+01,-3.300000000000000000e+01,1.000000000000000000e+01,-8.000000000000000000e+00,2.700000000000000000e+01,2.000000000000000000e+01,-1.000000000000000000e+01,1.500000000000000000e+01,-3.300000000000000000e+01,1.200000000000000000e+01,4.900000000000000000e+01
+2.800000000000000000e+01,-4.900000000000000000e+01,3.400000000000000000e+01,2.800000000000000000e+01,-8.000000000000000000e+00,-2.400000000000000000e+01,-1.300000000000000000e+01,1.000000000000000000e+00,-3.600000000000000000e+01,-4.900000000000000000e+01,5.700000000000000000e+01
+2.900000000000000000e+01,3.200000000000000000e+01,-1.100000000000000000e+01,-3.000000000000000000e+00,1.900000000000000000e+01,5.600000000000000000e+01,1.900000000000000000e+01,-3.800000000000000000e+01,4.900000000000000000e+01,6.000000000000000000e+00,1.400000000000000000e+01
+-4.500000000000000000e+01,2.100000000000000000e+01,-4.700000000000000000e+01,-1.500000000000000000e+01,-1.600000000000000000e+01,5.400000000000000000e+01,3.100000000000000000e+01,4.000000000000000000e+00,-2.600000000000000000e+01,-1.700000000000000000e+01,-1.800000000000000000e+01
+2.700000000000000000e+01,-2.900000000000000000e+01,9.000000000000000000e+00,5.500000000000000000e+01,1.200000000000000000e+01,-2.500000000000000000e+01,0.000000000000000000e+00,7.000000000000000000e+00,1.600000000000000000e+01,-1.600000000000000000e+01,-4.000000000000000000e+00
+-4.500000000000000000e+01,-2.700000000000000000e+01,-2.200000000000000000e+01,4.300000000000000000e+01,5.200000000000000000e+01,-4.700000000000000000e+01,2.000000000000000000e+01,5.400000000000000000e+01,-2.000000000000000000e+00,-1.700000000000000000e+01,5.800000000000000000e+01
+1.900000000000000000e+01,-7.000000000000000000e+00,4.000000000000000000e+00,-7.000000000000000000e+00,-2.600000000000000000e+01,-2.000000000000000000e+00,1.500000000000000000e+01,1.500000000000000000e+01,-2.100000000000000000e+01,-3.000000000000000000e+00,1.000000000000000000e+00
+6.000000000000000000e+00,1.000000000000000000e+01,-2.700000000000000000e+01,5.400000000000000000e+01,-7.000000000000000000e+00,-8.000000000000000000e+00,-1.300000000000000000e+01,2.100000000000000000e+01,2.900000000000000000e+01,-4.200000000000000000e+01,5.100000000000000000e+01
+7.000000000000000000e+00,2.000000000000000000e+00,-1.600000000000000000e+01,-4.200000000000000000e+01,-2.600000000000000000e+01,4.000000000000000000e+00,-5.200000000000000000e+01,-5.000000000000000000e+01,7.000000000000000000e+00,3.300000000000000000e+01,5.600000000000000000e+01
+-3.100000000000000000e+01,4.500000000000000000e+01,5.800000000000000000e+01,2.500000000000000000e+01,2.500000000000000000e+01,-5.000000000000000000e+01,-4.200000000000000000e+01,-3.100000000000000000e+01,-5.400000000000000000e+01,2.500000000000000000e+01,3.800000000000000000e+01
+-5.400000000000000000e+01,5.800000000000000000e+01,-3.800000000000000000e+01,2.300000000000000000e+01,1.100000000000000000e+01,-4.000000000000000000e+00,1.900000000000000000e+01,3.400000000000000000e+01,-5.500000000000000000e+01,1.500000000000000000e+01,-4.400000000000000000e+01
+-1.600000000000000000e+01,-4.000000000000000000e+01,9.000000000000000000e+00,4.900000000000000000e+01,-4.200000000000000000e+01,-1.600000000000000000e+01,-5.500000000000000000e+01,1.900000000000000000e+01,-4.500000000000000000e+01,1.600000000000000000e+01,-3.900000000000000000e+01
+2.900000000000000000e+01,0.000000000000000000e+00,3.500000000000000000e+01,-5.300000000000000000e+01,1.100000000000000000e+01,-2.000000000000000000e+01,-5.500000000000000000e+01,-5.000000000000000000e+01,3.700000000000000000e+01,-2.700000000000000000e+01,2.200000000000000000e+01
+5.000000000000000000e+00,-1.200000000000000000e+01,3.100000000000000000e+01,5.100000000000000000e+01,-4.700000000000000000e+01,5.000000000000000000e+01,-4.900000000000000000e+01,9.000000000000000000e+00,1.100000000000000000e+01,3.800000000000000000e+01,-2.500000000000000000e+01
+-2.000000000000000000e+00,-1.200000000000000000e+01,-2.200000000000000000e+01,-1.000000000000000000e+01,5.800000000000000000e+01,-2.000000000000000000e+01,-1.000000000000000000e+01,-9.000000000000000000e+00,3.000000000000000000e+00,-2.600000000000000000e+01,4.300000000000000000e+01
+-3.200000000000000000e+01,7.000000000000000000e+00,1.700000000000000000e+01,3.600000000000000000e+01,-1.800000000000000000e+01,2.400000000000000000e+01,3.000000000000000000e+00,3.700000000000000000e+01,-6.000000000000000000e+00,-2.800000000000000000e+01,3.600000000000000000e+01
+-1.400000000000000000e+01,5.100000000000000000e+01,5.000000000000000000e+01,-1.900000000000000000e+01,3.700000000000000000e+01,8.000000000000000000e+00,4.900000000000000000e+01,-4.300000000000000000e+01,3.800000000000000000e+01,1.000000000000000000e+01,-1.800000000000000000e+01
+3.000000000000000000e+00,-1.600000000000000000e+01,2.600000000000000000e+01,1.000000000000000000e+00,-2.600000000000000000e+01,-7.000000000000000000e+00,5.400000000000000000e+01,5.200000000000000000e+01,-5.400000000000000000e+01,2.400000000000000000e+01,-3.000000000000000000e+00
+5.600000000000000000e+01,2.200000000000000000e+01,5.000000000000000000e+00,1.900000000000000000e+01,1.400000000000000000e+01,1.000000000000000000e+01,2.400000000000000000e+01,-3.000000000000000000e+00,2.500000000000000000e+01,5.000000000000000000e+00,-4.700000000000000000e+01
+-3.100000000000000000e+01,1.400000000000000000e+01,-4.900000000000000000e+01,3.800000000000000000e+01,-3.800000000000000000e+01,4.000000000000000000e+00,-3.800000000000000000e+01,1.200000000000000000e+01,-5.500000000000000000e+01,-9.000000000000000000e+00,-3.000000000000000000e+01
+9.000000000000000000e+00,9.000000000000000000e+00,9.000000000000000000e+00,-3.200000000000000000e+01,-4.000000000000000000e+01,-3.800000000000000000e+01,3.300000000000000000e+01,2.000000000000000000e+00,-2.900000000000000000e+01,-4.000000000000000000e+00,-1.500000000000000000e+01
+-3.800000000000000000e+01,-3.500000000000000000e+01,-2.500000000000000000e+01,-4.400000000000000000e+01,-1.400000000000000000e+01,-5.400000000000000000e+01,-4.200000000000000000e+01,-5.000000000000000000e+01,-1.500000000000000000e+01,5.000000000000000000e+00,0.000000000000000000e+00
+-4.500000000000000000e+01,-2.200000000000000000e+01,4.000000000000000000e+01,-2.300000000000000000e+01,1.800000000000000000e+01,2.200000000000000000e+01,-3.300000000000000000e+01,5.000000000000000000e+00,9.000000000000000000e+00,-1.300000000000000000e+01,-1.600000000000000000e+01
+0.000000000000000000e+00,-4.600000000000000000e+01,1.400000000000000000e+01,2.900000000000000000e+01,-4.000000000000000000e+01,-8.000000000000000000e+00,3.000000000000000000e+00,1.000000000000000000e+00,1.200000000000000000e+01,1.000000000000000000e+00,-3.800000000000000000e+01
+5.300000000000000000e+01,-2.300000000000000000e+01,9.000000000000000000e+00,-4.000000000000000000e+00,1.200000000000000000e+01,1.300000000000000000e+01,-5.200000000000000000e+01,-3.000000000000000000e+00,-5.100000000000000000e+01,5.100000000000000000e+01,1.800000000000000000e+01
+-4.200000000000000000e+01,4.000000000000000000e+01,-3.000000000000000000e+00,1.600000000000000000e+01,-5.500000000000000000e+01,-4.400000000000000000e+01,5.000000000000000000e+00,4.600000000000000000e+01,1.100000000000000000e+01,1.500000000000000000e+01,2.400000000000000000e+01
+-5.300000000000000000e+01,-1.600000000000000000e+01,-2.500000000000000000e+01,4.500000000000000000e+01,-4.800000000000000000e+01,-6.000000000000000000e+00,5.000000000000000000e+00,-3.900000000000000000e+01,2.700000000000000000e+01,1.000000000000000000e+00,3.400000000000000000e+01
+-4.400000000000000000e+01,1.400000000000000000e+01,-5.000000000000000000e+00,5.500000000000000000e+01,1.300000000000000000e+01,4.800000000000000000e+01,1.800000000000000000e+01,-4.900000000000000000e+01,-3.900000000000000000e+01,7.000000000000000000e+00,4.200000000000000000e+01
+4.100000000000000000e+01,5.200000000000000000e+01,5.600000000000000000e+01,-5.000000000000000000e+00,5.800000000000000000e+01,-4.300000000000000000e+01,-1.000000000000000000e+01,1.300000000000000000e+01,4.300000000000000000e+01,3.700000000000000000e+01,-2.000000000000000000e+01
+5.200000000000000000e+01,1.800000000000000000e+01,-1.200000000000000000e+01,4.000000000000000000e+00,4.000000000000000000e+00,2.300000000000000000e+01,4.400000000000000000e+01,-4.600000000000000000e+01,-3.500000000000000000e+01,4.400000000000000000e+01,-2.700000000000000000e+01
+1.300000000000000000e+01,2.400000000000000000e+01,-7.000000000000000000e+00,-2.900000000000000000e+01,-3.300000000000000000e+01,3.500000000000000000e+01,1.000000000000000000e+01,1.200000000000000000e+01,-2.800000000000000000e+01,1.200000000000000000e+01,3.500000000000000000e+01
+-3.500000000000000000e+01,4.000000000000000000e+01,-7.000000000000000000e+00,-5.500000000000000000e+01,-2.800000000000000000e+01,2.100000000000000000e+01,1.100000000000000000e+01,-1.200000000000000000e+01,2.800000000000000000e+01,-2.100000000000000000e+01,-8.000000000000000000e+00
+-3.400000000000000000e+01,2.000000000000000000e+00,-1.300000000000000000e+01,-2.700000000000000000e+01,-8.000000000000000000e+00,3.000000000000000000e+00,8.000000000000000000e+00,-1.000000000000000000e+01,-6.000000000000000000e+00,8.000000000000000000e+00,-1.200000000000000000e+01
+-5.300000000000000000e+01,-2.800000000000000000e+01,1.400000000000000000e+01,3.700000000000000000e+01,-4.200000000000000000e+01,-5.000000000000000000e+01,-1.100000000000000000e+01,-7.000000000000000000e+00,-1.800000000000000000e+01,-1.300000000000000000e+01,8.000000000000000000e+00
+-7.000000000000000000e+00,-8.000000000000000000e+00,-5.000000000000000000e+01,2.900000000000000000e+01,-5.600000000000000000e+01,2.700000000000000000e+01,-5.200000000000000000e+01,3.000000000000000000e+01,2.700000000000000000e+01,4.000000000000000000e+01,-4.300000000000000000e+01
+1.300000000000000000e+01,1.100000000000000000e+01,-4.500000000000000000e+01,3.000000000000000000e+00,1.800000000000000000e+01,1.900000000000000000e+01,-1.500000000000000000e+01,-1.900000000000000000e+01,-1.500000000000000000e+01,5.000000000000000000e+00,-2.300000000000000000e+01
+5.100000000000000000e+01,-3.100000000000000000e+01,-1.500000000000000000e+01,4.000000000000000000e+01,-4.700000000000000000e+01,1.200000000000000000e+01,2.000000000000000000e+00,-2.700000000000000000e+01,-2.600000000000000000e+01,-7.000000000000000000e+00,-2.600000000000000000e+01
+-4.000000000000000000e+01,5.300000000000000000e+01,2.400000000000000000e+01,-5.200000000000000000e+01,2.100000000000000000e+01,1.600000000000000000e+01,5.000000000000000000e+01,-1.900000000000000000e+01,-2.600000000000000000e+01,1.200000000000000000e+01,-3.100000000000000000e+01
+-3.000000000000000000e+01,2.300000000000000000e+01,-5.200000000000000000e+01,2.000000000000000000e+01,3.700000000000000000e+01,2.000000000000000000e+01,-2.800000000000000000e+01,-1.600000000000000000e+01,3.500000000000000000e+01,1.600000000000000000e+01,-4.800000000000000000e+01
+-4.700000000000000000e+01,2.900000000000000000e+01,3.000000000000000000e+01,1.300000000000000000e+01,-1.100000000000000000e+01,-5.000000000000000000e+00,2.800000000000000000e+01,-5.100000000000000000e+01,0.000000000000000000e+00,5.800000000000000000e+01,-4.900000000000000000e+01

diff --git a/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/params.txt b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/params.txt
new file mode 100644
index 0000000..efeb8d3
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/fully_connected_int16/params.txt

@@ -0,0 +1,12 @@
+7
+11
+3
+3
+3
+3
+1
+1
+0
+0
+2
+0

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/biases_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/biases_data.h
new file mode 100644
index 0000000..80674a5
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/biases_data.h

@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int64_t fully_connected_int16_biases[11] = {-5, 45, 53, -33, 31, 51, 43, 35, 37, -1, 46};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/config_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/config_data.h
new file mode 100644
index 0000000..2607db8
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/config_data.h

@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#define FULLY_CONNECTED_INT16_OUT_CH 11
+#define FULLY_CONNECTED_INT16_IN_CH 7
+#define FULLY_CONNECTED_INT16_INPUT_W 3
+#define FULLY_CONNECTED_INT16_INPUT_H 3
+#define FULLY_CONNECTED_INT16_DST_SIZE 22
+#define FULLY_CONNECTED_INT16_INPUT_SIZE 63
+#define FULLY_CONNECTED_INT16_OUT_ACTIVATION_MIN -32766
+#define FULLY_CONNECTED_INT16_OUT_ACTIVATION_MAX 32767
+#define FULLY_CONNECTED_INT16_INPUT_BATCHES 2
+#define FULLY_CONNECTED_INT16_INPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_OUTPUT_OFFSET 0
+#define FULLY_CONNECTED_INT16_OUTPUT_MULTIPLIER 1073741824
+#define FULLY_CONNECTED_INT16_OUTPUT_SHIFT 1
+#define FULLY_CONNECTED_INT16_ACCUMULATION_DEPTH 63

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/input_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/input_data.h
new file mode 100644
index 0000000..f112806
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/input_data.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_input[126] = {
+    -3,  -38, 24,  8,   -11, -43, 47,  45,  6,   -10, -3,  -52, -23, 9,   32,  3,   -7,  -2,  -5,  28, -40,
+    -21, -39, 6,   -45, 35,  -8,  -16, -16, 2,   -9,  -42, -35, 12,  -37, 33,  14,  -47, -32, -38, 40, 13,
+    12,  46,  12,  -46, 27,  -42, 33,  3,   -43, 53,  3,   -50, -35, 27,  -3,  -18, 12,  -39, -47, 28, 1,
+    30,  8,   -50, 11,  -23, 1,   4,   -31, 17,  2,   -35, -20, 51,  -32, -42, -37, -37, 36,  -12, 38, -12,
+    -28, -46, -49, -56, 17,  -32, 6,   -19, -47, 5,   -17, -29, 1,   50,  -39, 1,   39,  8,   -19, 56, 5,
+    -44, -28, -49, -36, 23,  51,  -7,  -10, 15,  53,  50,  -15, -52, -52, -1,  -46, 50,  -52, -49, 38, 9};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/output_ref_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/output_ref_data.h
new file mode 100644
index 0000000..e264016
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/output_ref_data.h

@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t fully_connected_int16_output_ref[22] = {-428,  1672,  -2140, 1843,  -13255, 7356, -9406, -6522,
+                                                    -1898, 7253,  -5511, -4247, -9077,  372,  -6992, -13817,
+                                                    9870,  -1640, 6758,  5351,  -4067,  -373};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/test_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/test_data.h
new file mode 100644
index 0000000..c9e0a61
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/test_data.h

@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_ref_data.h"
+#include "weights_data.h"

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/weights_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/weights_data.h
new file mode 100644
index 0000000..642743a
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/fully_connected_int16/weights_data.h

@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated by generate_test_data.py using TFL version 2.5.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t fully_connected_int16_weights[693] = {
+    -31, -15, -29, -30, -42, 48,  44,  -18, 17,  50,  0,   57,  17,  50,  -1,  -10, 22,  -19, -20, -13, 21,  -29, 12,
+    38,  28,  11,  38,  -12, -35, -10, -47, -28, -55, -50, 3,   46,  -47, -2,  26,  8,   46,  15,  9,   -8,  18,  26,
+    20,  49,  -43, 20,  -11, 36,  -50, 18,  -47, 52,  -51, 31,  -38, -23, -1,  -5,  -15, -53, -39, 35,  -24, -17, 10,
+    50,  32,  -31, 26,  -3,  24,  1,   51,  -9,  47,  -15, -26, 35,  -13, -32, -20, -18, -16, 17,  -25, -31, -29, -13,
+    6,   57,  -52, 22,  -38, 0,   -46, 21,  -53, 45,  -5,  17,  -8,  28,  10,  -25, 27,  25,  -16, 22,  -27, 48,  12,
+    30,  -4,  41,  -43, 42,  58,  -20, 27,  13,  17,  19,  13,  -45, 25,  58,  7,   19,  29,  50,  -21, 14,  -48, 57,
+    -9,  -26, -4,  -15, -20, 14,  37,  -21, -50, 7,   52,  49,  34,  -10, 27,  -8,  -1,  -28, 47,  14,  -45, -17, -2,
+    42,  22,  40,  51,  -3,  -1,  49,  -7,  -37, 38,  -8,  36,  6,   30,  -24, 42,  44,  -13, 40,  -26, -11, -7,  -19,
+    32,  -33, 58,  -7,  -26, 22,  42,  -2,  -27, -46, 9,   5,   -34, 41,  -51, 28,  -17, -11, 21,  -28, -8,  45,  -52,
+    15,  34,  51,  -32, -29, 17,  -37, -17, 39,  13,  -1,  -33, -8,  29,  16,  -49, -20, 55,  21,  10,  -47, -31, -51,
+    11,  -44, 45,  39,  8,   -37, -47, 16,  22,  7,   6,   38,  35,  17,  15,  -39, -32, -37, -54, 33,  -8,  17,  -23,
+    49,  -33, 10,  -8,  27,  20,  -10, 15,  -33, 12,  49,  28,  -49, 34,  28,  -8,  -24, -13, 1,   -36, -49, 57,  29,
+    32,  -11, -3,  19,  56,  19,  -38, 49,  6,   14,  -45, 21,  -47, -15, -16, 54,  31,  4,   -26, -17, -18, 27,  -29,
+    9,   55,  12,  -25, 0,   7,   16,  -16, -4,  -45, -27, -22, 43,  52,  -47, 20,  54,  -2,  -17, 58,  19,  -7,  4,
+    -7,  -26, -2,  15,  15,  -21, -3,  1,   6,   10,  -27, 54,  -7,  -8,  -13, 21,  29,  -42, 51,  7,   2,   -16, -42,
+    -26, 4,   -52, -50, 7,   33,  56,  -31, 45,  58,  25,  25,  -50, -42, -31, -54, 25,  38,  -54, 58,  -38, 23,  11,
+    -4,  19,  34,  -55, 15,  -44, -16, -40, 9,   49,  -42, -16, -55, 19,  -45, 16,  -39, 29,  0,   35,  -53, 11,  -20,
+    -55, -50, 37,  -27, 22,  5,   -12, 31,  51,  -47, 50,  -49, 9,   11,  38,  -25, -2,  -12, -22, -10, 58,  -20, -10,
+    -9,  3,   -26, 43,  -32, 7,   17,  36,  -18, 24,  3,   37,  -6,  -28, 36,  -14, 51,  50,  -19, 37,  8,   49,  -43,
+    38,  10,  -18, 3,   -16, 26,  1,   -26, -7,  54,  52,  -54, 24,  -3,  56,  22,  5,   19,  14,  10,  24,  -3,  25,
+    5,   -47, -31, 14,  -49, 38,  -38, 4,   -38, 12,  -55, -9,  -30, 9,   9,   9,   -32, -40, -38, 33,  2,   -29, -4,
+    -15, -38, -35, -25, -44, -14, -54, -42, -50, -15, 5,   0,   -45, -22, 40,  -23, 18,  22,  -33, 5,   9,   -13, -16,
+    0,   -46, 14,  29,  -40, -8,  3,   1,   12,  1,   -38, 53,  -23, 9,   -4,  12,  13,  -52, -3,  -51, 51,  18,  -42,
+    40,  -3,  16,  -55, -44, 5,   46,  11,  15,  24,  -53, -16, -25, 45,  -48, -6,  5,   -39, 27,  1,   34,  -44, 14,
+    -5,  55,  13,  48,  18,  -49, -39, 7,   42,  41,  52,  56,  -5,  58,  -43, -10, 13,  43,  37,  -20, 52,  18,  -12,
+    4,   4,   23,  44,  -46, -35, 44,  -27, 13,  24,  -7,  -29, -33, 35,  10,  12,  -28, 12,  35,  -35, 40,  -7,  -55,
+    -28, 21,  11,  -12, 28,  -21, -8,  -34, 2,   -13, -27, -8,  3,   8,   -10, -6,  8,   -12, -53, -28, 14,  37,  -42,
+    -50, -11, -7,  -18, -13, 8,   -7,  -8,  -50, 29,  -56, 27,  -52, 30,  27,  40,  -43, 13,  11,  -45, 3,   18,  19,
+    -15, -19, -15, 5,   -23, 51,  -31, -15, 40,  -47, 12,  2,   -27, -26, -7,  -26, -40, 53,  24,  -52, 21,  16,  50,
+    -19, -26, 12,  -31, -30, 23,  -52, 20,  37,  20,  -28, -16, 35,  16,  -48, -47, 29,  30,  13,  -11, -5,  28,  -51,
+    0,   58,  -49};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h
index 46aa18e..41ef7d7 100644
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/int16xint8/output_mult_data.h

@@ -20,4 +20,4 @@
 #pragma once
 #include <stdint.h>
 
-const int32_t int16xint8_output_mult[4] = {1082246025, 1082246025, 1082246025, 1082246025};
+const int32_t int16xint8_output_mult[4] = {1082212997, 1082212997, 1082212997, 1082212997};

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/CMakeLists.txt b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/CMakeLists.txt
new file mode 100644
index 0000000..aaea18b
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_fully_connected_s16)
+
+target_sources(test_arm_fully_connected_s16 PRIVATE
+    Unity/unity_test_arm_fully_connected_s16.c
+    Unity/TestRunner/unity_test_arm_fully_connected_s16_runner.c)

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/Unity/unity_test_arm_fully_connected_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/Unity/unity_test_arm_fully_connected_s16.c
new file mode 100644
index 0000000..ecf4743
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/Unity/unity_test_arm_fully_connected_s16.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_fully_connected_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* Called from the autogenerated runner file, so the name must be exactly like this.
+ * Calls uart_init() when built with USING_FVP_CORSTONE_300; otherwise does nothing.
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* Called from the autogenerated runner file, so the name must be exactly like this.
+ * Empty: no per-test cleanup is performed here.
+ */
+void tearDown(void) {}
+
+void test_fully_connected_int16_arm_fully_connected_s16(void) { fully_connected_int16_arm_fully_connected_s16(); }

diff --git a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/test_arm_fully_connected_s16.c b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/test_arm_fully_connected_s16.c
new file mode 100644
index 0000000..a9fedc5
--- /dev/null
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_fully_connected_s16/test_arm_fully_connected_s16.c

@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_nnfunctions.h>
+#include <stdlib.h>
+#include <unity.h>
+
+#include "../TestData/fully_connected_int16/test_data.h"
+#include "../Utils/validate.h"
+
+/* Runs arm_fully_connected_s16() on the generated fully_connected_int16 test data and
+ * checks both the returned status and the output against the generated reference.
+ */
+void fully_connected_int16_arm_fully_connected_s16(void)
+{
+    const arm_status expected = ARM_MATH_SUCCESS;
+    q15_t output[FULLY_CONNECTED_INT16_DST_SIZE] = {0};
+
+    const q63_t *bias_data = fully_connected_int16_biases;
+    const q7_t *kernel_data = fully_connected_int16_weights;
+    const q15_t *input_data = fully_connected_int16_input;
+    const q15_t *output_ref = fully_connected_int16_output_ref;
+    const int32_t output_ref_size = FULLY_CONNECTED_INT16_DST_SIZE;
+
+    /* Members that are not listed in an initializer are zero-initialized, so no field
+     * (e.g. bias_dims, or h/w of the filter and output dims) reaches the kernel with an
+     * indeterminate value. */
+    cmsis_nn_dims input_dims = {.n = FULLY_CONNECTED_INT16_INPUT_BATCHES,
+                                .w = FULLY_CONNECTED_INT16_INPUT_W,
+                                .h = FULLY_CONNECTED_INT16_INPUT_H,
+                                .c = FULLY_CONNECTED_INT16_IN_CH};
+    cmsis_nn_dims filter_dims = {.n = FULLY_CONNECTED_INT16_ACCUMULATION_DEPTH, .c = FULLY_CONNECTED_INT16_OUT_CH};
+    cmsis_nn_dims output_dims = {.n = FULLY_CONNECTED_INT16_INPUT_BATCHES, .c = FULLY_CONNECTED_INT16_OUT_CH};
+    cmsis_nn_dims bias_dims = {0};
+
+    cmsis_nn_fc_params fc_params = {.input_offset = 0,
+                                    .filter_offset = 0,
+                                    .output_offset = 0,
+                                    .activation = {.min = FULLY_CONNECTED_INT16_OUT_ACTIVATION_MIN,
+                                                   .max = FULLY_CONNECTED_INT16_OUT_ACTIVATION_MAX}};
+    cmsis_nn_per_tensor_quant_params quant_params = {.multiplier = FULLY_CONNECTED_INT16_OUTPUT_MULTIPLIER,
+                                                     .shift = FULLY_CONNECTED_INT16_OUTPUT_SHIFT};
+
+    const int32_t buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
+    cmsis_nn_context ctx = {.buf = malloc(buf_size), .size = buf_size};
+    if (buf_size > 0)
+    {
+        /* malloc(0) may legitimately return NULL, so only a real request is checked. */
+        TEST_ASSERT_NOT_NULL(ctx.buf);
+    }
+
+    arm_status result = arm_fully_connected_s16(&ctx,
+                                                &fc_params,
+                                                &quant_params,
+                                                &input_dims,
+                                                input_data,
+                                                &filter_dims,
+                                                kernel_data,
+                                                &bias_dims,
+                                                bias_data,
+                                                &output_dims,
+                                                output);
+
+    free(ctx.buf);
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
+}

diff --git a/CMSIS/NN/Tests/UnitTest/generate_test_data.py b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
index b18196b..9e52d1d 100755
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py

@@ -99,7 +99,8 @@
 
     def __init__(self, dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin,
                  randmax, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1, generate_bias=True, relu6=False,
-                 out_activation_min=None, out_activation_max=None):
+                 out_activation_min=None, out_activation_max=None, int16xint8=False, input_scale=None, bias_min=None,
+                 bias_max=None):
 
         self.tensor_flow_reference_version = ("// Generated by {} using TFL version {} as reference.\n".
                                               format(os.path.basename(__file__), tf.__version__))
@@ -107,8 +108,15 @@
         # Randomization interval
         self.mins = randmin
         self.maxs = randmax
-        self.bias_mins = randmin
-        self.bias_maxs = randmax
+
+        if bias_min is not None:
+            self.bias_mins = bias_min
+        else:
+            self.bias_mins = randmin
+        if bias_max is not None:
+            self.bias_maxs = bias_max
+        else:
+            self.bias_maxs = randmax
 
         self.relu6 = relu6
         self.input_ch = in_ch
@@ -123,20 +131,31 @@
         self.test_type = testtype
         self.has_padding = pad
 
-        if out_activation_min:
+        self.is_int16xint8 = int16xint8
+
+        if out_activation_min is not None:
             self.out_activation_min = out_activation_min
         else:
-            self.out_activation_min = INT8_MIN
-        if out_activation_max:
+            self.out_activation_min = INT16_MIN if self.is_int16xint8 else INT8_MIN
+        if out_activation_max is not None:
             self.out_activation_max = out_activation_max
         else:
-            self.out_activation_max = INT8_MAX
+            self.out_activation_max = INT16_MAX if self.is_int16xint8 else INT8_MAX
 
-        minrange = randmin - 1
-        maxrange = randmax + 1
-        (self.input_scale, self.input_zero_point) = self.derive_scale_and_zeropoint_from_min_max(minrange, maxrange)
-        (self.output_scale, self.output_zero_point) = self.derive_scale_and_zeropoint_from_min_max(outminrange,
-                                                                                                   outmaxrange)
+        if self.is_int16xint8:
+            if input_scale:
+                self.input_scale = input_scale
+            else:
+                self.input_scale = self.derive_scale_from_min_max(self.mins, self.maxs, INT16_MIN, INT16_MAX)
+            self.input_zero_point = 0
+            self.output_scale = self.derive_scale_from_min_max(outminrange, outmaxrange, INT16_MIN, INT16_MAX)
+            self.output_zero_point = 0
+        else:
+            minrange = randmin - 1
+            maxrange = randmax + 1
+            (self.input_scale, self.input_zero_point) = self.derive_scale_and_zeropoint_from_min_max(minrange, maxrange)
+            (self.output_scale, self.output_zero_point) = self.derive_scale_and_zeropoint_from_min_max(outminrange,
+                                                                                                       outmaxrange)
 
         # Always use output scale of 1, when derived it sometimes gets slighly smaller than 1,
         # which may cause output to differ.
@@ -467,31 +486,10 @@
                  int16xint8=False, input_scale=None, bias_min=None, bias_max=None):
         super().__init__(dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad,
                          randmin, randmax, outminrange, outmaxrange, batches, generate_bias=generate_bias, relu6=relu6,
-                         out_activation_min=out_activation_min, out_activation_max=out_activation_max)
+                         out_activation_min=out_activation_min, out_activation_max=out_activation_max,
+                         int16xint8=int16xint8, input_scale=input_scale, bias_min=bias_min, bias_max=bias_max)
 
         self.scaling_factors = []
-        self.is_int16xint8 = int16xint8
-
-        if bias_min:
-            self.bias_mins = bias_min
-        if bias_max:
-            self.bias_maxs = bias_max
-        if self.is_int16xint8:
-            if input_scale:
-                self.input_scale = input_scale
-            else:
-                self.input_scale = self.derive_scale_from_min_max(self.mins, self.maxs, INT16_MIN, INT16_MAX)
-            self.input_zero_point = 0
-            self.output_scale = self.derive_scale_from_min_max(outminrange, outmaxrange, INT16_MIN, INT16_MAX)
-            self.output_zero_point = 0
-            if out_activation_min:
-                self.out_activation_min = out_activation_min
-            else:
-                self.out_activation_min = INT16_MIN
-            if out_activation_max:
-                self.out_activation_max = out_activation_max
-            else:
-                self.out_activation_max = INT16_MAX
 
         if self.test_type == 'conv':
             self.quantized_dimension = 0
@@ -743,24 +741,30 @@
 class FullyConnectedSettings(TestSettings):
 
     def __init__(self, dataset, testtype, args, in_ch=1, out_ch=1, x_in=1, y_in=1, w_x=1, w_y=1, stride_x=1, stride_y=1,
-                 pad=False, randmin=-4, randmax=4, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1, input_scale=1.0,
-                 input_zero_point=0, weights_scale=1.0, bias_scale=1.0, output_scale=1.0,
-                 output_zero_point=0, generate_bias=True, out_activation_min=None, out_activation_max=None):
+                 pad=False, randmin=-4, randmax=4, outminrange=INT8_MIN, outmaxrange=INT8_MAX, batches=1,
+                 input_scale=1.0, input_zero_point=0, weights_scale=1.0, bias_scale=1.0, output_scale=1.0,
+                 output_zero_point=0, generate_bias=True, out_activation_min=None, out_activation_max=None,
+                 int16xint8=False, bias_min=None, bias_max=None):
         super().__init__(dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad, randmin,
                          randmax, outminrange, outmaxrange, batches, generate_bias=generate_bias,
-                         out_activation_min=out_activation_min, out_activation_max=out_activation_max)
+                         out_activation_min=out_activation_min, out_activation_max=out_activation_max,
+                         int16xint8=int16xint8, bias_min=bias_min, bias_max=bias_max)
 
         if not self.test_type == 'fully_connected':
             raise RuntimeError("Invalid test type {}".format(self.test_type))
         if x_in != w_x or y_in != w_y:
             raise RuntimeError("Mismatching input and filter dimensions")
 
+        if self.is_int16xint8:
+            self.input_zero_point = 0
+            self.output_zero_point = 0
+        else:
+            self.input_zero_point = input_zero_point
+            self.output_zero_point = output_zero_point
         self.input_scale = input_scale
-        self.input_zero_point = input_zero_point
         self.weights_scale = weights_scale
         self.bias_scale = bias_scale
         self.output_scale = output_scale
-        self.output_zero_point = output_zero_point
 
     def write_c_config_header(self):
         super().write_c_config_header()
@@ -798,6 +802,17 @@
     def generate_data(self, input_data=None, weights=None, biases=None):
         input_data = self.get_randomized_input_data(input_data)
 
+        if self.is_int16xint8:
+            quant_max_value = INT16_MAX
+            quant_min_value = INT16_MIN
+            datatype = "q15_t"
+            bias_datatype = "int64_t"
+        else:
+            quant_max_value = INT8_MAX
+            quant_min_value = INT8_MIN
+            datatype = "q7_t"
+            bias_datatype = "int32_t"
+
         if weights is not None:
             weights = tf.reshape(weights, [self.filter_y, self.filter_x, self.input_ch, self.output_ch])
         else:
@@ -809,10 +824,14 @@
 
         conv = self.conv2d(input_data, self.reshape_conv_kernel(weights), biases)
 
-        self.generate_c_array("input", self.convert_tensor(input_data, self.quantize_input))
+        self.generate_c_array("input",
+                              self.convert_tensor(input_data,
+                                                  self.quantize_input,
+                                                  quant_min_value,
+                                                  quant_max_value), datatype=datatype)
         self.generate_c_array("weights", self.convert_tensor(weights, self.quantize_weights))
-        self.generate_c_array("biases", self.convert_tensor(biases, self.quantize_bias), "int32_t")
-        self.generate_c_array("output_ref", self.convert_tensor(conv, self.quantize_output))
+        self.generate_c_array("biases", self.convert_tensor(biases, self.quantize_bias), bias_datatype)
+        self.generate_c_array("output_ref", self.convert_tensor(conv, self.quantize_output), datatype=datatype)
 
         self.quantize_multiplier()
 
@@ -1184,6 +1203,12 @@
     ALL_TESTDATA_SETS[dataset] = FullyConnectedSettings(dataset, type_of_test, args, in_ch=10, out_ch=4, randmin=-15,
                                                         randmax=15, input_zero_point=0, output_zero_point=0,
                                                         out_activation_min=-105, out_activation_max=120)
+    dataset = 'fully_connected_int16'
+    ALL_TESTDATA_SETS[dataset] = FullyConnectedSettings(dataset, type_of_test, args, in_ch=7, out_ch=11, x_in=3, y_in=3,
+                                                        w_x=3, w_y=3, batches=2, randmin=-56, randmax=59,
+                                                        input_zero_point=-3, output_zero_point=-303,
+                                                        out_activation_min=-32766, out_activation_max=32767,
+                                                        int16xint8=True)
 
     type_of_test = 'avgpool'
     dataset = 'avgpooling'
commit	0fe4e1c166dda73df2b3ed6cb7ec42ef97c2c49a	[log] [tgz]
author	Måns Nilsson <mans.nilsson@arm.com>	Mon Jun 21 15:05:46 2021 +0200
committer	felix-johnny <48442848+felix-johnny@users.noreply.github.com>	Wed Aug 18 10:25:27 2021 +0100
tree	75051188a1952986689292b246d1d2792caa8803
parent	78ed0ac442ab11662dae9e4995ea1b16f5e4a2cc [diff]