CMSIS-NN Cleanup Doxygen comments (#1541)

Duplicate function descriptions are removed. The header
file is retained as the main reference.

diff --git a/CMSIS/NN/Include/arm_nnfunctions.h b/CMSIS/NN/Include/arm_nnfunctions.h
index 7ab6349..60d9872 100644
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        19 May 2022
- * $Revision:    V.10.1.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.10.1.1
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -112,7 +112,7 @@
    * Copyright Notice
    * ------------
    *
-   * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
+   * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
    *
    * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
    *
@@ -1872,7 +1872,6 @@
  * @brief Q7 RELU function
  * @param[in,out]   data        pointer to input
  * @param[in]       size        number of elements
- * @return none.
  */
 
 void arm_relu_q7(q7_t *data, uint16_t size);
@@ -1889,7 +1888,6 @@
  * @brief Q15 RELU function
  * @param[in,out]   data        pointer to input
  * @param[in]       size        number of elements
- * @return none.
  */
 
 void arm_relu_q15(q15_t *data, uint16_t size);
@@ -1900,7 +1898,6 @@
  * @param[in]       size        number of elements
  * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
  * @param[in]       type        type of activation functions
- * @return none.
  */
 
 void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
@@ -1911,7 +1908,6 @@
  * @param[in]       size        number of elements
  * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
  * @param[in]       type        type of activation functions
- * @return none.
  *
  * @details
  *
@@ -1942,7 +1938,6 @@
  * @param[in]       dim_im_out  output tensor dimension
  * @param[in,out]   bufferA     pointer to buffer space for input
  * @param[in,out]   Im_out      pointer to output tensor
- * @return none.
  *
  */
 
@@ -1967,7 +1962,6 @@
  * @param[in]       dim_im_out  output tensor dimension
  * @param[in,out]   bufferA     pointer to buffer space for input
  * @param[in,out]   Im_out      pointer to output tensor
- * @return none.
  *
  */
 
@@ -2156,7 +2150,6 @@
  * @param[in]       nb_batches  number of batches
  * @param[in]       dim_vec     input vector dimension
  * @param[out]      p_out       pointer to output vector
- * @return none.
  *
  * @note This function is an optimized version which is not bit-accurate with
  *       TensorFlow Lite's kernel
@@ -2169,7 +2162,6 @@
  * @param[in]       vec_in      pointer to input vector
  * @param[in]       dim_vec     input vector dimension
  * @param[out]      p_out       pointer to output vector
- * @return none.
  *
  * @note This function is an optimized version which is not bit-accurate with
  *       TensorFlow Lite's kernel
diff --git a/CMSIS/NN/Include/arm_nnsupportfunctions.h b/CMSIS/NN/Include/arm_nnsupportfunctions.h
index f25d785..542bc15 100644
--- a/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        7 July 2022
- * $Revision:    V.9.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.9.0.1
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -129,7 +129,6 @@
  * @param[in]       *pSrc points to the q7 input vector
  * @param[out]      *pDst points to the q15 output vector
  * @param[in]       blockSize length of the input vector
- * @return none.
  *
  */
 void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
@@ -158,7 +157,6 @@
  * @param[out]      dst        pointer to the q15 output vector
  * @param[in]       block_size length of the input vector
  * @param[in]       offset     offset to be added to each input vector element.
- * @return none.
  *
  * @details  This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
  *           the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its
@@ -800,7 +798,6 @@
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
- * @return none.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
@@ -817,7 +814,6 @@
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
- * @return none.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
diff --git a/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c b/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
index 72a0b15..50a175c 100644
--- a/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
+++ b/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nn_activations_q7.c
  * Description:  Q7 neural network activation function using direct table look-up
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,20 +40,11 @@
  * @{
  */
 
-/**
- * @brief Q7 neural network activation function using direct table look-up
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
- * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
- * @param[in]       type        type of activation functions
+/*
+ * Q7 neural network activation function using direct table look-up
  *
- * @details
+ * Refer to the header file for details.
  *
- * This is the direct table look-up approach.
- *
- * Assume here the integer part of the fixed-point is <= 3.
- * More than 3 just not making much sense, makes no difference with
- * saturation followed by any of these activation functions.
  */
 
 void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
diff --git a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
index 1d4ea4e..61ddc6e 100644
--- a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
+++ b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_relu_q15.c
  * Description:  Q15 version of ReLU
  *
- * $Date:        20. July 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,14 +40,10 @@
  * @{
  */
 
-/**
- * @brief Q15 RELU function
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
+/*
+ * Q15 ReLU function
  *
- * @details
- *
- * Optimized relu with QSUB instructions.
+ * Refer to the header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
index a3163cd..eba5da7 100644
--- a/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
+++ b/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_relu_q7.c
  * Description:  Q7 version of ReLU
  *
- * $Date:        20. July 2021
- * $Revision:    V.1.1.3
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.4
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,14 +40,10 @@
  * @{
  */
 
-/**
- * @brief Q7 RELU function
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
+/*
+ * Q7 ReLU function
  *
- * @details
- *
- * Optimized relu with QSUB instructions.
+ * Refer to the header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
index 7ee8f5d..25a80e8 100644
--- a/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
+++ b/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_elementwise_mul_s8
  * Description:  Element wise multiplication
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,10 +40,10 @@
  * @{
  */
 
-/**
- * @brief s8 element wise multiplication of two vectors
+/*
+ * s8 element wise multiplication of two vectors
  *
- * @note   Refer header file for details.
+ * Refer to the header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
index a6a5cc7..f3f5e6b 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_1x1_HWC_q7_fast_nonsquare.c
  * Description:  Fast Q7 version of 1x1 convolution (non-square shape)
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,43 +40,10 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
+/*
+ * Fast Q7 version of 1x1 convolution (non-square shape)
+ * Refer to the function header for details.
  *
- * This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1
- * and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise
- * separable convolution.
- *
- * This function is the version with full list of optimization tricks, but with
- * some constraints:
- *   ch_im_in is multiple of 4
- *   ch_im_out is multiple of 2
- *
- * [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications
- * https://arxiv.org/abs/1704.04861
  */
 
 arm_cmsis_nn_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
index bcfc2c2..79f3d35 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q15_basic.c
  * Description:  Q15 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,35 +40,9 @@
  * @{
  */
 
-/**
- * @brief Basic Q15 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * This basic version is designed to work for any input tensor and weight
- * dimension.
+/*
+ * Basic Q15 convolution function
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
index 221d93e..707c658 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q15_fast.c
  * Description:  Fast Q15 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,43 +40,9 @@
  * @{
  */
 
-/**
- * @brief Fast Q15 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 2
- *
- * ch_im_out is multiple of 2
- *
- * dim_im_out is a multiple of 2
- *
+/*
+ * Fast Q15 convolution function
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
index 412a4a4..33ddc42 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q15_fast.c
  * Description:  Fast Q15 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,46 +40,9 @@
  * @{
  */
 
-/**
- * @brief Fast Q15 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 2
- *
- * ch_im_out is multiple of 2
- *
+/*
+ * Fast Q15 convolution function (non-square shape)
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
index 9822b25..2bde43e 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q7_RGB.c
  * Description:  Q7 version of convolution for RGB image
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,42 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q7 convolution function for RGB image
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in equals 3
- *
- * This kernel is written exclusively for convolution with ch_im_in
- * equals 3. This applies on the first layer of CNNs which has input
- * image with RGB format.
+/*
+ * Q7 convolution function for RGB image
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
index 55b50c3..0d5bad3 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q7_basic.c
  * Description:	 Q7 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,35 +40,9 @@
  * @{
  */
 
-/**
- * @brief Basic Q7 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * This basic version is designed to work for any input tensor and weight
- * dimension.
+/*
+ * Basic Q7 convolution function
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
index 1a92a97..39c3563 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q7_basic.c
  * Description:	 Q7 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,29 +40,10 @@
  * @{
  */
 
-/**
- * @brief Basic Q7 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+/*
+ * Basic Q7 convolution function (non-square shape)
+ * Refer to the function header for details.
+ *
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
index 71ac847..100e7fd 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q7_fast.c
  * Description:  Fast Q7 version of convolution
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,53 +40,9 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 4    ( because of the SIMD32 read and swap )
- *
- * ch_im_out is multiple of 2    ( bacause 2x2 mat_mult kernel )
- *
- * The im2col converts the Q7 tensor input into Q15 column, which is stored in
- * bufferA. There is reordering happenning during this im2col process with
- * arm_q7_to_q15_reordered_no_shift. For every four elements, the second and
- * third elements are swapped.
- *
- * The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the
- * GEMM computation with the reordered columns.
- *
- * To speed-up the determination of the padding condition, we split the
- * computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}.
- * This reduces the total number of boundary condition checks and improves
- * the data copying performance.
+/*
+ * Fast Q7 convolution function
+ * Refer to the function header for details.
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
index 006fcce..678ee9d 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_convolve_HWC_q7_fast_nonsquare.c
  * Description:  Fast Q7 version of convolution (non-sqaure shape)
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,36 +40,9 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * This function is the version with full list of optimization tricks, but with
- * some constraints:
- *   ch_im_in is multiple of 4
- *   ch_im_out is multiple of 2
+/*
+ * Fast Q7 convolution function (non-square shape)
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
index d1d3d5d..46c0014 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_depthwise_separable_conv_HWC_q7.c
  * Description:  Q7 depthwise separable convolution function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,44 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q7 depthwise separable convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimension
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in equals ch_im_out
- *
- * Implementation:
- * There are 3 nested loop here:
- * Inner loop: calculate each output value with MAC instruction over an accumulator
- * Mid   loop: loop over different output channel
- * Outer loop: loop over different output (x, y)
+/*
+ * Q7 depthwise separable convolution function
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
@@ -264,13 +229,13 @@
                              "smlad %[sum4], r4, r5, %[sum4]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP_%=\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #else
                 /*
@@ -308,13 +273,13 @@
                              "smlad %[sum3], r4, r5, %[sum3]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP_%=\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 
 #endif /* ARM_MATH_BIG_ENDIAN */
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
index e75e011..c460698 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_depthwise_separable_conv_HWC_q7_nonsquare.c
  * Description:  Q7 depthwise separable convolution function (non-square shape)
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,36 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q7 depthwise separable convolution function (non-square shape)
- * @param[in]       Im_in         pointer to input tensor
- * @param[in]       dim_im_in_x   input tensor dimension x
- * @param[in]       dim_im_in_y   input tensor dimension y
- * @param[in]       ch_im_in      number of input tensor channels
- * @param[in]       wt            pointer to kernel weights
- * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x  filter kernel size x
- * @param[in]       dim_kernel_y  filter kernel size y
- * @param[in]       padding_x     padding sizes x
- * @param[in]       padding_y     padding sizes y
- * @param[in]       stride_x      convolution stride x
- * @param[in]       stride_y      convolution stride y
- * @param[in]       bias          pointer to bias
- * @param[in]       bias_shift    amount of left-shift for bias
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in,out]   Im_out        pointer to output tensor
- * @param[in]       dim_im_out_x  output tensor dimension x
- * @param[in]       dim_im_out_y  output tensor dimension y
- * @param[in,out]   bufferA       pointer to buffer space for input
- * @param[in,out]   bufferB       pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
- * constraints checking.
- *
- * This function is the version with full list of optimization tricks, but with
- * some constraints:
- *   ch_im_in is equal to ch_im_out
- *
+/*
+ * Q7 depthwise separable convolution function (non-square shape)
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
@@ -271,13 +244,13 @@
                              "smlad %[sum4], r4, r5, %[sum4]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #else
                 //  r0    r1    r2    r3    r4   r5
@@ -313,13 +286,13 @@
                              "smlad %[sum3], r4, r5, %[sum3]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #endif /*ARM_MATH_BIG_ENDIAN */
 
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
index 05c95b6..bb1ad52 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nn_mat_mult_kernel_q7_q15.c
  * Description:  Matrix-multiplication function for convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -30,10 +30,10 @@
 #include "arm_nnfunctions.h"
 #include "arm_nnsupportfunctions.h"
 
-/**
- * @brief Matrix-multiplication function for convolution.
+/*
+ * Matrix-multiplication function for convolution.
  *
- * @details Refer to header file for details.
+ * Refer to header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
index 0870ac3..a779471 100644
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nn_mat_mult_kernel_q7_q15_reordered.c
  * Description:  Matrix-multiplication function for convolution with reordered columns
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -30,10 +30,10 @@
 #include "arm_nnfunctions.h"
 #include "arm_nnsupportfunctions.h"
 
-/**
- * @brief Matrix-multiplication function for convolution with re-ordered input.
+/*
+ * Matrix-multiplication function for convolution with re-ordered input.
  *
- * @details Refer to header file for details.
+ * Refer to header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
index 123a324..2dce44c 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_mat_q7_vec_q15.c
  * Description:  Mixed Q15-Q7 fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,29 +40,9 @@
  * @{
  */
 
-/**
+/*
  * @brief Mixed Q15-Q7 fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Q7_Q15 version of the fully connected layer
- *
- *  Weights are in q7_t and Activations are in q15_t
- *
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
index b0367a0..f5e91d8 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_mat_q7_vec_q15_opt.c
  * Description:  Mixed Q15-Q7 opt fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,77 +40,9 @@
  * @{
  */
 
-/**
- * @brief Mixed Q15-Q7 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Q7_Q15 version of the fully connected layer
- *
- *  Weights are in q7_t and Activations are in q15_t
- *
- *  Limitation: x4 version requires weight reordering to work
- *
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original q7_t matrix looks like this:
- *
- *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
- *
- *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
- *
- *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
- *
- *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
- *
- *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
- *
- *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 |
- *
- *  | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 |
- *
- *  | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 |
- *
- *  The column left over will be in-order.
- *  which is:
- *  | a17 | a27 | a37 | a47 |
- *
- *  For the left-over rows, we do 1x1 computation, so the data remains
- *  as its original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *  | a11 | a21 | a12 | a22 | a31 | a41 |
- *
- *  | a32 | a42 | a13 | a23 | a14 | a24 |
- *
- *  | a33 | a43 | a34 | a44 | a15 | a25 |
- *
- *  | a16 | a26 | a35 | a45 | a36 | a46 |
- *
- *  | a17 | a27 | a37 | a47 | a51 | a52 |
- *
- *  | a53 | a54 | a55 | a56 | a57 | a61 |
- *
- *  | a62 | a63 | a64 | a65 | a66 | a67 |
- *
+/*
+ * Mixed Q15-Q7 opt fully-connected layer function
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
@@ -204,55 +136,47 @@
          */
 
 #ifndef ARM_MATH_BIG_ENDIAN
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r1, [%[pB]], #8\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r1, [%[pB]], #8\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #else
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r1, [%[pB]], #8\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r1, [%[pB]], #8\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #endif /* ARM_MATH_BIG_ENDIAN */
 
 #endif /* USE_INTRINSIC */
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
index be928f3..4c6e7d6 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_q15.c
  * Description:  Q15 basic fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,26 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q15 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
+/*
+ * Q15 basic fully-connected layer function
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_q15(const q15_t *pV,
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
index ef0b413..27823e3 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_q15_opt.c
  * Description:  Q15 opt fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,57 +40,9 @@
  * @{
  */
 
-/**
+/*
  * @brief Q15 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original matrix looks like this:
- *
- *  | a11 | a12 | a13 |
- *
- *  | a21 | a22 | a23 |
- *
- *  | a31 | a32 | a33 |
- *
- *  | a41 | a42 | a43 |
- *
- *  | a51 | a52 | a53 |
- *
- *  | a61 | a62 | a63 |
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
- *
- *  | a13 | a23 | a33 | a43 |
- *
- *  Remaining rows are kept the same original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *
- *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
- *
- *  | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 |
- *
- *  | a62 | a63 |
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_q15_opt(const q15_t *pV,
@@ -155,26 +107,22 @@
          * activation data: inV
          */
 
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r0, [%[pB]], #16\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "ldr.w r1, [%[pB] , #-12]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r2, [%[pB] , #-8]\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "ldr.w r3, [%[pB] , #-4]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r0, [%[pB]], #16\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "ldr.w r1, [%[pB] , #-12]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r2, [%[pB] , #-8]\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "ldr.w r3, [%[pB] , #-4]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 
 #endif /* USE_INTRINSIC */
 
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
index f54004d..f5a9357 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_q7.c
  * Description:  Q7 basic fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,28 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q7 basic fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: dim_vec
- *
- * This basic function is designed to work with regular weight
- * matrix without interleaving.
- *
+/*
+ * Q7 basic fully-connected layer function
+ * Refer to function header for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_q7(const q7_t *pV,
diff --git a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
index 009201f..b8db8d7 100644
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_fully_connected_q7_opt.c
  * Description:  Q7 basic fully-connected layer function
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,89 +40,9 @@
  * @{
  */
 
-/**
- * @brief Q7 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: dim_vec
- *
- * This opt function is designed to work with interleaved weight
- * matrix. The vector input is assumed in q7_t format, we call
- *  arm_q7_to_q15_no_shift_shuffle function to expand into
- *  q15_t format with certain weight re-ordering, refer to the function
- *  comments for more details.
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original q7_t matrix looks like this:
- *
- *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
- *
- *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
- *
- *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
- *
- *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
- *
- *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
- *
- *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 |
- *
- *  | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 |
- *
- *  | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 |
- *
- *  So within the kernel, we first read the re-ordered vector in as:
- *
- *  | b1  | b3  | and | b2  | b4  |
- *
- *  the four q31_t weights will look like
- *
- *  | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 |
- *
- *  | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 |
- *
- *  The column left over will be in-order.
- *  which is:
- *
- *  | a17 | a27 | a37 | a47 |
- *
- *  For the left-over rows, we do 1x1 computation, so the data remains
- *  as its original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *  | a11 | a21 | a13 | a23 | a31 | a41 |
- *
- *  | a33 | a43 | a12 | a22 | a14 | a24 |
- *
- *  | a32 | a42 | a34 | a44 | a15 | a25 |
- *
- *  | a35 | a45 | a16 | a26 | a36 | a46 |
- *
- *  | a17 | a27 | a37 | a47 | a51 | a52 |
- *
- *  | a53 | a54 | a55 | a56 | a57 | a61 |
- *
- *  | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *
+/*
+ * Q7 opt fully-connected layer function
+ * Refer to the header file for details
  */
 
 arm_cmsis_nn_status arm_fully_connected_q7_opt(const q7_t *pV,
@@ -237,81 +157,73 @@
          */
 
 #ifndef ARM_MATH_BIG_ENDIAN
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #8\n"
-                     "ldr.w r1, [%[pB]], #16\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-12]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "ldr.w r4, [%[pA], #-4]\n"
-                     "ldr.w r1, [%[pB], #-8]\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #8\n"
+            "ldr.w r1, [%[pB]], #16\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-12]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "ldr.w r4, [%[pA], #-4]\n"
+            "ldr.w r1, [%[pB], #-8]\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #else
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #8\n"
-                     "ldr.w r1, [%[pB]], #16\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-12]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "ldr.w r4, [%[pA], #-4]\n"
-                     "ldr.w r1, [%[pB], #-8]\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #8\n"
+            "ldr.w r1, [%[pB]], #16\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-12]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "ldr.w r4, [%[pA], #-4]\n"
+            "ldr.w r1, [%[pB], #-8]\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #endif /* ARM_MATH_BIG_ENDIAN */
 
 #endif /* USE_INTRINSIC */
diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
index d6a45ef..720ed6d 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nn_mult_q15.c
  * Description:  Q15 vector multiplication with variable output shifts
  *
- * $Date:        20. July 2021
- * $Revision:    V.1.1.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,18 +39,10 @@
  * @{
  */
 
-/**
- * @brief           Q7 vector multiplication with variable output shifts
- * @param[in]       *pSrcA        pointer to the first input vector
- * @param[in]       *pSrcB        pointer to the second input vector
- * @param[out]      *pDst         pointer to the output vector
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in]       blockSize     number of samples in each vector
+/*
+ * Q15 vector multiplication with variable output shifts
+ * Refer to the header file for details
  *
- * <b>Scaling and Overflow Behavior:</b>
- * \par
- * The function uses saturating arithmetic.
- * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
  */
 
 void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
index fdced4c..6c19557 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nn_mult_q7.c
  * Description:  Q7 vector multiplication with variable output shifts
  *
- * $Date:        20. July 2021
- * $Revision:    V.1.1.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,18 +39,9 @@
  * @{
  */
 
-/**
- * @brief           Q7 vector multiplication with variable output shifts
- * @param[in]       *pSrcA        pointer to the first input vector
- * @param[in]       *pSrcB        pointer to the second input vector
- * @param[out]      *pDst         pointer to the output vector
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in]       blockSize     number of samples in each vector
- *
- * <b>Scaling and Overflow Behavior:</b>
- * \par
- * The function uses saturating arithmetic.
- * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
+/*
+ * Q7 vector multiplication with variable output shifts
+ * Refer to the header file for details
  */
 
 void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
index 6f2f575..1e25287 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_q7_to_q15_no_shift.c
  * Description:  Converts the elements of the Q7 vector to Q15 vector without left-shift
  *
- * $Date:        May 29, 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,20 +39,9 @@
  * @{
  */
 
-/**
+/*
  * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
- * @param[in]       *pSrc points to the Q7 input vector
- * @param[out]      *pDst points to the Q15 output vector
- * @param[in]       blockSize length of the input vector
- *
- * \par Description:
- *
- * The equation used for the conversion process is:
- *
- * <pre>
- * 	pDst[n] = (q15_t) pSrc[n];   0 <= n < blockSize.
- * </pre>
- *
+ * Refer to the header file for details
  */
 
 void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize)
diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
index 8abbc3a..b0261eb 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_q7_to_q15_reordered_no_shift.c
  * Description:  Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
  *
- * $Date:        July 20, 2021
- * $Revision:    V.1.1.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,13 +39,10 @@
  * @{
  */
 
-/**
- * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
- * @param[in]       *pSrc points to the Q7 input vector
- * @param[out]      *pDst points to the Q15 output vector
- * @param[in]       blockSize length of the input vector
+/*
+ * Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
  *
- * @details
+ * Refer to header for details
  *
  * This function does the q7 to q15 expansion with re-ordering
  *
diff --git a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
index 765929d..643d4c9 100644
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -22,8 +22,8 @@
  * Description:  Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. The re-ordering
  *               is a signature of sign extension intrinsic(DSP extension).
  *
- * $Date:        May 29, 2020
- * $Revision:    V.2.0.3
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.4
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,10 +40,10 @@
  * @{
  */
 
-/**
- * @brief Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset.
+/*
+ * Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset.
  *
- * @note  Refer header file for details.
+ * Refer header file for details.
  *
  */
 
diff --git a/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c b/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
index 5a3b1af..2594496 100644
--- a/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
+++ b/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_pool_q7_HWC.c
  * Description:  Pooling function implementations
  *
- * $Date:        20. July 2021
- * $Revision:    V.1.1.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -33,8 +33,8 @@
 
 #if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
 
-/**
- * @brief A few utility functions used by pooling functions
+/*
+ * A few utility functions used by pooling functions
  *
  *
  */
diff --git a/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c b/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
index 5464ca1..7ebb496 100644
--- a/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
+++ b/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_reshape_s8.c
  * Description:  Reshape a s8 vector
  *
- * $Date:        September 2019
- * $Revision:    V.1.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -40,7 +40,7 @@
  * @{
  */
 
-/**
+/*
  * Basic s8 reshape function.
  *
  * Refer header file for details.
diff --git a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
index 18f3e83..b5e1ec3 100644
--- a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
+++ b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2018, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_softmax_q15.c
  * Description:  Q15 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,13 +39,9 @@
  * @{
  */
 
-/**
- * @brief Q15 softmax function
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
+/*
+ * Q15 softmax function
  *
- * @details
  *
  *  Here, instead of typical e based softmax, we use
  *  2-based softmax, i.e.,:
diff --git a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
index 58eb990..dee7f34 100644
--- a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
+++ b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_softmax_q7.c
  * Description:  Q7 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -39,13 +39,8 @@
  * @{
  */
 
-/**
- * @brief Q7 softmax function
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
- *
- * @details
+/*
+ * Q7 softmax function
  *
  *  Here, instead of typical natural logarithm e based softmax, we use
  *  2-based softmax here, i.e.,:
diff --git a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
index 66e892e..359d89f 100644
--- a/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
+++ b/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2019, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_softmax_with_batch_q7.c
  * Description:  Q7 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M and Cortex-A cores
  *
@@ -39,14 +39,10 @@
  * @{
  */
 
-/**
- * @brief Q7 softmax function with batch parameter
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       nb_batches  number of batches
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
+/*
+ * Q7 softmax function with batch parameter
  *
- * @details
+ * Details:
  *
  *  Here, instead of typical natural logarithm e based softmax, we use
  *  2-based softmax here, i.e.,: