CMSIS-DSP: Removed float promotion issues
Corrected some additional warnings when compiling with AC5 compiler.
Updated version numbers.
diff --git a/ARM.CMSIS.pdsc b/ARM.CMSIS.pdsc
index 2d3d618..0153a50 100644
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -10,6 +10,7 @@
   <releases>
     <release version="5.8.1">
       Active development ...
+      CMSIS-DSP: 1.10.0 (see revision history for details)
     </release>
     <release version="5.8.0" date="2021-06-24">
       CMSIS-Core(M): 5.5.0 (see revision history for details)
@@ -2733,7 +2734,7 @@
     </component>
 
     <!-- CMSIS-DSP component -->
-    <component Cclass="CMSIS" Cgroup="DSP" Cvariant="Source"  Cversion="1.9.0-dev" isDefaultVariant="true" condition="CMSIS DSP">
+    <component Cclass="CMSIS" Cgroup="DSP" Cvariant="Source"  Cversion="1.10.0-dev" isDefaultVariant="true" condition="CMSIS DSP">
       <description>CMSIS-DSP Library for Cortex-M, SC000, and SC300</description>
       <files>
         <!-- CPU independent -->
diff --git a/CMSIS/DSP/Include/arm_common_tables.h b/CMSIS/DSP/Include/arm_common_tables.h
index 91d2be0..f3ccfb5 100644
--- a/CMSIS/DSP/Include/arm_common_tables.h
+++ b/CMSIS/DSP/Include/arm_common_tables.h
@@ -3,8 +3,8 @@
  * Title:        arm_common_tables.h
  * Description:  Extern declaration for common tables
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_common_tables_f16.h b/CMSIS/DSP/Include/arm_common_tables_f16.h
index f40c1a4..f9a63fe 100755
--- a/CMSIS/DSP/Include/arm_common_tables_f16.h
+++ b/CMSIS/DSP/Include/arm_common_tables_f16.h
@@ -3,8 +3,8 @@
  * Title:        arm_common_tables_f16.h
  * Description:  Extern declaration for common tables
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_const_structs.h b/CMSIS/DSP/Include/arm_const_structs.h
index 15e7726..59026db 100644
--- a/CMSIS/DSP/Include/arm_const_structs.h
+++ b/CMSIS/DSP/Include/arm_const_structs.h
@@ -4,8 +4,8 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f32() function.
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_const_structs_f16.h b/CMSIS/DSP/Include/arm_const_structs_f16.h
index 584941e..0984d74 100755
--- a/CMSIS/DSP/Include/arm_const_structs_f16.h
+++ b/CMSIS/DSP/Include/arm_const_structs_f16.h
@@ -4,8 +4,8 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f16() function.
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
@@ -74,4 +74,4 @@
 }
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/CMSIS/DSP/Include/arm_helium_utils.h b/CMSIS/DSP/Include/arm_helium_utils.h
index 54a9db5..ae9037c 100755
--- a/CMSIS/DSP/Include/arm_helium_utils.h
+++ b/CMSIS/DSP/Include/arm_helium_utils.h
@@ -3,8 +3,8 @@
  * Title:        arm_helium_utils.h
  * Description:  Utility functions for Helium development
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_math.h b/CMSIS/DSP/Include/arm_math.h
index 79ce541..300c5cf 100644
--- a/CMSIS/DSP/Include/arm_math.h
+++ b/CMSIS/DSP/Include/arm_math.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_math.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/arm_math_f16.h b/CMSIS/DSP/Include/arm_math_f16.h
index c046a12..daf0c53 100755
--- a/CMSIS/DSP/Include/arm_math_f16.h
+++ b/CMSIS/DSP/Include/arm_math_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_math_f16.h
  * @brief    Public header file for f16 function of the CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/arm_math_memory.h b/CMSIS/DSP/Include/arm_math_memory.h
index 771bb7c..6694a1d 100755
--- a/CMSIS/DSP/Include/arm_math_memory.h
+++ b/CMSIS/DSP/Include/arm_math_memory.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_math_memory.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/arm_math_types.h b/CMSIS/DSP/Include/arm_math_types.h
index e9f6ed2..7680eef 100755
--- a/CMSIS/DSP/Include/arm_math_types.h
+++ b/CMSIS/DSP/Include/arm_math_types.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_math_types.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/arm_math_types_f16.h b/CMSIS/DSP/Include/arm_math_types_f16.h
index baf8750..f4064b9 100755
--- a/CMSIS/DSP/Include/arm_math_types_f16.h
+++ b/CMSIS/DSP/Include/arm_math_types_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_math_types_f16.h
  * @brief    Public header file for f16 function of the CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -137,7 +137,7 @@
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 #define F16_MAX   ((float16_t)__FLT16_MAX__)
-#define F16_MIN   (-(float16_t)__FLT16_MAX__)
+#define F16_MIN   (-(_Float16)__FLT16_MAX__)
 
 #define F16_ABSMAX   ((float16_t)__FLT16_MAX__)
 #define F16_ABSMIN   ((float16_t)0.0f16)
diff --git a/CMSIS/DSP/Include/arm_mve_tables.h b/CMSIS/DSP/Include/arm_mve_tables.h
index fe41a44..d116b18 100755
--- a/CMSIS/DSP/Include/arm_mve_tables.h
+++ b/CMSIS/DSP/Include/arm_mve_tables.h
@@ -4,8 +4,8 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_mve_tables_f16.h b/CMSIS/DSP/Include/arm_mve_tables_f16.h
index c93aed1..c3a9a7e 100755
--- a/CMSIS/DSP/Include/arm_mve_tables_f16.h
+++ b/CMSIS/DSP/Include/arm_mve_tables_f16.h
@@ -4,8 +4,8 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
diff --git a/CMSIS/DSP/Include/arm_vec_math.h b/CMSIS/DSP/Include/arm_vec_math.h
index 029088f..d9134c5 100755
--- a/CMSIS/DSP/Include/arm_vec_math.h
+++ b/CMSIS/DSP/Include/arm_vec_math.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_vec_math.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/arm_vec_math_f16.h b/CMSIS/DSP/Include/arm_vec_math_f16.h
index c79955b..91bd28a 100755
--- a/CMSIS/DSP/Include/arm_vec_math_f16.h
+++ b/CMSIS/DSP/Include/arm_vec_math_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     arm_vec_math_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -68,11 +68,11 @@
     b = 2.0f16 - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m_n_f16(xinv.f, F16INFINITY, vcmpeqq_n_f16(x, 0.0f));
     /*
      * restore sign
      */
-    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq_n_f16(x, 0.0f));
 
     return xinv.f;
 }
@@ -105,11 +105,11 @@
     b = 2.0f16 - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m_n_f16(xinv.f, F16INFINITY, vcmpeqq_n_f16(x, 0.0f));
     /*
      * restore sign
      */
-    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq_n_f16(x, 0.0f));
 
     return xinv.f;
 }
@@ -211,7 +211,7 @@
      */
     vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f16);
     // set log0 down to -inf
-    vecAcc0 = vdupq_m(vecAcc0, -F16INFINITY, vcmpeqq(vecIn, 0.0f));
+    vecAcc0 = vdupq_m_n_f16(vecAcc0, -(_Float16)F16INFINITY, vcmpeqq_n_f16(vecIn, 0.0f));
     return vecAcc0;
 }
 
@@ -228,7 +228,7 @@
     // Reconstruct
     poly = (float16x8_t) (vqaddq_s16((int16x8_t) (poly), vqshlq_n_s16(m, 10)));
 
-    poly = vdupq_m(poly, 0.0f, vcmpltq_n_s16(m, -14));
+    poly = vdupq_m_n_f16(poly, 0.0f16, vcmpltq_n_s16(m, -14));
     return poly;
 }
 
@@ -265,20 +265,20 @@
     vecW = vmulq(vecSx, v.f);
 
     // v.f = v.f * (8 + w * (-28 + w * (56 + w * (-70 + w *(56 + w * (-28 + w * (8 - w)))))));
-    vecTmp = vsubq(vdupq_n_f16(8.0f), vecW);
-    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
-    vecTmp = vfmasq(vecW, vecTmp, -70.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
-    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 8.0f);
+    vecTmp = vsubq(vdupq_n_f16(8.0f16), vecW);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -28.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 56.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -70.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 56.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -28.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 8.0f16);
     v.f = vmulq(v.f,  vecTmp);
 
-    v.f = vdupq_m(v.f, F16INFINITY, vcmpeqq(vecIn, 0.0f));
+    v.f = vdupq_m_n_f16(v.f, F16INFINITY, vcmpeqq_n_f16(vecIn, 0.0f));
     /*
      * restore sign
      */
-    v.f = vnegq_m(v.f, v.f, vcmpltq(vecIn, 0.0f));
+    v.f = vnegq_m(v.f, v.f, vcmpltq_n_f16(vecIn, 0.0f));
     return v.f;
 }
 
@@ -286,10 +286,10 @@
     f16x8_t val)
 {
     f16x8_t         x =
-        vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f)), vdupq_n_f16(10.0f));
-    f16x8_t         exp2x = vexpq_f16(vmulq_n_f16(x, 2.f));
-    f16x8_t         num = vsubq_n_f16(exp2x, 1.f);
-    f16x8_t         den = vaddq_n_f16(exp2x, 1.f);
+        vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f16)), vdupq_n_f16(10.0f16));
+    f16x8_t         exp2x = vexpq_f16(vmulq_n_f16(x, 2.f16));
+    f16x8_t         num = vsubq_n_f16(exp2x, 1.f16);
+    f16x8_t         den = vaddq_n_f16(exp2x, 1.f16);
     f16x8_t         tanh = vmulq_f16(num, vrecip_f16(den));
     return tanh;
 }
diff --git a/CMSIS/DSP/Include/dsp/basic_math_functions.h b/CMSIS/DSP/Include/dsp/basic_math_functions.h
index b348152..d872f9c 100755
--- a/CMSIS/DSP/Include/dsp/basic_math_functions.h
+++ b/CMSIS/DSP/Include/dsp/basic_math_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     basic_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h b/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
index 1e4acb2..de9f58b 100755
--- a/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     basic_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/bayes_functions.h b/CMSIS/DSP/Include/dsp/bayes_functions.h
index beca38e..824c50e 100755
--- a/CMSIS/DSP/Include/dsp/bayes_functions.h
+++ b/CMSIS/DSP/Include/dsp/bayes_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     bayes_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/bayes_functions_f16.h b/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
index f2c9ad8..5c4ae42 100755
--- a/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     bayes_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/complex_math_functions.h b/CMSIS/DSP/Include/dsp/complex_math_functions.h
index 5bf3e17..6d64fb0 100755
--- a/CMSIS/DSP/Include/dsp/complex_math_functions.h
+++ b/CMSIS/DSP/Include/dsp/complex_math_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     complex_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h b/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
index da78559..b17f931 100755
--- a/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     complex_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/controller_functions.h b/CMSIS/DSP/Include/dsp/controller_functions.h
index 53823db..7c08c24 100755
--- a/CMSIS/DSP/Include/dsp/controller_functions.h
+++ b/CMSIS/DSP/Include/dsp/controller_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     controller_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/controller_functions_f16.h b/CMSIS/DSP/Include/dsp/controller_functions_f16.h
index b0bdd78..8fae483 100755
--- a/CMSIS/DSP/Include/dsp/controller_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/controller_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     controller_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/distance_functions.h b/CMSIS/DSP/Include/dsp/distance_functions.h
index 0af3c6f..c79f7c3 100755
--- a/CMSIS/DSP/Include/dsp/distance_functions.h
+++ b/CMSIS/DSP/Include/dsp/distance_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     distance_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/distance_functions_f16.h b/CMSIS/DSP/Include/dsp/distance_functions_f16.h
index ab01fc6..a7ceb3c 100755
--- a/CMSIS/DSP/Include/dsp/distance_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/distance_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     distance_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/fast_math_functions.h b/CMSIS/DSP/Include/dsp/fast_math_functions.h
index e9e72b4..42c4034 100755
--- a/CMSIS/DSP/Include/dsp/fast_math_functions.h
+++ b/CMSIS/DSP/Include/dsp/fast_math_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     fast_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h b/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
index 98a13cb..9f0dec4 100755
--- a/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     fast_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/filtering_functions.h b/CMSIS/DSP/Include/dsp/filtering_functions.h
index 634edbf..ef7f2dd 100755
--- a/CMSIS/DSP/Include/dsp/filtering_functions.h
+++ b/CMSIS/DSP/Include/dsp/filtering_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     filtering_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -1176,7 +1176,7 @@
 void arm_biquad_cascade_df2T_compute_coefs_f32(
   arm_biquad_cascade_df2T_instance_f32 * S,
   uint8_t numStages,
-  float32_t * pCoeffs);
+  const float32_t * pCoeffs);
 #endif
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
diff --git a/CMSIS/DSP/Include/dsp/filtering_functions_f16.h b/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
index 6ccb8a2..fd8b0bb 100755
--- a/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     filtering_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/interpolation_functions.h b/CMSIS/DSP/Include/dsp/interpolation_functions.h
index 42bf746..3c0e7fc 100755
--- a/CMSIS/DSP/Include/dsp/interpolation_functions.h
+++ b/CMSIS/DSP/Include/dsp/interpolation_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     interpolation_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h b/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
index 01fd87a..8b6e6a9 100755
--- a/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     interpolation_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/matrix_functions.h b/CMSIS/DSP/Include/dsp/matrix_functions.h
index e03a2f1..3d44488 100755
--- a/CMSIS/DSP/Include/dsp/matrix_functions.h
+++ b/CMSIS/DSP/Include/dsp/matrix_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     matrix_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/matrix_functions_f16.h b/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
index 62876a7..3693ec4 100755
--- a/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     matrix_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/quaternion_math_functions.h b/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
index 2e1f2e0..0c5d067 100755
--- a/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
+++ b/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     quaternion_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
diff --git a/CMSIS/DSP/Include/dsp/statistics_functions.h b/CMSIS/DSP/Include/dsp/statistics_functions.h
index ee5c692..84189a2 100755
--- a/CMSIS/DSP/Include/dsp/statistics_functions.h
+++ b/CMSIS/DSP/Include/dsp/statistics_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     statistics_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/statistics_functions_f16.h b/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
index 8ed3a84..fec0b28 100755
--- a/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     statistics_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/support_functions.h b/CMSIS/DSP/Include/dsp/support_functions.h
index 3c2a7de..c1aa559 100755
--- a/CMSIS/DSP/Include/dsp/support_functions.h
+++ b/CMSIS/DSP/Include/dsp/support_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     support_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/support_functions_f16.h b/CMSIS/DSP/Include/dsp/support_functions_f16.h
index 47b6535..5604cab 100755
--- a/CMSIS/DSP/Include/dsp/support_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/support_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     support_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/svm_defines.h b/CMSIS/DSP/Include/dsp/svm_defines.h
index 1f6001f..f93e953 100755
--- a/CMSIS/DSP/Include/dsp/svm_defines.h
+++ b/CMSIS/DSP/Include/dsp/svm_defines.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     svm_defines.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
diff --git a/CMSIS/DSP/Include/dsp/svm_functions.h b/CMSIS/DSP/Include/dsp/svm_functions.h
index 8fdcb13..3acc621 100755
--- a/CMSIS/DSP/Include/dsp/svm_functions.h
+++ b/CMSIS/DSP/Include/dsp/svm_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     svm_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/svm_functions_f16.h b/CMSIS/DSP/Include/dsp/svm_functions_f16.h
index b80ed7c..7c9fbab 100755
--- a/CMSIS/DSP/Include/dsp/svm_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/svm_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     svm_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
@@ -34,6 +34,7 @@
 #include "dsp/utils.h"
 #include "dsp/svm_defines.h"
 
+
 #ifdef   __cplusplus
 extern "C"
 {
@@ -57,24 +58,6 @@
  * 
  */
 
-/**
- * @brief Integer exponentiation
- * @param[in]    x           value
- * @param[in]    nb          integer exponent >= 1
- * @return x^nb
- *
- */
-__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
-{
-    float16_t r = x;
-    nb --;
-    while(nb > 0)
-    {
-        r = r * x;
-        nb--;
-    }
-    return(r);
-}
 
 
 /**
diff --git a/CMSIS/DSP/Include/dsp/transform_functions.h b/CMSIS/DSP/Include/dsp/transform_functions.h
index bf9c43c..b7882b6 100755
--- a/CMSIS/DSP/Include/dsp/transform_functions.h
+++ b/CMSIS/DSP/Include/dsp/transform_functions.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     transform_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Include/dsp/transform_functions_f16.h b/CMSIS/DSP/Include/dsp/transform_functions_f16.h
index 67f1adc..0b1685d 100755
--- a/CMSIS/DSP/Include/dsp/transform_functions_f16.h
+++ b/CMSIS/DSP/Include/dsp/transform_functions_f16.h
@@ -1,8 +1,8 @@
 /******************************************************************************
  * @file     transform_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     23 April 2021
+ * @version  V1.10.0
+ * @date     08 July 2021
  * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c b/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c
index efdab77..99abe12 100644
--- a/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c
@@ -76,4 +76,4 @@
 #include "arm_clip_f32.c"
 #include "arm_clip_q31.c"
 #include "arm_clip_q15.c"
-#include "arm_clip_q7.c"
\ No newline at end of file
+#include "arm_clip_q7.c"
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
index fb1b9b4..0d6568b 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
@@ -156,13 +156,13 @@
     /* C = |A| */
 
     /* Calculate absolute and store result in destination buffer. */
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -184,7 +184,7 @@
     /* C = |A| */
 
     /* Calculate absolute and store result in destination buffer. */
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
index 3c313c6..2b2ae8f 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
@@ -130,10 +130,10 @@
     /* C = A + B */
 
     /* Add and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -154,7 +154,7 @@
     /* C = A + B */
 
     /* Add and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
index ec4a6dd..f784a23 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
@@ -123,9 +123,9 @@
 {
     for (uint32_t i = 0; i < numSamples; i++)
     {                                        
-        if (pSrc[i] > high)                  
+        if ((_Float16)pSrc[i] > (_Float16)high)                  
             pDst[i] = high;                  
-        else if (pSrc[i] < low)              
+        else if ((_Float16)pSrc[i] < (_Float16)low)              
             pDst[i] = low;                   
         else                                 
             pDst[i] = pSrc[i];               
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
index bc06d99..4cc2799 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
@@ -126,7 +126,8 @@
   float32_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
index 1d30287..0d67c37 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
@@ -116,7 +116,8 @@
   q15_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
index 36f6526..72b80d3 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
@@ -116,7 +116,8 @@
   q31_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
index 89e16af..a41e0c9 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
@@ -116,7 +116,8 @@
   q7_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
index 4d99b8c..c1115ac 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
@@ -129,13 +129,13 @@
     /* C = A * B */
 
     /* Multiply inputs and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -156,7 +156,7 @@
     /* C = A * B */
 
     /* Multiply input and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
index 2fe26a2..187e511 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
@@ -124,13 +124,13 @@
     /* C = -A */
 
     /* Negate and store result in destination buffer. */
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -151,7 +151,7 @@
     /* C = -A */
 
     /* Negate and store result in destination buffer. */
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
index 2ae2f80..9aeb1e7 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
@@ -128,13 +128,13 @@
     /* C = A + offset */
 
     /* Add offset and store result in destination buffer. */
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -155,7 +155,7 @@
     /* C = A + offset */
 
     /* Add offset and store result in destination buffer. */
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
index 449d748..b4b1c33 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
@@ -141,13 +141,13 @@
     /* C = A * scale */
 
     /* Scale input and store result in destination buffer. */
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -168,7 +168,7 @@
     /* C = A * scale */
 
     /* Scale input and store result in destination buffer. */
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
index eddbbd2..836ad43 100755
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
@@ -129,13 +129,13 @@
     /* C = A - B */
 
     /* Subtract and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -156,7 +156,7 @@
     /* C = A - B */
 
     /* Subtract and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c b/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
index b918f70..e3b2ef6 100755
--- a/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
+++ b/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
@@ -33,7 +33,6 @@
 #include <limits.h>
 #include <math.h>
 
-#define PI_F 3.1415926535897932384626433832795f16
 
 /**
  * @addtogroup groupBayes
@@ -132,7 +131,7 @@
         tmp = -0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc1);
         tmp -= 0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc2);
 
-        *buffer = tmp + *pLogPrior++;
+        *buffer = (_Float16)tmp + (_Float16)*pLogPrior++;
         buffer++;
     }
 
@@ -176,20 +175,20 @@
         acc2 = 0.0f16;
         for(nbDim = 0; nbDim < S->vectorDimension; nbDim++)
         {
-           sigma = *pSigma + S->epsilon;
-           acc1 += logf(2.0f16 * (_Float16)PI_F * sigma);
-           acc2 += (*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
+           sigma = (_Float16)*pSigma + (_Float16)S->epsilon;
+           acc1 += (_Float16)logf(2.0f * PI * (float32_t)sigma);
+           acc2 += ((_Float16)*pIn - (_Float16)*pTheta) * ((_Float16)*pIn - (_Float16)*pTheta) / (_Float16)sigma;
 
            pIn++;
            pTheta++;
            pSigma++;
         }
 
-        tmp = -0.5f16 * acc1;
-        tmp -= 0.5f16 * acc2;
+        tmp = -0.5f16 * (_Float16)acc1;
+        tmp -= 0.5f16 * (_Float16)acc2;
 
 
-        *buffer = tmp + logf(*pPrior++);
+        *buffer = (_Float16)tmp + (_Float16)logf((float32_t)*pPrior++);
         buffer++;
     }
 
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
index 05d7b97..6cf871e 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
@@ -112,7 +112,7 @@
   
       /* Calculate Complex Conjugate and store result in destination buffer. */
       *pDst++ =  *pSrc++;
-      *pDst++ = -*pSrc++;
+      *pDst++ = -(_Float16)*pSrc++;
   
       /* Decrement loop counter */
       blkCnt--;
@@ -139,16 +139,16 @@
 
     /* Calculate Complex Conjugate and store result in destination buffer. */
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -170,7 +170,7 @@
 
     /* Calculate Complex Conjugate and store result in destination buffer. */
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -182,4 +182,4 @@
 /**
   @} end of cmplx_conj group
  */
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
index f75d04e..ff45a9f 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
@@ -285,4 +285,4 @@
   @} end of cmplx_dot_prod group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
index c2f3198..1f93598 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
@@ -238,4 +238,4 @@
   @} end of cmplx_mag group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
index 5d5a3a2..df3cbec 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
@@ -171,4 +171,4 @@
   @} end of cmplx_mag_squared group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
index 3409450..68e1d1b 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
@@ -268,4 +268,4 @@
   @} end of CmplxByCmplxMult group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
index 248858b..f5909fc 100755
--- a/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
+++ b/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
@@ -141,20 +141,20 @@
 
     in = *pSrcReal++;
     /* store result in destination buffer. */
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++* in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -177,8 +177,8 @@
 
     in = *pSrcReal++;
     /* store result in destination buffer. */
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -191,4 +191,4 @@
   @} end of CmplxByRealMult group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c b/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c
index e406ac2..9abd328 100644
--- a/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c
+++ b/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c
@@ -43,4 +43,4 @@
 #include "arm_sin_cos_q31.c"
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
index 0ed32fc..be5537e 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@@ -133,8 +133,8 @@
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      accumDiff += (_Float16)fabsf(tmpA - tmpB);
-      accumSum += (_Float16)fabsf(tmpA + tmpB);
+      accumDiff += (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      accumSum += (_Float16)fabsf((float32_t)((_Float16)tmpA + (_Float16)tmpB));
       blockSize --;
    }
    /*
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
index cbb7e60..bcefc91 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
@@ -150,11 +150,11 @@
       tmpA = *pA++;
       tmpB = *pB++;
 
-      diff = fabsf(tmpA - tmpB);
-      sum = fabsf(tmpA) + fabsf(tmpB);
-      if ((tmpA != 0.0f16) || (tmpB != 0.0f16))
+      diff = fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      sum = (_Float16)fabsf((float32_t)tmpA) + (_Float16)fabsf((float32_t)tmpB);
+      if (((_Float16)tmpA != 0.0f16) || ((_Float16)tmpB != 0.0f16))
       {
-         accum += (diff / sum);
+         accum += ((_Float16)diff / (_Float16)sum);
       }
       blockSize --;
    }
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
index 63ff332..8dec141 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@@ -117,7 +117,7 @@
 
    tmpA = *pA++;
    tmpB = *pB++;
-   diff = fabsf(tmpA - tmpB);
+   diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
    maxVal = diff;
    blockSize--;
 
@@ -125,8 +125,8 @@
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      diff = fabsf(tmpA - tmpB);
-      if (diff > maxVal)
+      diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      if ((_Float16)diff > (_Float16)maxVal)
       {
         maxVal = diff;
       }
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
index a9513ce..44b3bdf 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@@ -111,7 +111,7 @@
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      accum  += (_Float16)fabsf(tmpA - tmpB);
+      accum  += (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
       
       blockSize --;
    }
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
index 70abbde..7517cec 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
@@ -70,21 +70,21 @@
     arm_mean_f16(pA, blockSize, &ma);
     arm_mean_f16(pB, blockSize, &mb);
 
-    arm_offset_f16(pA, -ma, pA, blockSize);
-    arm_offset_f16(pB, -mb, pB, blockSize);
+    arm_offset_f16(pA, -(_Float16)ma, pA, blockSize);
+    arm_offset_f16(pB, -(_Float16)mb, pB, blockSize);
 
     arm_power_f16(pA, blockSize, &pwra);
     arm_power_f16(pB, blockSize, &pwrb);
 
     arm_dot_prod_f16(pA,pB,blockSize,&dot);
 
-    dot = dot / blockSize;
-    pwra = pwra / blockSize;
-    pwrb = pwrb / blockSize;
+    dot = (_Float16)dot / (_Float16)blockSize;
+    pwra = (_Float16)pwra / (_Float16)blockSize;
+    pwrb = (_Float16)pwrb / (_Float16)blockSize;
 
-    arm_sqrt_f16(pwra * pwrb,&tmp);
+    arm_sqrt_f16((_Float16)pwra * (_Float16)pwrb,&tmp);
  
-    return(1.0f - dot / tmp);
+    return(1.0f16 - (_Float16)dot / (_Float16)tmp);
 
    
 }
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
index c85fe91..d93e35f 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
@@ -71,8 +71,8 @@
 
     arm_dot_prod_f16(pA,pB,blockSize,&dot);
 
-    arm_sqrt_f16(pwra * pwrb, &tmp);
-    return(1.0f - dot / tmp);
+    arm_sqrt_f16((_Float16)pwra * (_Float16)pwrb, &tmp);
+    return(1.0f16 - (_Float16)dot / (_Float16)tmp);
 
 }
 
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
index b89e7bd..b31ef34 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@@ -54,7 +54,7 @@
 /// @private
 __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
 {
-    return (x * logf(x / y));
+    return ((_Float16)x * (_Float16)logf((float32_t)((_Float16)x / (_Float16)y)));
 }
 #endif
 
@@ -117,7 +117,7 @@
 
     }
 
-    arm_sqrt_f16(vecAddAcrossF16Mve(accumV) / 2.0f, &tmp);
+    arm_sqrt_f16((_Float16)vecAddAcrossF16Mve(accumV) / 2.0f16, &tmp);
     return (tmp);
 }
 
@@ -162,7 +162,7 @@
 
 
     sum = left + right;
-    arm_sqrt_f16(sum/2.0f, &result);
+    arm_sqrt_f16((_Float16)sum/2.0f16, &result);
     return(result);
 
 }
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
index 9fa1390..c5300ab 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@@ -103,7 +103,7 @@
         sumV = vaddq_m(sumV, sumV, tmpV, p0);
     }
 
-    return (powf(vecAddAcrossF16Mve(sumV), (1.0f / (float16_t) order)));
+    return (powf((float32_t)vecAddAcrossF16Mve(sumV), (1.0f / (float32_t) order)));
 }
 
 
@@ -115,14 +115,14 @@
     _Float16 sum;
     uint32_t i;
 
-    sum = 0.0f; 
+    sum = 0.0f16; 
     for(i=0; i < blockSize; i++)
     {
-       sum += (_Float16)powf(fabsf(pA[i] - pB[i]),order);
+       sum += (_Float16)powf(fabsf((float32_t)((_Float16)pA[i] - (_Float16)pB[i])),order);
     }
 
 
-    return(powf(sum,(1.0f/order)));
+    return(_Float16)(powf((float32_t)sum,(1.0f/(float32_t)order)));
 
 }
 
diff --git a/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c b/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
index 6587412..0698a64 100755
--- a/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
+++ b/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
@@ -63,7 +63,7 @@
     arm_boolean_distance_TT(pA, pB, numberOfBools, &ctt);
 
 
-    return(1.0*(numberOfBools - ctt) / ((float32_t)numberOfBools));
+    return(1.0f*(numberOfBools - ctt) / ((float32_t)numberOfBools));
 }
 
 
diff --git a/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c b/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
index 82a6357..dd7c06e 100755
--- a/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
+++ b/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
@@ -71,7 +71,7 @@
       /* C = log(A) */
   
       /* Calculate log and store result in destination buffer. */
-      *pDst++ = expf(*pSrc++);
+      *pDst++ = (_Float16)expf((float32_t)*pSrc++);
   
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c b/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
index bf78400..1067d02 100755
--- a/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
+++ b/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
@@ -68,7 +68,7 @@
    while (blkCnt > 0U)
    {
       
-      *pDst++ = 1.0 / *pSrc++;
+      *pDst++ = 1.0f16 / (_Float16)*pSrc++;
   
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c b/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
index 1880072..7a98b08 100755
--- a/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
+++ b/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
@@ -69,7 +69,7 @@
       /* C = log(A) */
   
       /* Calculate log and store result in destination buffer. */
-      *pDst++ = logf(*pSrc++);
+      *pDst++ = (_Float16)logf((float32_t)*pSrc++);
   
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
index 72db81c..c1ac140 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
@@ -488,4 +488,4 @@
  */
 #endif /* #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
-#endif /*#if defined(ARM_FLOAT16_SUPPORTED)*/
\ No newline at end of file
+#endif /*#if defined(ARM_FLOAT16_SUPPORTED)*/
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
index 3773031..d18dd3e 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
@@ -96,29 +96,35 @@
 
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
+/*
+
+The computation of the coefficients is done in float32 otherwise the
+resulting filter is too different from the expected one.
+
+*/
 static void generateCoefsFastBiquadF16(float16_t b0, float16_t b1, float16_t b2, float16_t a1, float16_t a2,
                                 arm_biquad_mod_coef_f16 * newCoef)
 {
     float32_t coeffs[8][12] = {
-        {0, 0, 0, 0, 0, 0, 0, b0, b1, b2, a1, a2},
-        {0, 0, 0, 0, 0, 0, b0, b1, b2, 0, a2, 0},
-        {0, 0, 0, 0, 0, b0, b1, b2, 0, 0, 0, 0},
-        {0, 0, 0, 0, b0, b1, b2, 0, 0, 0, 0, 0},
-        {0, 0, 0, b0, b1, b2, 0, 0, 0, 0, 0, 0},
-        {0, 0, b0, b1, b2, 0, 0, 0, 0, 0, 0, 0},
-        {0, b0, b1, b2, 0, 0, 0, 0, 0, 0, 0, 0},
-        {b0, b1, b2, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, (float32_t)a1, (float32_t)a2},
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, (float32_t)a2, 0.0f},
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {(float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}
     };
 
     for (int i = 0; i < 12; i++)
     {
-        coeffs[1][i] += (a1 * coeffs[0][i]);
-        coeffs[2][i] += (a1 * coeffs[1][i]) + (a2 * coeffs[0][i]);
-        coeffs[3][i] += (a1 * coeffs[2][i]) + (a2 * coeffs[1][i]);
-        coeffs[4][i] += (a1 * coeffs[3][i]) + (a2 * coeffs[2][i]);
-        coeffs[5][i] += (a1 * coeffs[4][i]) + (a2 * coeffs[3][i]);
-        coeffs[6][i] += (a1 * coeffs[5][i]) + (a2 * coeffs[4][i]);
-        coeffs[7][i] += (a1 * coeffs[6][i]) + (a2 * coeffs[5][i]);
+        coeffs[1][i] += ((float32_t)a1 * coeffs[0][i]);
+        coeffs[2][i] += ((float32_t)a1 * coeffs[1][i]) + ((float32_t)a2 * coeffs[0][i]);
+        coeffs[3][i] += ((float32_t)a1 * coeffs[2][i]) + ((float32_t)a2 * coeffs[1][i]);
+        coeffs[4][i] += ((float32_t)a1 * coeffs[3][i]) + ((float32_t)a2 * coeffs[2][i]);
+        coeffs[5][i] += ((float32_t)a1 * coeffs[4][i]) + ((float32_t)a2 * coeffs[3][i]);
+        coeffs[6][i] += ((float32_t)a1 * coeffs[5][i]) + ((float32_t)a2 * coeffs[4][i]);
+        coeffs[7][i] += ((float32_t)a1 * coeffs[6][i]) + ((float32_t)a2 * coeffs[5][i]);
 
         /*
          * transpose
@@ -157,4 +163,4 @@
 /**
   @} end of BiquadCascadeDF1 group
  */
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
index 3661d71..3f619e5 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
@@ -111,4 +111,4 @@
   @} end of BiquadCascadeDF2T group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
index 53b6807..27d1eef 100644
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
@@ -101,7 +101,7 @@
 void arm_biquad_cascade_df2T_compute_coefs_f32(
   arm_biquad_cascade_df2T_instance_f32 * S,
   uint8_t numStages,
-  float32_t * pCoeffs)
+  const float32_t * pCoeffs)
 {
    uint8_t cnt;
    float32_t *pDstCoeffs;
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
index 6cd934d..df1e917 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
@@ -431,4 +431,4 @@
   @} end of BiquadCascadeDF2T group
  */
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
index f52439b..5924b7a 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
@@ -638,16 +638,16 @@
     while (k > 0U)
     {
       /* x[0] * y[srcBLen - 4] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[1] * y[srcBLen - 3] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[2] * y[srcBLen - 2] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[3] * y[srcBLen - 1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -667,7 +667,7 @@
     {
       /* Perform the multiply-accumulate */
       /* x[0] * y[srcBLen - 1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -750,13 +750,13 @@
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[0] * y[0] */
-        acc0 += x0 * c0;
+        acc0 += (_Float16)x0 * (_Float16)c0;
         /* acc1 +=  x[1] * y[0] */
-        acc1 += x1 * c0;
+        acc1 += (_Float16)x1 * (_Float16)c0;
         /* acc2 +=  x[2] * y[0] */
-        acc2 += x2 * c0;
+        acc2 += (_Float16)x2 * (_Float16)c0;
         /* acc3 +=  x[3] * y[0] */
-        acc3 += x3 * c0;
+        acc3 += (_Float16)x3 * (_Float16)c0;
 
         /* Read y[1] sample */
         c0 = *(py++);
@@ -765,13 +765,13 @@
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[1] * y[1] */
-        acc0 += x1 * c0;
+        acc0 += (_Float16)x1 * (_Float16)c0;
         /* acc1 +=  x[2] * y[1] */
-        acc1 += x2 * c0;
+        acc1 += (_Float16)x2 * (_Float16)c0;
         /* acc2 +=  x[3] * y[1] */
-        acc2 += x3 * c0;
+        acc2 += (_Float16)x3 * (_Float16)c0;
         /* acc3 +=  x[4] * y[1] */
-        acc3 += x0 * c0;
+        acc3 += (_Float16)x0 * (_Float16)c0;
 
         /* Read y[2] sample */
         c0 = *(py++);
@@ -780,13 +780,13 @@
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[2] * y[2] */
-        acc0 += x2 * c0;
+        acc0 += (_Float16)x2 * (_Float16)c0;
         /* acc1 +=  x[3] * y[2] */
-        acc1 += x3 * c0;
+        acc1 += (_Float16)x3 * (_Float16)c0;
         /* acc2 +=  x[4] * y[2] */
-        acc2 += x0 * c0;
+        acc2 += (_Float16)x0 * (_Float16)c0;
         /* acc3 +=  x[5] * y[2] */
-        acc3 += x1 * c0;
+        acc3 += (_Float16)x1 * (_Float16)c0;
 
         /* Read y[3] sample */
         c0 = *(py++);
@@ -795,13 +795,13 @@
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[3] * y[3] */
-        acc0 += x3 * c0;
+        acc0 += (_Float16)x3 * (_Float16)c0;
         /* acc1 +=  x[4] * y[3] */
-        acc1 += x0 * c0;
+        acc1 += (_Float16)x0 * (_Float16)c0;
         /* acc2 +=  x[5] * y[3] */
-        acc2 += x1 * c0;
+        acc2 += (_Float16)x1 * (_Float16)c0;
         /* acc3 +=  x[6] * y[3] */
-        acc3 += x2 * c0;
+        acc3 += (_Float16)x2 * (_Float16)c0;
 
       } while (--k);
 
@@ -818,13 +818,13 @@
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[4] * y[4] */
-        acc0 += x0 * c0;
+        acc0 += (_Float16)x0 * (_Float16)c0;
         /* acc1 +=  x[5] * y[4] */
-        acc1 += x1 * c0;
+        acc1 += (_Float16)x1 * (_Float16)c0;
         /* acc2 +=  x[6] * y[4] */
-        acc2 += x2 * c0;
+        acc2 += (_Float16)x2 * (_Float16)c0;
         /* acc3 +=  x[7] * y[4] */
-        acc3 += x3 * c0;
+        acc3 += (_Float16)x3 * (_Float16)c0;
 
         /* Reuse the present samples for the next MAC */
         x0 = x1;
@@ -886,10 +886,10 @@
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement loop counter */
         k--;
@@ -907,7 +907,7 @@
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement the loop counter */
         k--;
@@ -947,7 +947,7 @@
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement the loop counter */
         k--;
@@ -1014,16 +1014,16 @@
     {
       /* Perform the multiply-accumulate */
       /* sum += x[srcALen - srcBLen + 4] * y[3] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 3] * y[2] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 2] * y[1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 1] * y[0] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -1042,7 +1042,7 @@
     while (k > 0U)
     {
       /* Perform the multiply-accumulate */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -1136,7 +1136,7 @@
       if ((((i - j) < srcBLen) && (j < srcALen)))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += pIn1[j] * pIn2[-((int32_t) i - (int32_t) j)];
+        sum += (_Float16)pIn1[j] * (_Float16)pIn2[-((int32_t) i - (int32_t) j)];
       }
     }
 
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c b/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
index ad198dc..f89c2d3 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
@@ -872,7 +872,7 @@
     while (i > 0U)
     {
       /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
-      acc0 += *px++ * *pb++;
+      acc0 += (_Float16)*px++ * (_Float16)*pb++;
 
       i--;
     }
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c b/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c
index 0a29e03..e53b456 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c
@@ -223,18 +223,19 @@
   int nbCoefs)
 {
    float32_t e;
+   int p;
 
    a[0] = phi[1] / phi[0];
 
    e = phi[0] - phi[1] * a[0];
-   for(int p=1; p < nbCoefs; p++)
+   for(p=1; p < nbCoefs; p++)
    {
       float32_t suma=0.0f;
       float32_t sumb=0.0f;
       float32_t k;
-      int nb,j;
+      int nb,j,i;
 
-      for(int i=0; i < p; i++)
+      for(i=0; i < p; i++)
       {
          suma += a[i] * phi[p - i];
          sumb += a[i] * phi[i + 1];
@@ -245,7 +246,7 @@
 
       nb = p >> 1;
       j=0;
-      for(int i =0; i < nb ; i++)
+      for(i =0; i < nb ; i++)
       {
           float32_t x,y;
 
diff --git a/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c b/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c
index a226f02..aa1b8e4 100755
--- a/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c
+++ b/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c
@@ -307,6 +307,7 @@
   int nbCoefs)
 {
    q31_t e;
+   int p;
 
    //a[0] = phi[1] / phi[0];
    a[0] = divide(phi[1], phi[0]);
@@ -315,14 +316,14 @@
    //e = phi[0] - phi[1] * a[0];
    e = phi[0] - mul32x32(phi[1],a[0]);
 
-   for(int p=1; p < nbCoefs; p++)
+   for(p=1; p < nbCoefs; p++)
    {
       q63_t suma=0;
       q63_t sumb=0;
       q31_t k;
-      int nb,j;
+      int nb,j,i;
 
-      for(int i=0; i < p; i++)
+      for(i=0; i < p; i++)
       {
          suma += ((q63_t)a[i] * phi[p - i]);
          sumb += ((q63_t)a[i] * phi[i + 1]);
@@ -339,7 +340,7 @@
 
       nb = p >> 1;
       j=0;
-      for(int i =0;i < nb ; i++)
+      for(i =0;i < nb ; i++)
       {
           q31_t x,y;
 
diff --git a/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c b/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
index c1946ac..799ed78 100755
--- a/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
+++ b/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
@@ -141,18 +141,19 @@
 
     /* Calculation of intermediate values */
     b1 = f00;
-    b2 = f01 - f00;
-    b3 = f10 - f00;
-    b4 = f00 - f01 - f10 + f11;
+    b2 = (_Float16)f01 - (_Float16)f00;
+    b3 = (_Float16)f10 - (_Float16)f00;
+    b4 = (_Float16)f00 - (_Float16)f01 - (_Float16)f10 + (_Float16)f11;
 
     /* Calculation of fractional part in X */
-    xdiff = X - xIndex;
+    xdiff = (_Float16)X - (_Float16)xIndex;
 
     /* Calculation of fractional part in Y */
-    ydiff = Y - yIndex;
+    ydiff = (_Float16)Y - (_Float16)yIndex;
 
     /* Calculation of bi-linear interpolated output */
-    out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
+    out = (_Float16)b1 + (_Float16)b2 * (_Float16)xdiff + 
+    (_Float16)b3 * (_Float16)ydiff + (_Float16)b4 * (_Float16)xdiff * (_Float16)ydiff;
 
     /* return to application */
     return (out);
diff --git a/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c b/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
index f1c1bee..f4ae1e1 100755
--- a/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
+++ b/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
@@ -91,7 +91,7 @@
     float16_t *pYData = S->pYData;               /* pointer to output table */
 
     /* Calculation of index */
-    i = (int32_t) ((x - S->x1) / xSpacing);
+    i = (int32_t) (((_Float16)x - (_Float16)S->x1) / (_Float16)xSpacing);
 
     if (i < 0)
     {
@@ -106,15 +106,16 @@
     else
     {
       /* Calculation of nearest input values */
-      x0 = S->x1 +  i      * xSpacing;
-      x1 = S->x1 + (i + 1) * xSpacing;
+      x0 = (_Float16)S->x1 +  (_Float16)i      * (_Float16)xSpacing;
+      x1 = (_Float16)S->x1 + (_Float16)(i + 1) * (_Float16)xSpacing;
 
       /* Read of nearest output values */
       y0 = pYData[i];
       y1 = pYData[i + 1];
 
       /* Calculation of output */
-      y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
+      y = (_Float16)y0 + ((_Float16)x - (_Float16)x0) * 
+      (((_Float16)y1 - (_Float16)y0) / ((_Float16)x1 - (_Float16)x0));
 
     }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
index c666179..2db2c2a 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
@@ -167,13 +167,13 @@
       /* C(m,n) = A(m,n) + B(m,n) */
 
       /* Add and store result in destination buffer. */
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
       /* Decrement loop counter */
       blkCnt--;
@@ -194,7 +194,7 @@
       /* C(m,n) = A(m,n) + B(m,n) */
 
       /* Add and store result in destination buffer. */
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
index 3e9062c..9c8acef 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
@@ -158,15 +158,15 @@
           pG[j * n + i] = vecAddAcrossF16Mve(acc);
        }
 
-       if (pG[i * n + i] <= 0.0f16)
+       if ((_Float16)pG[i * n + i] <= 0.0f16)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
 
-       invSqrtVj = (_Float16)1.0f/sqrtf(pG[i * n + i]);
+       invSqrtVj = 1.0f16/(_Float16)sqrtf((float32_t)pG[i * n + i]);
        for(j=i; j < n ; j++)
        {
-         pG[j * n + i] = (_Float16)pG[j * n + i] * invSqrtVj ;
+         pG[j * n + i] = (_Float16)pG[j * n + i] * (_Float16)invSqrtVj ;
        }
     }
 
@@ -220,19 +220,22 @@
 
           for(k=0; k < i ; k++)
           {
-             pG[j * n + i] = pG[j * n + i] - pG[i * n + k] * pG[j * n + k];
+             pG[j * n + i] = (_Float16)pG[j * n + i] - (_Float16)pG[i * n + k] * (_Float16)pG[j * n + k];
           }
        }
 
-       if (pG[i * n + i] <= 0.0f)
+       if ((_Float16)pG[i * n + i] <= 0.0f16)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
 
-       invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
+       /* The division is done in float32 for accuracy reasons, and
+       because doing it in f16 would not have any impact on performance.
+       */
+       invSqrtVj = 1.0f/sqrtf((float32_t)pG[i * n + i]);
        for(j=i ; j < n ; j++)
        {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
+         pG[j * n + i] = (_Float16)pG[j * n + i] * (_Float16)invSqrtVj ;
        }
     }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
index 1cd1bf7..4e095d0 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
@@ -96,7 +96,7 @@
           }
        }
 
-       if (pG[i * n + i] <= 0.0f)
+       if (pG[i * n + i] <= 0.0)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
index 977f381..7b6b488 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
@@ -415,8 +415,8 @@
     {
         if (numRowsA == 1)
         {
-            pOut[0] = pInA[0] * pInB[0] - pInA[1] * pInB[1];
-            pOut[1] = pInA[0] * pInB[1] + pInA[1] * pInB[0];
+            pOut[0] = (_Float16)pInA[0] * (_Float16)pInB[0] - (_Float16)pInA[1] * (_Float16)pInB[1];
+            pOut[1] = (_Float16)pInA[0] * (_Float16)pInB[1] + (_Float16)pInA[1] * (_Float16)pInB[0];
             return (ARM_MATH_SUCCESS);
         }
         else if  (numRowsA == 2)
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
index f40dcb3..90bc06d 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
@@ -196,7 +196,7 @@
             /*
              * Check if the pivot element is zero
              */
-            if (*pInT1 == 0.0f16)
+            if ((_Float16)*pInT1 == 0.0f16)
             {
                 /*
                  * Loop over the number rows present below
@@ -212,7 +212,7 @@
                      * Check if there is a non zero pivot element to
                      * * replace in the rows below
                      */
-                    if (*pInT2 != 0.0f16)
+                    if ((_Float16)*pInT2 != 0.0f16)
                     {
                         f16x8_t vecA, vecB;
                         /*
@@ -536,7 +536,7 @@
             pIn = pSrc->pData;
             for (i = 0; i < numRows * numCols; i++)
             {
-                if (pIn[i] != 0.0f16)
+                if ((_Float16)pIn[i] != 0.0f16)
                     break;
             }
 
@@ -676,7 +676,7 @@
 
 
       /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0f16)
+      if ((_Float16)*pInT1 == 0.0f16)
       {
         /* Loop over the number rows present below */
 
@@ -688,7 +688,7 @@
 
           /* Check if there is a non zero pivot element to
            * replace in the rows below */
-          if (*pInT2 != 0.0f16)
+          if ((_Float16)*pInT2 != 0.0f16)
           {
             /* Loop over number of columns
              * to the right of the pilot element */
@@ -818,7 +818,7 @@
             /* Replace the element by the sum of that row
                and a multiple of the reference row  */
             in1 = *pInT1;
-            *pInT1++ = in1 - (in * *pPRT_in++);
+            *pInT1++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_in++);
 
             /* Decrement the loop counter */
             j--;
@@ -833,7 +833,7 @@
             /* Replace the element by the sum of that row
                and a multiple of the reference row  */
             in1 = *pInT2;
-            *pInT2++ = in1 - (in * *pPRT_pDst++);
+            *pInT2++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_pDst++);
 
             /* Decrement loop counter */
             j--;
@@ -864,12 +864,12 @@
     /* Set status as ARM_MATH_SUCCESS */
     status = ARM_MATH_SUCCESS;
 
-    if ((flag != 1U) && (in == 0.0f16))
+    if ((flag != 1U) && ((_Float16)in == 0.0f16))
     {
       pIn = pSrc->pData;
       for (i = 0; i < numRows * numCols; i++)
       {
-        if (pIn[i] != 0.0f16)
+        if ((_Float16)pIn[i] != 0.0f16)
             break;
       }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
index bcca830..f8d10cb 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
@@ -165,7 +165,7 @@
 
         a = pA[k*n+k];
 
-        if (fabs(a) < 1.0e-8)
+        if (fabsf(a) < 1.0e-8f)
         {
 
             fullRank = 0;
@@ -324,7 +324,7 @@
 
 /// @private
 #define SWAP_ROWS_F32(A,i,j)     \
-  for(int w=0;w < n; w++)    \
+  for(w=0;w < n; w++)    \
   {                          \
      float32_t tmp;          \
      tmp = A[i*n + w];       \
@@ -334,7 +334,7 @@
 
 /// @private
 #define SWAP_COLS_F32(A,i,j)     \
-  for(int w=0;w < n; w++)    \
+  for(w=0;w < n; w++)    \
   {                          \
      float32_t tmp;          \
      tmp = A[w*n + i];       \
@@ -395,11 +395,12 @@
     const int n=pSrc->numRows;
     int fullRank = 1, diag,k;
     float32_t *pA;
+    int row,d;
 
     memcpy(pl->pData,pSrc->pData,n*n*sizeof(float32_t));
     pA = pl->pData;
 
-    for(int k=0;k < n; k++)
+    for(k=0;k < n; k++)
     {
       pp[k] = k;
     }
@@ -412,7 +413,10 @@
         int j=k; 
 
 
-        for(int r=k;r<n;r++)
+        int r;
+        int w;
+
+        for(r=k;r<n;r++)
         {
            if (pA[r*n+r] > m)
            {
@@ -432,22 +436,23 @@
 
         a = pA[k*n+k];
 
-        if (fabs(a) < 1.0e-8)
+        if (fabsf(a) < 1.0e-8f)
         {
 
             fullRank = 0;
             break;
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
-          for(int x=k+1;x<n;x++)
+          int x;
+          for(x=k+1;x<n;x++)
           {
              pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * pA[x*n+k] / a;
           }
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
                pA[w*n+k] = pA[w*n+k] / a;
         }
@@ -462,24 +467,26 @@
     if (!fullRank)
     {
       diag--;
-      for(int row=0; row < n;row++)
+      for(row=0; row < n;row++)
       {
-        for(int col=k; col < n;col++)
+        int col;
+        for(col=k; col < n;col++)
         {
            pl->pData[row*n+col]=0.0;
         }
       }
     }
 
-    for(int row=0; row < n;row++)
+    for(row=0; row < n;row++)
     {
-       for(int col=row+1; col < n;col++)
+       int col;
+       for(col=row+1; col < n;col++)
        {
          pl->pData[row*n+col] = 0.0;
        }
     }
 
-    for(int d=0; d < diag;d++)
+    for(d=0; d < diag;d++)
     {
       pd->pData[d*n+d] = pl->pData[d*n+d];
       pl->pData[d*n+d] = 1.0;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
index e1c3f8d..36deadc 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
@@ -32,23 +32,30 @@
 
 
 /// @private
-#define SWAP_ROWS_F64(A,i,j)     \
-  for(int w=0;w < n; w++)    \
+#define SWAP_ROWS_F64(A,i,j) \
+{                            \
+  int w;                     \
+  for(w=0;w < n; w++)        \
   {                          \
      float64_t tmp;          \
      tmp = A[i*n + w];       \
      A[i*n + w] = A[j*n + w];\
      A[j*n + w] = tmp;       \
-  }
+  }                          \
+}
+
 /// @private
-#define SWAP_COLS_F64(A,i,j)     \
-  for(int w=0;w < n; w++)    \
+#define SWAP_COLS_F64(A,i,j) \
+{                            \
+  int w;                     \
+  for(w=0;w < n; w++)        \
   {                          \
      float64_t tmp;          \
      tmp = A[w*n + i];       \
      A[w*n + i] = A[w*n + j];\
      A[w*n + j] = tmp;       \
-  }
+  }                          \
+}
 
 /**
   @ingroup groupMatrix
@@ -108,7 +115,7 @@
     memcpy(pl->pData,pSrc->pData,n*n*sizeof(float64_t));
     pA = pl->pData;
 
-    for(int k=0;k < n; k++)
+    for(k=0;k < n; k++)
     {
       pp[k] = k;
     }
@@ -118,10 +125,10 @@
     {
         /* Find pivot */
         float64_t m=F64_MIN,a;
-        int j=k; 
+        int w,r,j=k; 
 
 
-        for(int r=k;r<n;r++)
+        for(r=k;r<n;r++)
         {
            if (pA[r*n+r] > m)
            {
@@ -148,15 +155,16 @@
             break;
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
-          for(int x=k+1;x<n;x++)
+          int x;
+          for(x=k+1;x<n;x++)
           {
              pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * pA[x*n+k] / a;
           }
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
                pA[w*n+k] = pA[w*n+k] / a;
         }
@@ -171,27 +179,38 @@
     if (!fullRank)
     {
       diag--;
-      for(int row=0; row < n;row++)
       {
-        for(int col=k; col < n;col++)
+        int row;
+        for(row=0; row < n;row++)
         {
-           pl->pData[row*n+col]=0.0;
+          int col;
+          for(col=k; col < n;col++)
+          {
+             pl->pData[row*n+col]=0.0;
+          }
         }
       }
     }
 
-    for(int row=0; row < n;row++)
     {
-       for(int col=row+1; col < n;col++)
-       {
-         pl->pData[row*n+col] = 0.0;
-       }
+      int row;
+      for(row=0; row < n;row++)
+      {
+         int col;
+         for(col=row+1; col < n;col++)
+         {
+           pl->pData[row*n+col] = 0.0;
+         }
+      }
     }
 
-    for(int d=0; d < diag;d++)
     {
-      pd->pData[d*n+d] = pl->pData[d*n+d];
-      pl->pData[d*n+d] = 1.0;
+      int d;
+      for(d=0; d < diag;d++)
+      {
+        pd->pData[d*n+d] = pl->pData[d*n+d];
+        pl->pData[d*n+d] = 1.0;
+      }
     }
   
     status = ARM_MATH_SUCCESS;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
index cba7c1d..3d3e820 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
@@ -687,16 +687,16 @@
           /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
 
           /* Perform the multiply-accumulates */
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
           /* Decrement loop counter */
@@ -718,7 +718,7 @@
           /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
 
           /* Perform the multiply-accumulates */
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
           /* Decrement loop counter */
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
index cf59ef4..a7bdf2d 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
@@ -114,7 +114,7 @@
       do
       {
         /* Set the variable sum, that acts as accumulator, to zero */
-        sum = 0.0f;
+        sum = 0.0;
 
         /* Initialize pointer pIn1 to point to starting address of column being processed */
         pIn1 = pInA;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
index 5f09105..fc2193d 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
@@ -161,10 +161,10 @@
       /* C(m,n) = A(m,n) * scale */
 
       /* Scale and store result in destination buffer. */
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
 
       /* Decrement loop counter */
       blkCnt--;
@@ -185,7 +185,7 @@
       /* C(m,n) = A(m,n) * scale */
 
       /* Scale and store result in destination buffer. */
-      *pOut++ = (*pIn++) * scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
index 4d0517f..c629420 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
@@ -112,7 +112,7 @@
                 vecA = vfmsq(vecA,vdupq_n_f16(pLT[n*i + k]),vecX);
             }
 
-            if (pLT[n*i + i]==0.0f16)
+            if ((_Float16)pLT[n*i + i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
@@ -135,7 +135,7 @@
                 tmp -= (_Float16)lt_row[k] * (_Float16)pX[n*k+j];
             }
 
-            if (lt_row[i]==0.0f16)
+            if ((_Float16)lt_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
@@ -207,14 +207,14 @@
             
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= (_Float16)lt_row[k] * (_Float16)pX[n*k+j];
             }
 
-            if (lt_row[i]==0.0f)
+            if ((_Float16)lt_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
-            tmp = tmp / lt_row[i];
+            tmp = (_Float16)tmp / (_Float16)lt_row[i];
             pX[i*n+j] = tmp;
        }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
index bee02bc..4a1d4ad 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
@@ -284,8 +284,6 @@
     */
     int i,j,k,n;
 
-    n = dst->numRows;
-
     float32_t *pX = dst->pData;
     float32_t *pLT = lt->pData;
     float32_t *pA = a->pData;
@@ -293,15 +291,17 @@
     float32_t *lt_row;
     float32_t *a_col;
 
+    n = dst->numRows;
+
     for(j=0; j < n; j ++)
     {
        a_col = &pA[j];
 
        for(i=0; i < n ; i++)
        {
-            lt_row = &pLT[n*i];
-
             float32_t tmp=a_col[i * n];
+
+            lt_row = &pLT[n*i];
             
             for(k=0; k < i; k++)
             {
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
index e389357..78a25b3 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
@@ -79,8 +79,6 @@
     */
     int i,j,k,n;
 
-    n = dst->numRows;
-
     float64_t *pX = dst->pData;
     float64_t *pLT = lt->pData;
     float64_t *pA = a->pData;
@@ -88,22 +86,24 @@
     float64_t *lt_row;
     float64_t *a_col;
 
+    n = dst->numRows;
+
     for(j=0; j < n; j ++)
     {
        a_col = &pA[j];
 
        for(i=0; i < n ; i++)
        {
+            float64_t tmp=a_col[i * n];
+
             lt_row = &pLT[n*i];
 
-            float64_t tmp=a_col[i * n];
-            
             for(k=0; k < i; k++)
             {
                 tmp -= lt_row[k] * pX[n*k+j];
             }
 
-            if (lt_row[i]==0.0f)
+            if (lt_row[i]==0.0)
             {
               return(ARM_MATH_SINGULAR);
             }
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
index e3ac425..c9f47dc 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
@@ -106,7 +106,7 @@
                 vecA = vfmsq(vecA,vdupq_n_f16(pUT[n*i + k]),vecX);
             }
 
-            if (pUT[n*i + i]==0.0f16)
+            if ((_Float16)pUT[n*i + i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
@@ -131,7 +131,7 @@
                 tmp -= (_Float16)ut_row[k] * (_Float16)pX[n*k+j];
             }
 
-            if (ut_row[i]==0.0f16)
+            if ((_Float16)ut_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
@@ -197,14 +197,14 @@
             
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= (_Float16)ut_row[k] * (_Float16)pX[n*k+j];
             }
 
-            if (ut_row[i]==0.0f)
+            if ((_Float16)ut_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
-            tmp = tmp / ut_row[i];
+            tmp = (_Float16)tmp / (_Float16)ut_row[i];
             pX[i*n+j] = tmp;
        }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
index cdf17d2..7a2475d 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
@@ -270,8 +270,6 @@
 
     int i,j,k,n;
 
-    n = dst->numRows;
-
     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
     float32_t *pA = a->pData;
@@ -279,16 +277,18 @@
     float32_t *ut_row;
     float32_t *a_col;
 
+    n = dst->numRows;
+
     for(j=0; j < n; j ++)
     {
        a_col = &pA[j];
 
        for(i=n-1; i >= 0 ; i--)
        {
+            float32_t tmp=a_col[i * n];
+
             ut_row = &pUT[n*i];
 
-            float32_t tmp=a_col[i * n];
-            
             for(k=n-1; k > i; k--)
             {
                 tmp -= ut_row[k] * pX[n*k+j];
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
index 7032094..ed274bc 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
@@ -72,8 +72,6 @@
 
     int i,j,k,n;
 
-    n = dst->numRows;
-
     float64_t *pX = dst->pData;
     float64_t *pUT = ut->pData;
     float64_t *pA = a->pData;
@@ -81,22 +79,24 @@
     float64_t *ut_row;
     float64_t *a_col;
 
+    n = dst->numRows;
+
     for(j=0; j < n; j ++)
     {
        a_col = &pA[j];
 
        for(i=n-1; i >= 0 ; i--)
        {
+            float64_t tmp=a_col[i * n];
+
             ut_row = &pUT[n*i];
 
-            float64_t tmp=a_col[i * n];
-            
             for(k=n-1; k > i; k--)
             {
                 tmp -= ut_row[k] * pX[n*k+j];
             }
 
-            if (ut_row[i]==0.0f)
+            if (ut_row[i]==0.0)
             {
               return(ARM_MATH_SINGULAR);
             }
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
index 559f2fd..13291b8 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
@@ -168,10 +168,10 @@
       /* C(m,n) = A(m,n) - B(m,n) */
 
       /* Subtract and store result in destination buffer. */
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
 
       /* Decrement loop counter */
       blkCnt--;
@@ -192,7 +192,7 @@
       /* C(m,n) = A(m,n) - B(m,n) */
 
       /* Subtract and store result in destination buffer. */
-      *pOut++ = (*pInA++) - (*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
index badf530..f592f6f 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
@@ -310,10 +310,10 @@
         pInVec = pVec;
 
         /* Initialize accumulators */
-        float16_t sum1 = 0.0f;
-        float16_t sum2 = 0.0f;
-        float16_t sum3 = 0.0f;
-        float16_t sum4 = 0.0f;
+        float16_t sum1 = 0.0f16;
+        float16_t sum2 = 0.0f16;
+        float16_t sum3 = 0.0f16;
+        float16_t sum4 = 0.0f16;
 
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
@@ -331,13 +331,13 @@
             vecData = *(pInVec)++;
             // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate
             matData = *(pInA1)++;
-            sum1 += matData * vecData;
+            sum1 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA2)++;
-            sum2 += matData * vecData;
+            sum2 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA3)++;
-            sum3 += matData * vecData;
+            sum3 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA4)++;
-            sum4 += matData * vecData;
+            sum4 += (_Float16)matData * (_Float16)vecData;
 
             // Decrement the loop counter
             colCnt--;
@@ -359,7 +359,7 @@
     row = numRows & 3u;
     while (row > 0) {
 
-        float16_t sum = 0.0f;
+        float16_t sum = 0.0f16;
         pInVec = pVec;
         pInA1 = pSrcA + i;
 
@@ -370,14 +370,14 @@
             vecData2 = *(pInVec)++;
             matData = *(pInA1)++;
             matData2 = *(pInA1)++;
-            sum += matData * vecData;
-            sum += matData2 * vecData2;
+            sum += (_Float16)matData * (_Float16)vecData;
+            sum += (_Float16)matData2 * (_Float16)vecData2;
             colCnt--;
         }
         // process remainder of row
         colCnt = numCols & 1u;
         while (colCnt > 0) {
-            sum += *pInA1++ * *pInVec++;
+            sum += (_Float16)*pInA1++ * (_Float16)*pInVec++;
             colCnt--;
         }
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
index 03a94f6..b112d34 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
@@ -310,16 +310,16 @@
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         float32_t sum1 = 0.0f;
         float32_t sum2 = 0.0f;
         float32_t sum3 = 0.0f;
         float32_t sum4 = 0.0f;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
index 92be9ae..a2acd25 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
@@ -289,16 +289,16 @@
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q63_t sum1 = 0;
         q63_t sum2 = 0;
         q63_t sum3 = 0;
         q63_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols >> 1;
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
index 6d86e6f..9f491ea 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
@@ -286,16 +286,16 @@
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q63_t sum1 = 0;
         q63_t sum2 = 0;
         q63_t sum3 = 0;
         q63_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
 
diff --git a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
index 79c41f7..4ff43bc 100755
--- a/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
+++ b/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
@@ -301,16 +301,16 @@
 
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q31_t sum1 = 0;
         q31_t sum2 = 0;
         q31_t sum3 = 0;
         q31_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 4 columns per iteration */
         colCnt = numCols >> 2;
 
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
index f70dc60..2105341 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
@@ -145,7 +145,8 @@
     float32_t *pOutputRotations, 
     uint32_t nbQuaternions)
 {
-   for(uint32_t nb=0; nb < nbQuaternions; nb++)
+   uint32_t nb;
+   for(nb=0; nb < nbQuaternions; nb++)
    {
         float32_t q00 = SQ(pInputQuaternions[0 + nb * 4]);
         float32_t q11 = SQ(pInputQuaternions[1 + nb * 4]);
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
index d70faf4..213a5bb 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
@@ -81,7 +81,8 @@
     float32_t *pConjugateQuaternions, 
     uint32_t nbQuaternions)
 {
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
 
       pConjugateQuaternions[4 * i + 0] = pInputQuaternions[4 * i + 0];
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
index 5362ff6..d82bbbf 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
@@ -92,7 +92,8 @@
 {
    float32_t temp;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
 
       temp = SQ(pInputQuaternions[4 * i + 0]) +
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
index 8494736..af6653a 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
@@ -84,8 +84,9 @@
   uint32_t nbQuaternions)
 {
    float32_t temp;
+   uint32_t i;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   for(i=0; i < nbQuaternions; i++)
    {
       temp = SQ(pInputQuaternions[4 * i + 0]) +
              SQ(pInputQuaternions[4 * i + 1]) +
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
index 6ae96d4..605e558 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
@@ -85,7 +85,8 @@
 {
    float32_t temp;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
       temp = SQ(pInputQuaternions[4 * i + 0]) +
              SQ(pInputQuaternions[4 * i + 1]) +
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
index ec4cdf4..83ebf5e 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
@@ -132,7 +132,8 @@
     float32_t *qr,
     uint32_t nbQuaternions)
 {
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
      arm_quaternion_product_single_f32(qa, qb, qr);
 
diff --git a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
index b293077..5d57492 100755
--- a/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
+++ b/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
@@ -100,51 +100,51 @@
 
       if (trace > 0)
       {
-        (void)arm_sqrt_f32(trace + 1.0, &doubler) ; // invs=4*qw
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(trace + 1.0f, &doubler) ; // invs=4*qw
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
-        q[0] = 0.25 * doubler;
+        q[0] = 0.25f * doubler;
         q[1] = R21 - R12;
         q[2] = R02 - R20;
         q[3] = R10 - R01;
       }
       else if ((R00 > R11) && (R00 > R22) )
       {
-        (void)arm_sqrt_f32(1.0 + R00 - R11 - R22,&doubler); // invs=4*qx
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R00 - R11 - R22,&doubler); // invs=4*qx
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
         q[0] = R21 - R12;
-        q[1] = 0.25 * doubler;
+        q[1] = 0.25f * doubler;
         q[2] = R01 + R10;
         q[3] = R02 + R20;
       }
       else if (R11 > R22)
       {
-        (void)arm_sqrt_f32(1.0 + R11 - R00 - R22,&doubler); // invs=4*qy
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R11 - R00 - R22,&doubler); // invs=4*qy
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
         q[0] = R02 - R20;
         q[1] = R01 + R10;
-        q[2] = 0.25 * doubler;
+        q[2] = 0.25f * doubler;
         q[3] = R12 + R21;
       }
       else
       {
-        (void)arm_sqrt_f32(1.0 + R22 - R00 - R11,&doubler); // invs=4*qz
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R22 - R00 - R11,&doubler); // invs=4*qz
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
@@ -152,7 +152,7 @@
         q[0] = R10 - R01;
         q[1] = R02 + R20;
         q[2] = R12 + R21;
-        q[3] = 0.25 * doubler;
+        q[3] = 0.25f * doubler;
       }
 
       vst1q(pOutputQuaternions, q);
@@ -166,7 +166,8 @@
     float32_t *pOutputQuaternions,  
     uint32_t nbQuaternions)
 {
-   for(uint32_t nb=0; nb < nbQuaternions; nb++)
+   uint32_t nb;
+   for(nb=0; nb < nbQuaternions; nb++)
    {
        const float32_t *r=&pInputRotations[nb*9];
        float32_t *q=&pOutputQuaternions[nb*4];
@@ -178,41 +179,41 @@
 
 
 
-      if (trace > 0)
+      if (trace > 0.0f)
       {
-        doubler = sqrtf(trace + 1.0) * 2; // invs=4*qw
-        s = 1.0 / doubler;
-        q[0] = 0.25 * doubler;
+        doubler = sqrtf(trace + 1.0f) * 2.0f; // invs=4*qw
+        s = 1.0f / doubler;
+        q[0] = 0.25f * doubler;
         q[1] = (RI(2,1) - RI(1,2)) * s;
         q[2] = (RI(0,2) - RI(2,0)) * s;
         q[3] = (RI(1,0) - RI(0,1)) * s;
       }
       else if ((RI(0,0) > RI(1,1)) && (RI(0,0) > RI(2,2)) )
       {
-        doubler = sqrtf(1.0 + RI(0,0) - RI(1,1) - RI(2,2)) * 2; // invs=4*qx
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(0,0) - RI(1,1) - RI(2,2)) * 2.0f; // invs=4*qx
+        s = 1.0f / doubler;
         q[0] = (RI(2,1) - RI(1,2)) * s;
-        q[1] = 0.25 * doubler;
+        q[1] = 0.25f * doubler;
         q[2] = (RI(0,1) + RI(1,0)) * s;
         q[3] = (RI(0,2) + RI(2,0)) * s;
       }
       else if (RI(1,1) > RI(2,2))
       {
-        doubler = sqrtf(1.0 + RI(1,1) - RI(0,0) - RI(2,2)) * 2; // invs=4*qy
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(1,1) - RI(0,0) - RI(2,2)) * 2.0f; // invs=4*qy
+        s = 1.0f / doubler;
         q[0] = (RI(0,2) - RI(2,0)) * s;
         q[1] = (RI(0,1) + RI(1,0)) * s;
-        q[2] = 0.25 * doubler;
+        q[2] = 0.25f * doubler;
         q[3] = (RI(1,2) + RI(2,1)) * s;
       }
       else
       {
-        doubler = sqrtf(1.0 + RI(2,2) - RI(0,0) - RI(1,1)) * 2; // invs=4*qz
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(2,2) - RI(0,0) - RI(1,1)) * 2.0f; // invs=4*qz
+        s = 1.0f / doubler;
         q[0] = (RI(1,0) - RI(0,1)) * s;
         q[1] = (RI(0,2) + RI(2,0)) * s;
         q[2] = (RI(1,2) + RI(2,1)) * s;
-        q[3] = 0.25 * doubler;
+        q[3] = 0.25f * doubler;
       }
 
     }
diff --git a/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c b/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
index 494ef9a..fe907e5 100755
--- a/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
+++ b/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
@@ -298,9 +298,9 @@
         dot=0;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + in[j]* *pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j]* (_Float16)*pSupport++;
         }
-        sum += S->dualCoefficients[i] * dot;
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)dot;
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c b/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
index e3e2d6a..46bc689 100755
--- a/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
+++ b/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
@@ -34,6 +34,8 @@
 #include <math.h>
 
 
+
+
 /**
  * @addtogroup polysvm
  * @{
@@ -303,6 +305,27 @@
 }
 
 #else
+
+/*
+For nb >= 1, returns x raised to the power nb by repeated multiplication.
+_Float16 is not supported in g++ so we avoid putting _Float16 definitions
+in the public headers.
+For nb <= 1 the loop below never runs and x is returned unchanged.
+This function should at some point be moved into FastMath.
+
+*/
+__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
+{
+    float16_t r = x;
+    nb --; /* r already holds x^1, so one multiply fewer is needed */
+    while(nb > 0)
+    {
+        r = (_Float16)r * (_Float16)x; /* both operands cast to _Float16 before multiplying */
+        nb--;
+    }
+    return(r);
+}
+
 void arm_svm_polynomial_predict_f16(
     const arm_svm_polynomial_instance_f16 *S,
     const float16_t * in,
@@ -318,9 +341,9 @@
         dot=0;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + (_Float16)in[j]* (_Float16)*pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j]* (_Float16)*pSupport++;
         }
-        sum += S->dualCoefficients[i] * (_Float16)arm_exponent_f16(S->gamma * dot + S->coef0, S->degree);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)arm_exponent_f16((_Float16)S->gamma * (_Float16)dot + (_Float16)S->coef0, S->degree);
     }
 
     *pResult=S->classes[STEP(sum)];
diff --git a/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c b/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
index 7724fda..91afcc1 100755
--- a/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
+++ b/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
@@ -70,7 +70,7 @@
     uint32_t         blkCnt;     /* loop counters */
     const float16_t *pDualCoef = S->dualCoefficients;
     _Float16       sum = S->intercept;
-    f16x8_t         vSum = vdupq_n_f16(0);
+    f16x8_t         vSum = vdupq_n_f16(0.0f16);
 
     row = numRows;
 
@@ -97,10 +97,10 @@
         /*
          * reset accumulators
          */
-        acc0 = vdupq_n_f16(0.0f);
-        acc1 = vdupq_n_f16(0.0f);
-        acc2 = vdupq_n_f16(0.0f);
-        acc3 = vdupq_n_f16(0.0f);
+        acc0 = vdupq_n_f16(0.0f16);
+        acc1 = vdupq_n_f16(0.0f16);
+        acc2 = vdupq_n_f16(0.0f16);
+        acc3 = vdupq_n_f16(0.0f16);
 
         pSrcA0Vec = pInA0;
         pSrcA1Vec = pInA1;
@@ -170,7 +170,7 @@
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                      vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)),vctp16q(4));
+                      vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)),vctp16q(4));
         pDualCoef += 4;
         pSrcA += numCols * 4;
         /*
@@ -199,8 +199,8 @@
         /*
          * reset accumulators
          */
-        acc0 = vdupq_n_f16(0.0f);
-        acc1 = vdupq_n_f16(0.0f);
+        acc0 = vdupq_n_f16(0.0f16);
+        acc1 = vdupq_n_f16(0.0f16);
         pSrcA0Vec = pInA0;
         pSrcA1Vec = pInA1;
 
@@ -248,7 +248,7 @@
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                        vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)), vctp16q(2));
+                        vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)), vctp16q(2));
         pDualCoef += 2;
 
         pSrcA += numCols * 2;
@@ -309,12 +309,12 @@
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                        vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)), vctp16q(1));
+                        vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)), vctp16q(1));
 
     }
 
 
-    sum += vecAddAcrossF16Mve(vSum);
+    sum += (_Float16)vecAddAcrossF16Mve(vSum);
     *pResult = S->classes[STEP(sum)];
 }
 
@@ -337,7 +337,7 @@
             dot = dot + SQ((_Float16)in[j] - (_Float16) *pSupport);
             pSupport++;
         }
-        sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf(-(_Float16)S->gamma * dot);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf((float32_t)(-(_Float16)S->gamma * (_Float16)dot));
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c b/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
index 670806b..e2d541f 100755
--- a/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
+++ b/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
@@ -295,7 +295,7 @@
                         vtanhq_f16(vaddq_n_f16(vmulq_n_f16(vtmp, S->gamma), S->coef0)),
                         vctp16q(1));
     }
-    sum += vecAddAcrossF16Mve(vSum);
+    sum += (_Float16)vecAddAcrossF16Mve(vSum);
 
     *pResult = S->classes[STEP(sum)];
 }
@@ -316,9 +316,9 @@
         dot=0.0f16;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + (_Float16)in[j] * (_Float16)*pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j] * (_Float16)*pSupport++;
         }
-        sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((_Float16)S->gamma * dot + (_Float16)S->coef0);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((float32_t)((_Float16)S->gamma * (_Float16)dot + (_Float16)S->coef0));
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c b/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
index 74109ce..e6d0003 100644
--- a/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
@@ -65,4 +65,4 @@
 #include "arm_absmin_f32.c"
 #include "arm_absmin_q15.c"
 #include "arm_absmin_q31.c"
-#include "arm_absmin_q7.c"
\ No newline at end of file
+#include "arm_absmin_q7.c"
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c b/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c
index f5fab8d..307fff8 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c
@@ -39,4 +39,4 @@
 #include "arm_logsumexp_f16.c"
 #include "arm_max_no_idx_f16.c"
 #include "arm_absmax_f16.c"
-#include "arm_absmin_f16.c"
\ No newline at end of file
+#include "arm_absmin_f16.c"
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
index 473397f..4241eb5 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
@@ -152,7 +152,7 @@
   outIndex = 0U;                                                                                            \
   /* Load first input value that act as reference value for comparision */                                  \
   out = *pSrc++;                                                                                            \
-  out = (out > 0.0f16) ? out : -out;                                                                             \
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
   /* Initialize index of extrema value. */                                                                  \
   index = 0U;                                                                                               \
                                                                                                             \
@@ -163,9 +163,9 @@
   {                                                                                                         \
     /* Initialize cur_absmax to next consecutive values one by one */                                         \
     cur_absmax = *pSrc++;                                                                                     \
-    cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
     /* compare for the extrema value */                                                                     \
-    if (cur_absmax > out)                                                                         \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
     {                                                                                                       \
       /* Update the extrema value and it's index */                                                         \
       out = cur_absmax;                                                                                       \
@@ -173,24 +173,24 @@
     }                                                                                                       \
                                                                                                             \
     cur_absmax = *pSrc++;                                                                                     \
-    cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
-    if (cur_absmax > out)                                                                         \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
     {                                                                                                       \
       out = cur_absmax;                                                                                       \
       outIndex = index + 2U;                                                                                \
     }                                                                                                       \
                                                                                                             \
     cur_absmax = *pSrc++;                                                                                     \
-    cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
-    if (cur_absmax > out)                                                                          \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
     {                                                                                                       \
       out = cur_absmax;                                                                                       \
       outIndex = index + 3U;                                                                                \
     }                                                                                                       \
                                                                                                             \
     cur_absmax = *pSrc++;                                                                                     \
-    cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
-    if (cur_absmax > out)                                                                          \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
     {                                                                                                       \
       out = cur_absmax;                                                                                       \
       outIndex = index + 4U;                                                                                \
@@ -209,8 +209,8 @@
   while (blkCnt > 0U)                                                                                       \
   {                                                                                                         \
     cur_absmax = *pSrc++;                                                                                     \
-    cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
-    if (cur_absmax > out)                                                                         \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
     {                                                                                                       \
       out = cur_absmax;                                                                                       \
       outIndex = blockSize - blkCnt;                                                                        \
@@ -238,7 +238,7 @@
   outIndex = 0U;
 
   /* Load first input value that act as reference value for comparision */
-  out = fabsf(*pSrc++);
+  out = (_Float16)fabsf((float32_t)*pSrc++);
 
   /* Initialize blkCnt with number of samples */
   blkCnt = (blockSize - 1U);
@@ -246,10 +246,10 @@
   while (blkCnt > 0U)
   {
     /* Initialize maxVal to the next consecutive values one by one */
-    maxVal = fabsf(*pSrc++);
+    maxVal = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* compare for the maximum value */
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       /* Update the maximum value and it's index */
       out = maxVal;
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c
index 45aec49..b450b55 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c
@@ -154,7 +154,7 @@
   outIndex = 0U;                                                                                            \
   /* Load first input value that act as reference value for comparision */                                  \
   out = *pSrc++;                                                                                            \
-  out = (out > 0.0f16) ? out : -out;                                                                             \
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
   /* Initialize index of extrema value. */                                                                  \
   index = 0U;                                                                                               \
                                                                                                             \
@@ -165,9 +165,9 @@
   {                                                                                                         \
     /* Initialize cur_absmin to next consecutive values one by one */                                         \
     cur_absmin = *pSrc++;                                                                                     \
-    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;                                                                 \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
     /* compare for the extrema value */                                                                     \
-    if (cur_absmin < out)                                                                         \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
     {                                                                                                       \
       /* Update the extrema value and it's index */                                                         \
       out = cur_absmin;                                                                                       \
@@ -175,24 +175,24 @@
     }                                                                                                       \
                                                                                                             \
     cur_absmin = *pSrc++;                                                                                     \
-    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;                                                                 \
-    if (cur_absmin < out)                                                                         \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
     {                                                                                                       \
       out = cur_absmin;                                                                                       \
       outIndex = index + 2U;                                                                                \
     }                                                                                                       \
                                                                                                             \
     cur_absmin = *pSrc++;                                                                                     \
-    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;                                                                 \
-    if (cur_absmin < out)                                                                          \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                          \
     {                                                                                                       \
       out = cur_absmin;                                                                                       \
       outIndex = index + 3U;                                                                                \
     }                                                                                                       \
                                                                                                             \
     cur_absmin = *pSrc++;                                                                                     \
-    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;                                                                 \
-    if (cur_absmin < out)                                                                          \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                          \
     {                                                                                                       \
       out = cur_absmin;                                                                                       \
       outIndex = index + 4U;                                                                                \
@@ -211,8 +211,8 @@
   while (blkCnt > 0U)                                                                                       \
   {                                                                                                         \
     cur_absmin = *pSrc++;                                                                                     \
-    cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin;                                                                 \
-    if (cur_absmin < out)                                                                         \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
     {                                                                                                       \
       out = cur_absmin;                                                                                       \
       outIndex = blockSize - blkCnt;                                                                        \
@@ -248,10 +248,10 @@
   while (blkCnt > 0U)
   {
     /* Initialize minVal to the next consecutive values one by one */
-    minVal = fabsf(*pSrc++);
+    minVal = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* compare for the minimum value */
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       /* Update the minimum value and it's index */
       out = minVal;
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
index ffe08f4..9611fd7 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
@@ -97,7 +97,7 @@
     while(blkCnt > 0)
     {
        p = *pSrcA++;
-       accum += p * logf(p);
+       accum += p * (_Float16)logf((float32_t)p);
        
        blkCnt--;
     
@@ -122,7 +122,7 @@
     while(blkCnt > 0)
     {
        p = *pIn++;
-       accum += p * logf(p);
+       accum += p * (_Float16)logf((float32_t)p);
        
        blkCnt--;
     
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
index c208ff4..d671791 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
@@ -53,7 +53,7 @@
     pIn = pSrcA;
     blkCnt = blockSize;
 
-    accum = 0.0f;
+    accum = 0.0;
 
     while(blkCnt > 0)
     {
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
index a7da249..98ac097 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
@@ -81,7 +81,7 @@
 
     accum = 0.0f16;
 
-    f16x8_t         vSum = vdupq_n_f16(0.0f);
+    f16x8_t         vSum = vdupq_n_f16(0.0f16);
     blkCnt = blockSize >> 3;
     while(blkCnt > 0)
     {
@@ -108,7 +108,7 @@
     {
        pA = *pSrcA++;
        pB = *pSrcB++;
-       accum += pA * logf(pB / pA);
+       accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
        
        blkCnt--;
     
@@ -134,7 +134,7 @@
     {
        pA = *pInA++;
        pB = *pInB++;
-       accum += pA * logf(pB / pA);
+       accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
        
        blkCnt--;
     
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
index b22d047..b43b218 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
@@ -55,7 +55,7 @@
     pInB = pSrcB;
     blkCnt = blockSize;
 
-    accum = 0.0f;
+    accum = 0.0;
 
     while(blkCnt > 0)
     {
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
index 81272d5..84d4843 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
@@ -111,12 +111,12 @@
     while(blkCnt > 0)
     {
        tmp = *pIn++;
-       accum += expf(tmp - maxVal);
+       accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
        blkCnt--;
     
     }
 
-    accum = maxVal + logf(accum);
+    accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
 
     return (accum);
 }
@@ -154,11 +154,11 @@
     while(blkCnt > 0)
     {
        tmp = *pIn++;
-       accum += expf(tmp - maxVal);
+       accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
        blkCnt--;
     
     }
-    accum = maxVal + logf(accum);
+    accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
 
     return(accum);
 }
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
index 25daaf0..72a5d3a 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
@@ -113,7 +113,7 @@
     
     }
 
-    accum = maxVal + log(accum);
+    accum = maxVal + logf(accum);
 
     return (accum);
 }
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
index b9b64f0..b5017f1 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
@@ -120,7 +120,7 @@
       tmp = *pSrc++;
 
       /* compare for the maximum value */
-      if (maxValue < tmp)
+      if ((_Float16)maxValue < (_Float16)tmp)
       {
         /* Update the maximum value and it's index */
         maxValue = tmp;
@@ -171,7 +171,7 @@
     maxVal = *pSrc++;
 
     /* compare for the maximum value */
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       /* Update the maximum value and it's index */
       out = maxVal;
@@ -179,21 +179,21 @@
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 2U;
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 3U;
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 4U;
@@ -221,7 +221,7 @@
     maxVal = *pSrc++;
 
     /* compare for the maximum value */
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       /* Update the maximum value and it's index */
       out = maxVal;
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
index 3a95b4b..f989f60 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
@@ -95,7 +95,7 @@
         newVal = *pSrc++;
 
         /* compare for the maximum value */
-        if (maxValue < newVal)
+        if ((_Float16)maxValue < (_Float16)newVal)
         {
             /* Update the maximum value and it's index */
             maxValue = newVal;
@@ -122,7 +122,7 @@
        newVal = *pSrc++;
    
        /* compare for the maximum value */
-       if (maxValue < newVal)
+       if ((_Float16)maxValue < (_Float16)newVal)
        {
            /* Update the maximum value and it's index */
            maxValue = newVal;
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
index 02f495d..d296482 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
@@ -85,7 +85,7 @@
     }
     while (blkCnt > 0);
 
-    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) blockSize;
+    *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) blockSize;
 }
 
 
@@ -107,13 +107,13 @@
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
     /* Decrement the loop counter */
     blkCnt--;
@@ -132,7 +132,7 @@
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -140,7 +140,7 @@
 
   /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
   /* Store result to destination */
-  *pResult = (sum / (float16_t)blockSize);
+  *pResult = ((_Float16)sum / (_Float16)blockSize);
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
index 0d123f5..3ff9db4 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
@@ -165,7 +165,7 @@
     minVal = *pSrc++;
 
     /* compare for the minimum value */
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       /* Update the minimum value and it's index */
       out = minVal;
@@ -173,21 +173,21 @@
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 2U;
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 3U;
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 4U;
@@ -215,7 +215,7 @@
     minVal = *pSrc++;
 
     /* compare for the minimum value */
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       /* Update the minimum value and it's index */
       out = minVal;
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
index 1cb18c2..42a00ef 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
@@ -73,7 +73,7 @@
     arm_power_f16(pSrc, blockSize, &pow);
 
     /* Compute Rms and store the result in the destination */
-    arm_sqrt_f16(pow / (float16_t) blockSize, pResult);
+    arm_sqrt_f16((_Float16)pow / (_Float16) blockSize, pResult);
 }
 #else
 
@@ -135,7 +135,7 @@
   }
 
   /* Compute Rms and store result in destination */
-  arm_sqrt_f16(sum / (float16_t) blockSize, pResult);
+  arm_sqrt_f16((_Float16)sum / (_Float16) blockSize, pResult);
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c b/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
index ad151d2..0e3ade5 100755
--- a/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
+++ b/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
@@ -61,7 +61,7 @@
 {
     int32_t         blkCnt;     /* loop counters */
     f16x8_t         vecSrc;
-    f16x8_t         sumVec = vdupq_n_f16((float16_t) 0.0);
+    f16x8_t         sumVec = vdupq_n_f16(0.0f16);
     float16_t       fMean;
 
     if (blockSize <= 1U) {
@@ -72,15 +72,6 @@
 
     arm_mean_f16(pSrc, blockSize, &fMean);
 
-/* 6.14 bug */
-#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
-    __asm volatile(
-        "   vmov.i32                     %[acc], #0 \n"
-        : [acc] "+t"(sumVec)
-        : 
-        : );
-#endif
-
     blkCnt = blockSize;
     do {
         mve_pred16_t    p = vctp16q(blkCnt);
@@ -98,7 +89,7 @@
     while (blkCnt > 0);
     
     /* Variance */
-    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) (blockSize - 1.0f);
+    *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16)((float32_t)blockSize - 1.0f); /* form N-1 in f32 and round to f16 once (N > 2048 is inexact in f16) */
 }
 #else
 
@@ -128,10 +119,10 @@
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
 
-    sum += *pInput++;
-    sum += *pInput++;
-    sum += *pInput++;
-    sum += *pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
 
 
     /* Decrement loop counter */
@@ -152,14 +143,14 @@
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
 
-    sum += *pInput++;
+    sum += (_Float16)*pInput++;
 
     /* Decrement loop counter */
     blkCnt--;
   }
 
   /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
-  fMean = sum / (float16_t) blockSize;
+  fMean = (_Float16)sum / (_Float16) blockSize;
 
   pInput = pSrc;
 
@@ -170,17 +161,17 @@
 
   while (blkCnt > 0U)
   {
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -198,15 +189,15 @@
 
   while (blkCnt > 0U)
   {
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
     /* Decrement loop counter */
     blkCnt--;
   }
 
   /* Variance */
-  *pResult = fSum / (float16_t)(blockSize - 1.0f);
+  *pResult = (_Float16)fSum / (_Float16)((float32_t)blockSize - 1.0f); /* form N-1 in f32 and round to f16 once (N > 2048 is inexact in f16) */
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c b/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
index 9dc8c18..6dfe55c 100755
--- a/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
@@ -105,7 +105,7 @@
         w2 = *pW++;
         w3 = *pW++;
         w4 = *pW++;
-        accum += w1 + w2 + w3 + w4;
+        accum += (_Float16)w1 + (_Float16)w2 + (_Float16)w3 + (_Float16)w4;
 
         blkCntSample = vecDim >> 3;
         while (blkCntSample > 0) {
@@ -131,10 +131,10 @@
 
         blkCntSample = vecDim & 7;
         while (blkCntSample > 0) {
-            *pOut = *pOut + *pIn1++ * w1;
-            *pOut = *pOut + *pIn2++ * w2;
-            *pOut = *pOut + *pIn3++ * w3;
-            *pOut = *pOut + *pIn4++ * w4;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn1++ * (_Float16)w1;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn2++ * (_Float16)w2;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn3++ * (_Float16)w3;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn4++ * (_Float16)w4;
             pOut++;
             blkCntSample--;
         }
@@ -156,7 +156,7 @@
 
         pOut = out;
         w = *pW++;
-        accum += w;
+        accum += (_Float16)w;
 
         blkCntSample = vecDim >> 3;
         while (blkCntSample > 0) 
@@ -174,7 +174,7 @@
         blkCntSample = vecDim & 7;
         while (blkCntSample > 0) 
         {
-            *pOut = *pOut + *pIn++ * w;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w;
             pOut++;
             blkCntSample--;
         }
@@ -184,7 +184,7 @@
 
     /* Normalize */
     pOut = out;
-    accum = 1.0f / accum;
+    accum = 1.0f16 / (_Float16)accum;
 
     blkCntSample = vecDim >> 3;
     while (blkCntSample > 0) 
@@ -201,7 +201,7 @@
     blkCntSample = vecDim & 7;
     while (blkCntSample > 0) 
     {
-        *pOut = *pOut * accum;
+        *pOut = (_Float16)*pOut * (_Float16)accum;
         pOut++;
         blkCntSample--;
     }
@@ -218,7 +218,7 @@
    blkCntVector = nbVectors;
    blkCntSample = vecDim;
 
-   accum = 0.0f;
+   accum = 0.0f16;
 
    pW = weights;
    pIn = in;
@@ -229,7 +229,7 @@
 
    while(blkCntSample > 0)
    {
-         *pOut = 0.0f;
+         *pOut = 0.0f16;
          pOut++;
          blkCntSample--;
    }
@@ -239,12 +239,12 @@
    {
       pOut = out;
       w = *pW++;
-      accum += w;
+      accum += (_Float16)w;
 
       blkCntSample = vecDim;
       while(blkCntSample > 0)
       {
-          *pOut = *pOut + *pIn++ * w;
+          *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w;
           pOut++;
           blkCntSample--;
       }
@@ -258,7 +258,7 @@
 
    while(blkCntSample > 0)
    {
-         *pOut = *pOut / accum;
+         *pOut = (_Float16)*pOut / (_Float16)accum;
          pOut++;
          blkCntSample--;
    }
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c b/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
index 5c8575a..bb425d1 100755
--- a/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
@@ -136,7 +136,7 @@
         /*
          * convert from float to Q31 and then store the results in the destination buffer
          */
-        *pDst++ = clip_q31_to_q15((q31_t) (*pIn++ * 32768.0));
+        *pDst++ = clip_q31_to_q15((q31_t) ((float32_t)*pIn++ * 32768.0f)); /* scale in f32: an f16 product overflows to inf for |x| >= 2, defeating the clip */
 
 #endif                          /*      #ifdef ARM_MATH_ROUNDING        */
 
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c b/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
index 960af8f..faef0db 100755
--- a/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
@@ -114,10 +114,10 @@
     /* C = (float16_t) A / 32768 */
 
     /* Convert from q15 to float and store result in destination buffer */
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -138,7 +138,7 @@
     /* C = (float16_t) A / 32768 */
 
     /* Convert from q15 to float and store result in destination buffer */
-    *pDst++ = ((float16_t) *pIn++ / 32768.0f);
+    *pDst++ = ((_Float16) *pIn++ / 32768.0f16);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
index 77b15a7..2e64636 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
@@ -99,4 +99,4 @@
       pBitRevTab += bitRevFactor;
    }
 }
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
index 239e003..ea836be 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
@@ -793,7 +793,7 @@
         pSrc = p1 + 1;
         for(l=0; l<L; l++)
         {
-            *pSrc = -*pSrc;
+            *pSrc = -(_Float16)*pSrc;
             pSrc += 2;
         }
     }
@@ -823,13 +823,13 @@
 
     if (ifftFlag == 1U)
     {
-        invL = 1.0f/(float16_t)L;
+        invL = 1.0f16/(_Float16)L;
         /*  Conjugate and scale output data */
         pSrc = p1;
         for(l=0; l<L; l++)
         {
-            *pSrc++ *=   invL ;
-            *pSrc  = -(*pSrc) * invL;
+            *pSrc++ *=   (_Float16)invL ;
+            *pSrc  = -(_Float16)(*pSrc) * (_Float16)invL;
             pSrc++;
         }
     }
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
index de21de9..8863fbc 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
@@ -146,22 +146,22 @@
       l = i + n2;
 
       /*  Butterfly implementation */
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-      p0 = xt * cosVal;
-      p1 = yt * sinVal;
-      p2 = yt * cosVal;
-      p3 = xt * sinVal;
+      p0 = (_Float16)xt * (_Float16)cosVal;
+      p1 = (_Float16)yt * (_Float16)sinVal;
+      p2 = (_Float16)yt * (_Float16)cosVal;
+      p3 = (_Float16)xt * (_Float16)sinVal;
 
       pSrc[2 * i]     = a0;
       pSrc[2 * i + 1] = a1;
 
-      pSrc[2 * l]     = p0 + p1;
-      pSrc[2 * l + 1] = p2 - p3;
+      pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+      pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
       i++;
    }                             // groups loop end
@@ -188,22 +188,22 @@
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 + p1;
-            pSrc[2 * l + 1] = p2 - p3;
+            pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                        // butterfly loop end
@@ -215,11 +215,11 @@
    // loop for butterfly
    for (i = 0; i < fftLen; i += 2)
    {
-      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
-      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
-      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
+      a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
 
       pSrc[2 * i] = a0;
       pSrc[2 * i + 1] = a1;
@@ -251,22 +251,22 @@
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 + p1;
-            pSrc[2 * l + 1] = p2 - p3;
+            pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
             i += n1;
          } while (i < fftLen);
@@ -307,22 +307,22 @@
       ia += twidCoefModifier;
 
       l = i + n2;
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-      p0 = xt * cosVal;
-      p1 = yt * sinVal;
-      p2 = yt * cosVal;
-      p3 = xt * sinVal;
+      p0 = (_Float16)xt * (_Float16)cosVal;
+      p1 = (_Float16)yt * (_Float16)sinVal;
+      p2 = (_Float16)yt * (_Float16)cosVal;
+      p3 = (_Float16)xt * (_Float16)sinVal;
 
       pSrc[2 * i] = a0;
       pSrc[2 * i + 1] = a1;
 
-      pSrc[2 * l]     = p0 - p1;
-      pSrc[2 * l + 1] = p2 + p3;
+      pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+      pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
    }                             // groups loop end
 
    twidCoefModifier <<= 1U;
@@ -347,22 +347,22 @@
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 - p1;
-            pSrc[2 * l + 1] = p2 + p3;
+            pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                 // butterfly loop end
@@ -375,16 +375,16 @@
    // loop for butterfly
    for (i = 0; i < fftLen; i += 2)
    {
-      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
-      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
 
-      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
-      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
 
-      p0 = a0 * onebyfftLen;
-      p2 = xt * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p3 = yt * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p2 = (_Float16)xt * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p3 = (_Float16)yt * (_Float16)onebyfftLen;
 
       pSrc[2 * i] = p0;
       pSrc[2 * i + 1] = p1;
@@ -416,22 +416,22 @@
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 - p1;
-            pSrc[2 * l + 1] = p2 + p3;
+            pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                    // butterfly loop end
@@ -449,16 +449,16 @@
    {
       l = i + n2;
 
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
 
-      p0 = a0 * onebyfftLen;
-      p2 = xt * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p3 = yt * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p2 = (_Float16)xt * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p3 = (_Float16)yt * (_Float16)onebyfftLen;
 
       pSrc[2 * i] = p0;
       pSrc[2U * l] = p2;
@@ -472,4 +472,4 @@
 }
 
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
index fad2b21..1fc5169 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
@@ -95,22 +95,22 @@
         l = i + n2;
        
         /*  Butterfly implementation */
-        a0 = pSrc[2 * i] + pSrc[2 * l];
-        xt = pSrc[2 * i] - pSrc[2 * l];
+        a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+        xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
   
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+        yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+        a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
   
-        p0 = xt * cosVal;
-        p1 = yt * sinVal;
-        p2 = yt * cosVal;
-        p3 = xt * sinVal;
+        p0 = (_Float16)xt * (_Float16)cosVal;
+        p1 = (_Float16)yt * (_Float16)sinVal;
+        p2 = (_Float16)yt * (_Float16)cosVal;
+        p3 = (_Float16)xt * (_Float16)sinVal;
   
         pSrc[2 * i]     = a0;
         pSrc[2 * i + 1] = a1;
   
-        pSrc[2 * l]     = p0 + p1;
-        pSrc[2 * l + 1] = p2 - p3;
+        pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+        pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
     }
 
@@ -228,13 +228,13 @@
       ydIn = pSrc[(2U * i3) + 1U];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /*  index calculation for the coefficients */
       ia2 = ia1 + ia1;
@@ -242,31 +242,31 @@
       si2 = pCoef[(ia2 * 2U) + 1U];
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
       /* xb - xd */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
       /* yb - yd */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+      pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+      pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
       /* (xa - xc) + (yb - yd) */
-      Xb12C_out = (Xaminusc + Ybminusd);
+      Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yb12C_out = (Yaminusc - Xbminusd);
+      Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
       /* (xa + xc) - (xb + xd) */
-      Xc12C_out = (Xaplusc - Xbplusd);
+      Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* (ya + yc) - (yb + yd) */
-      Yc12C_out = (Yaplusc - Ybplusd);
+      Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* (xa - xc) - (yb - yd) */
-      Xd12C_out = (Xaminusc - Ybminusd);
+      Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yd12C_out = (Xbminusd + Yaminusc);
+      Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
       co1 = pCoef[ia1 * 2U];
       si1 = pCoef[(ia1 * 2U) + 1U];
@@ -276,38 +276,38 @@
       co3 = pCoef[ia3 * 2U];
       si3 = pCoef[(ia3 * 2U) + 1U];
 
-      Xb12_out = Xb12C_out * co1;
-      Yb12_out = Yb12C_out * co1;
-      Xc12_out = Xc12C_out * co2;
-      Yc12_out = Yc12C_out * co2;
-      Xd12_out = Xd12C_out * co3;
-      Yd12_out = Yd12C_out * co3;
+      Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+      Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+      Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+      Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+      Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+      Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
       //Xb12_out -= Yb12C_out * si1;
-      p0 = Yb12C_out * si1;
+      p0 = (_Float16)Yb12C_out * (_Float16)si1;
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
       //Yb12_out += Xb12C_out * si1;
-      p1 = Xb12C_out * si1;
+      p1 = (_Float16)Xb12C_out * (_Float16)si1;
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       //Xc12_out -= Yc12C_out * si2;
-      p2 = Yc12C_out * si2;
+      p2 = (_Float16)Yc12C_out * (_Float16)si2;
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
       //Yc12_out += Xc12C_out * si2;
-      p3 = Xc12C_out * si2;
+      p3 = (_Float16)Xc12C_out * (_Float16)si2;
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
       //Xd12_out -= Yd12C_out * si3;
-      p4 = Yd12C_out * si3;
+      p4 = (_Float16)Yd12C_out * (_Float16)si3;
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
       //Yd12_out += Xd12C_out * si3;
-      p5 = Xd12C_out * si3;
+      p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-      Xb12_out += p0;
-      Yb12_out -= p1;
-      Xc12_out += p2;
-      Yc12_out -= p3;
-      Xd12_out += p4;
-      Yd12_out -= p5;
+      Xb12_out += (_Float16)p0;
+      Yb12_out -= (_Float16)p1;
+      Xc12_out += (_Float16)p2;
+      Yc12_out -= (_Float16)p3;
+      Xd12_out += (_Float16)p4;
+      Yd12_out -= (_Float16)p5;
 
       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
       pSrc[2U * i1] = Xc12_out;
@@ -385,71 +385,71 @@
             ydIn = pSrc[(2U * i3) + 1U];
 
             /* xa - xc */
-            Xaminusc = xaIn - xcIn;
+            Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
             /* (xb - xd) */
-            Xbminusd = xbIn - xdIn;
+            Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
             /* ya - yc */
-            Yaminusc = yaIn - ycIn;
+            Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
             /* (yb - yd) */
-            Ybminusd = ybIn - ydIn;
+            Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
             /* xa + xc */
-            Xaplusc = xaIn + xcIn;
+            Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
             /* xb + xd */
-            Xbplusd = xbIn + xdIn;
+            Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
             /* ya + yc */
-            Yaplusc = yaIn + ycIn;
+            Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
             /* yb + yd */
-            Ybplusd = ybIn + ydIn;
+            Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
             /* (xa - xc) + (yb - yd) */
-            Xb12C_out = (Xaminusc + Ybminusd);
+            Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
             /* (ya - yc) -  (xb - xd) */
-            Yb12C_out = (Yaminusc - Xbminusd);
+            Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
             /* xa + xc -(xb + xd) */
-            Xc12C_out = (Xaplusc - Xbplusd);
+            Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
             /* (ya + yc) - (yb + yd) */
-            Yc12C_out = (Yaplusc - Ybplusd);
+            Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
             /* (xa - xc) - (yb - yd) */
-            Xd12C_out = (Xaminusc - Ybminusd);
+            Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
             /* (ya - yc) +  (xb - xd) */
-            Yd12C_out = (Xbminusd + Yaminusc);
+            Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
-            pSrc[(2U * i0)] = Xaplusc + Xbplusd;
-            pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+            pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
+            pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
-            Xb12_out = Xb12C_out * co1;
-            Yb12_out = Yb12C_out * co1;
-            Xc12_out = Xc12C_out * co2;
-            Yc12_out = Yc12C_out * co2;
-            Xd12_out = Xd12C_out * co3;
-            Yd12_out = Yd12C_out * co3;
+            Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+            Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+            Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+            Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+            Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+            Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
             //Xb12_out -= Yb12C_out * si1;
-            p0 = Yb12C_out * si1;
+            p0 = (_Float16)Yb12C_out * (_Float16)si1;
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
             //Yb12_out += Xb12C_out * si1;
-            p1 = Xb12C_out * si1;
+            p1 = (_Float16)Xb12C_out * (_Float16)si1;
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             //Xc12_out -= Yc12C_out * si2;
-            p2 = Yc12C_out * si2;
+            p2 = (_Float16)Yc12C_out * (_Float16)si2;
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
             //Yc12_out += Xc12C_out * si2;
-            p3 = Xc12C_out * si2;
+            p3 = (_Float16)Xc12C_out * (_Float16)si2;
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
             //Xd12_out -= Yd12C_out * si3;
-            p4 = Yd12C_out * si3;
+            p4 = (_Float16)Yd12C_out * (_Float16)si3;
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
             //Yd12_out += Xd12C_out * si3;
-            p5 = Xd12C_out * si3;
+            p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-            Xb12_out += p0;
-            Yb12_out -= p1;
-            Xc12_out += p2;
-            Yc12_out -= p3;
-            Xd12_out += p4;
-            Yd12_out -= p5;
+            Xb12_out += (_Float16)p0;
+            Yb12_out -= (_Float16)p1;
+            Xc12_out += (_Float16)p2;
+            Yc12_out -= (_Float16)p3;
+            Xd12_out += (_Float16)p4;
+            Yd12_out -= (_Float16)p5;
 
             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
             pSrc[2U * i1] = Xc12_out;
@@ -492,45 +492,45 @@
       ydIn = ptr1[7];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
 
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
 
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
 
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
 
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /* (xb-xd) */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
 
       /* (yb-yd) */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      a0 = (Xaplusc + Xbplusd);
+      a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
       /* ya' = ya + yb + yc + yd */
-      a1 = (Yaplusc + Ybplusd);
+      a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
       /* xc' = (xa-xb+xc-xd) */
-      a2 = (Xaplusc - Xbplusd);
+      a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* yc' = (ya-yb+yc-yd) */
-      a3 = (Yaplusc - Ybplusd);
+      a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* xb' = (xa+yb-xc-yd) */
-      a4 = (Xaminusc + Ybminusd);
+      a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* yb' = (ya-xb-yc+xd) */
-      a5 = (Yaminusc - Xbminusd);
+      a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
       /* xd' = (xa-yb-xc+yd)) */
-      a6 = (Xaminusc - Ybminusd);
+      a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* yd' = (ya+xb-yc-xd) */
-      a7 = (Xbminusd + Yaminusc);
+      a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
       ptr1[0] = a0;
       ptr1[1] = a1;
@@ -588,70 +588,70 @@
             i3 = i2 + n2;
 
             /* xa + xc */
-            r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+            r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
 
             /* xa - xc */
-            r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+            r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
 
             /* ya + yc */
-            s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+            s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
             /* ya - yc */
-            s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+            s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
             /* xb + xd */
-            t1 = pSrc[2U * i1] + pSrc[2U * i3];
+            t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
             /* xa' = xa + xb + xc + xd */
-            pSrc[2U * i0] = r1 + t1;
+            pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
 
             /* xa + xc -(xb + xd) */
-            r1 = r1 - t1;
+            r1 = (_Float16)r1 - (_Float16)t1;
 
             /* yb + yd */
-            t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+            t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
             /* ya' = ya + yb + yc + yd */
-            pSrc[(2U * i0) + 1U] = s1 + t2;
+            pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
 
             /* (ya + yc) - (yb + yd) */
-            s1 = s1 - t2;
+            s1 = (_Float16)s1 - (_Float16)t2;
 
             /* (yb - yd) */
-            t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+            t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
             /* (xb - xd) */
-            t2 = pSrc[2U * i1] - pSrc[2U * i3];
+            t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
-            pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
+            pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);
 
             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
-            pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
+            pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);
 
             /* (xa - xc) + (yb - yd) */
-            r1 = r2 + t1;
+            r1 = (_Float16)r2 + (_Float16)t1;
 
             /* (xa - xc) - (yb - yd) */
-            r2 = r2 - t1;
+            r2 = (_Float16)r2 - (_Float16)t1;
 
             /* (ya - yc) -  (xb - xd) */
-            s1 = s2 - t2;
+            s1 = (_Float16)s2 - (_Float16)t2;
 
             /* (ya - yc) +  (xb - xd) */
-            s2 = s2 + t2;
+            s2 = (_Float16)s2 + (_Float16)t2;
 
             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
-            pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
+            pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);
 
             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
-            pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
+            pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);
 
             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
-            pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
+            pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);
 
             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
-            pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
+            pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);
 
             i0 += n1;
          } while ( i0 < fftLen);
@@ -732,13 +732,13 @@
       ydIn = pSrc[(2U * i3) + 1U];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /*  index calculation for the coefficients */
       ia2 = ia1 + ia1;
@@ -746,32 +746,32 @@
       si2 = pCoef[(ia2 * 2U) + 1U];
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
       /* xb - xd */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
       /* yb - yd */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+      pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
 
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+      pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
       /* (xa - xc) - (yb - yd) */
-      Xb12C_out = (Xaminusc - Ybminusd);
+      Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yb12C_out = (Yaminusc + Xbminusd);
+      Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
       /* (xa + xc) - (xb + xd) */
-      Xc12C_out = (Xaplusc - Xbplusd);
+      Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* (ya + yc) - (yb + yd) */
-      Yc12C_out = (Yaplusc - Ybplusd);
+      Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* (xa - xc) + (yb - yd) */
-      Xd12C_out = (Xaminusc + Ybminusd);
+      Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* (ya - yc) - (xb - xd) */
-      Yd12C_out = (Yaminusc - Xbminusd);
+      Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
       co1 = pCoef[ia1 * 2U];
       si1 = pCoef[(ia1 * 2U) + 1U];
@@ -781,38 +781,38 @@
       co3 = pCoef[ia3 * 2U];
       si3 = pCoef[(ia3 * 2U) + 1U];
 
-      Xb12_out = Xb12C_out * co1;
-      Yb12_out = Yb12C_out * co1;
-      Xc12_out = Xc12C_out * co2;
-      Yc12_out = Yc12C_out * co2;
-      Xd12_out = Xd12C_out * co3;
-      Yd12_out = Yd12C_out * co3;
+      Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+      Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+      Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+      Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+      Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+      Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
       //Xb12_out -= Yb12C_out * si1;
-      p0 = Yb12C_out * si1;
+      p0 = (_Float16)Yb12C_out * (_Float16)si1;
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
       //Yb12_out += Xb12C_out * si1;
-      p1 = Xb12C_out * si1;
+      p1 = (_Float16)Xb12C_out * (_Float16)si1;
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       //Xc12_out -= Yc12C_out * si2;
-      p2 = Yc12C_out * si2;
+      p2 = (_Float16)Yc12C_out * (_Float16)si2;
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
       //Yc12_out += Xc12C_out * si2;
-      p3 = Xc12C_out * si2;
+      p3 = (_Float16)Xc12C_out * (_Float16)si2;
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
       //Xd12_out -= Yd12C_out * si3;
-      p4 = Yd12C_out * si3;
+      p4 = (_Float16)Yd12C_out * (_Float16)si3;
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
       //Yd12_out += Xd12C_out * si3;
-      p5 = Xd12C_out * si3;
+      p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-      Xb12_out -= p0;
-      Yb12_out += p1;
-      Xc12_out -= p2;
-      Yc12_out += p3;
-      Xd12_out -= p4;
-      Yd12_out += p5;
+      Xb12_out -= (_Float16)p0;
+      Yb12_out += (_Float16)p1;
+      Xc12_out -= (_Float16)p2;
+      Yc12_out += (_Float16)p3;
+      Xd12_out -= (_Float16)p4;
+      Yd12_out += (_Float16)p5;
 
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       pSrc[2U * i1] = Xc12_out;
@@ -889,71 +889,71 @@
             ydIn = pSrc[(2U * i3) + 1U];
 
             /* xa - xc */
-            Xaminusc = xaIn - xcIn;
+            Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
             /* (xb - xd) */
-            Xbminusd = xbIn - xdIn;
+            Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
             /* ya - yc */
-            Yaminusc = yaIn - ycIn;
+            Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
             /* (yb - yd) */
-            Ybminusd = ybIn - ydIn;
+            Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
             /* xa + xc */
-            Xaplusc = xaIn + xcIn;
+            Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
             /* xb + xd */
-            Xbplusd = xbIn + xdIn;
+            Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
             /* ya + yc */
-            Yaplusc = yaIn + ycIn;
+            Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
             /* yb + yd */
-            Ybplusd = ybIn + ydIn;
+            Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
             /* (xa - xc) - (yb - yd) */
-            Xb12C_out = (Xaminusc - Ybminusd);
+            Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
             /* (ya - yc) +  (xb - xd) */
-            Yb12C_out = (Yaminusc + Xbminusd);
+            Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
             /* xa + xc -(xb + xd) */
-            Xc12C_out = (Xaplusc - Xbplusd);
+            Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
             /* (ya + yc) - (yb + yd) */
-            Yc12C_out = (Yaplusc - Ybplusd);
+            Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
             /* (xa - xc) + (yb - yd) */
-            Xd12C_out = (Xaminusc + Ybminusd);
+            Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
             /* (ya - yc) -  (xb - xd) */
-            Yd12C_out = (Yaminusc - Xbminusd);
+            Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
-            pSrc[(2U * i0)] = Xaplusc + Xbplusd;
-            pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+            pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
+            pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
-            Xb12_out = Xb12C_out * co1;
-            Yb12_out = Yb12C_out * co1;
-            Xc12_out = Xc12C_out * co2;
-            Yc12_out = Yc12C_out * co2;
-            Xd12_out = Xd12C_out * co3;
-            Yd12_out = Yd12C_out * co3;
+            Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+            Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+            Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+            Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+            Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+            Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
             //Xb12_out -= Yb12C_out * si1;
-            p0 = Yb12C_out * si1;
+            p0 = (_Float16)Yb12C_out * (_Float16)si1;
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
             //Yb12_out += Xb12C_out * si1;
-            p1 = Xb12C_out * si1;
+            p1 = (_Float16)Xb12C_out * (_Float16)si1;
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             //Xc12_out -= Yc12C_out * si2;
-            p2 = Yc12C_out * si2;
+            p2 = (_Float16)Yc12C_out * (_Float16)si2;
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
             //Yc12_out += Xc12C_out * si2;
-            p3 = Xc12C_out * si2;
+            p3 = (_Float16)Xc12C_out * (_Float16)si2;
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
             //Xd12_out -= Yd12C_out * si3;
-            p4 = Yd12C_out * si3;
+            p4 = (_Float16)Yd12C_out * (_Float16)si3;
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
             //Yd12_out += Xd12C_out * si3;
-            p5 = Xd12C_out * si3;
+            p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-            Xb12_out -= p0;
-            Yb12_out += p1;
-            Xc12_out -= p2;
-            Yc12_out += p3;
-            Xd12_out -= p4;
-            Yd12_out += p5;
+            Xb12_out -= (_Float16)p0;
+            Yb12_out += (_Float16)p1;
+            Xc12_out -= (_Float16)p2;
+            Yc12_out += (_Float16)p3;
+            Xd12_out -= (_Float16)p4;
+            Yd12_out += (_Float16)p5;
 
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             pSrc[2U * i1] = Xc12_out;
@@ -998,54 +998,54 @@
 
       /*  Butterfly implementation */
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
 
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
 
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
 
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
 
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /* (xb-xd) */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
 
       /* (yb-yd) */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
-      a0 = (Xaplusc + Xbplusd);
+      a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
-      a1 = (Yaplusc + Ybplusd);
+      a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
-      a2 = (Xaplusc - Xbplusd);
+      a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
-      a3 = (Yaplusc - Ybplusd);
+      a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
-      a4 = (Xaminusc - Ybminusd);
+      a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
-      a5 = (Yaminusc + Xbminusd);
+      a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
-      a6 = (Xaminusc + Ybminusd);
+      a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
-      a7 = (Yaminusc - Xbminusd);
+      a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
-      p0 = a0 * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p2 = a2 * onebyfftLen;
-      p3 = a3 * onebyfftLen;
-      p4 = a4 * onebyfftLen;
-      p5 = a5 * onebyfftLen;
-      p6 = a6 * onebyfftLen;
-      p7 = a7 * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p2 = (_Float16)a2 * (_Float16)onebyfftLen;
+      p3 = (_Float16)a3 * (_Float16)onebyfftLen;
+      p4 = (_Float16)a4 * (_Float16)onebyfftLen;
+      p5 = (_Float16)a5 * (_Float16)onebyfftLen;
+      p6 = (_Float16)a6 * (_Float16)onebyfftLen;
+      p7 = (_Float16)a7 * (_Float16)onebyfftLen;
 
       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
       ptr1[0] = p0;
@@ -1114,70 +1114,70 @@
             i3 = i2 + n2;
 
             /* xa + xc */
-            r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+            r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
 
             /* xa - xc */
-            r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+            r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
 
             /* ya + yc */
-            s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+            s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
             /* ya - yc */
-            s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+            s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
             /* xb + xd */
-            t1 = pSrc[2U * i1] + pSrc[2U * i3];
+            t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
             /* xa' = xa + xb + xc + xd */
-            pSrc[2U * i0] = r1 + t1;
+            pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
 
             /* xa + xc -(xb + xd) */
-            r1 = r1 - t1;
+            r1 = (_Float16)r1 - (_Float16)t1;
 
             /* yb + yd */
-            t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+            t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
             /* ya' = ya + yb + yc + yd */
-            pSrc[(2U * i0) + 1U] = s1 + t2;
+            pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
 
             /* (ya + yc) - (yb + yd) */
-            s1 = s1 - t2;
+            s1 = (_Float16)s1 - (_Float16)t2;
 
             /* (yb - yd) */
-            t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+            t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
             /* (xb - xd) */
-            t2 = pSrc[2U * i1] - pSrc[2U * i3];
+            t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
-            pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
+            pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
 
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
-            pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
+            pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
 
             /* (xa - xc) - (yb - yd) */
-            r1 = r2 - t1;
+            r1 = (_Float16)r2 - (_Float16)t1;
 
             /* (xa - xc) + (yb - yd) */
-            r2 = r2 + t1;
+            r2 = (_Float16)r2 + (_Float16)t1;
 
             /* (ya - yc) +  (xb - xd) */
-            s1 = s2 + t2;
+            s1 = (_Float16)s2 + (_Float16)t2;
 
             /* (ya - yc) -  (xb - xd) */
-            s2 = s2 - t2;
+            s2 = (_Float16)s2 - (_Float16)t2;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
-            pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
+            pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
 
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
-            pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
+            pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
 
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
-            pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
+            pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
 
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
-            pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
+            pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
 
             i0 += n1;
          } while ( i0 < fftLen);
@@ -1200,73 +1200,73 @@
 
       /*  Butterfly implementation */
       /* xa + xc */
-      r1 = pSrc[2U * i0] + pSrc[2U * i2];
+      r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
 
       /* xa - xc */
-      r2 = pSrc[2U * i0] - pSrc[2U * i2];
+      r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
 
       /* ya + yc */
-      s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+      s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
       /* ya - yc */
-      s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+      s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
       /* xc + xd */
-      t1 = pSrc[2U * i1] + pSrc[2U * i3];
+      t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
+      pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
 
       /* (xa + xb) - (xc + xd) */
-      r1 = r1 - t1;
+      r1 = (_Float16)r1 - (_Float16)t1;
 
       /* yb + yd */
-      t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+      t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
+      pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
 
       /* (ya + yc) - (yb + yd) */
-      s1 = s1 - t2;
+      s1 = (_Float16)s1 - (_Float16)t2;
 
       /* (yb-yd) */
-      t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+      t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
       /* (xb-xd) */
-      t2 = pSrc[2U * i1] - pSrc[2U * i3];
+      t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
-      pSrc[2U * i1] = r1 * onebyfftLen;
+      pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
 
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
-      pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
+      pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
 
       /* (xa - xc) - (yb-yd) */
-      r1 = r2 - t1;
+      r1 = (_Float16)r2 - (_Float16)t1;
 
       /* (xa - xc) + (yb-yd) */
-      r2 = r2 + t1;
+      r2 = (_Float16)r2 + (_Float16)t1;
 
       /* (ya - yc) + (xb-xd) */
-      s1 = s2 + t2;
+      s1 = (_Float16)s2 + (_Float16)t2;
 
       /* (ya - yc) - (xb-xd) */
-      s2 = s2 - t2;
+      s2 = (_Float16)s2 - (_Float16)t2;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
-      pSrc[2U * i2] = r1 * onebyfftLen;
+      pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
 
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
-      pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
+      pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
 
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
-      pSrc[2U * i3] = r2 * onebyfftLen;
+      pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
 
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
-      pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
+      pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
    }
 
 #endif /* #if defined (ARM_MATH_DSP) */
 }
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
index 79f9311..b7eb37a 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
@@ -61,7 +61,7 @@
    float16_t p1, p2, p3, p4;
    float16_t co2, co3, co4, co5, co6, co7, co8;
    float16_t si2, si3, si4, si5, si6, si7, si8;
-   const float16_t C81 = 0.70710678118f;
+   const float16_t C81 = 0.70710678118f16;
 
    n2 = fftLen;
 
@@ -80,58 +80,58 @@
          i6 = i5 + n2;
          i7 = i6 + n2;
          i8 = i7 + n2;
-         r1 = pSrc[2 * i1] + pSrc[2 * i5];
-         r5 = pSrc[2 * i1] - pSrc[2 * i5];
-         r2 = pSrc[2 * i2] + pSrc[2 * i6];
-         r6 = pSrc[2 * i2] - pSrc[2 * i6];
-         r3 = pSrc[2 * i3] + pSrc[2 * i7];
-         r7 = pSrc[2 * i3] - pSrc[2 * i7];
-         r4 = pSrc[2 * i4] + pSrc[2 * i8];
-         r8 = pSrc[2 * i4] - pSrc[2 * i8];
-         t1 = r1 - r3;
-         r1 = r1 + r3;
-         r3 = r2 - r4;
-         r2 = r2 + r4;
-         pSrc[2 * i1] = r1 + r2;
-         pSrc[2 * i5] = r1 - r2;
-         r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
-         s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
-         r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
-         s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
-         s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
-         s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
-         r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
-         s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
-         t2 = r1 - s3;
-         r1 = r1 + s3;
-         s3 = r2 - r4;
-         r2 = r2 + r4;
-         pSrc[2 * i1 + 1] = r1 + r2;
-         pSrc[2 * i5 + 1] = r1 - r2;
-         pSrc[2 * i3]     = t1 + s3;
-         pSrc[2 * i7]     = t1 - s3;
-         pSrc[2 * i3 + 1] = t2 - r3;
-         pSrc[2 * i7 + 1] = t2 + r3;
-         r1 = (r6 - r8) * C81;
-         r6 = (r6 + r8) * C81;
-         r2 = (s6 - s8) * C81;
-         s6 = (s6 + s8) * C81;
-         t1 = r5 - r1;
-         r5 = r5 + r1;
-         r8 = r7 - r6;
-         r7 = r7 + r6;
-         t2 = s5 - r2;
-         s5 = s5 + r2;
-         s8 = s7 - s6;
-         s7 = s7 + s6;
-         pSrc[2 * i2]     = r5 + s7;
-         pSrc[2 * i8]     = r5 - s7;
-         pSrc[2 * i6]     = t1 + s8;
-         pSrc[2 * i4]     = t1 - s8;
-         pSrc[2 * i2 + 1] = s5 - r7;
-         pSrc[2 * i8 + 1] = s5 + r7;
-         pSrc[2 * i6 + 1] = t2 - r8;
-         pSrc[2 * i4 + 1] = t2 + r8;
+         r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
+         r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
+         r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
+         r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
+         r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
+         r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
+         r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
+         r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
+         t1 = (_Float16)r1 - (_Float16)r3;
+         r1 = (_Float16)r1 + (_Float16)r3;
+         r3 = (_Float16)r2 - (_Float16)r4;
+         r2 = (_Float16)r2 + (_Float16)r4;
+         pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
+         pSrc[2 * i5] = (_Float16)r1 - (_Float16)r2;
+         r1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
+         s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
+         r2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
+         s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
+         s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
+         s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
+         r4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
+         s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
+         t2 = (_Float16)r1 - (_Float16)s3;
+         r1 = (_Float16)r1 + (_Float16)s3;
+         s3 = (_Float16)r2 - (_Float16)r4;
+         r2 = (_Float16)r2 + (_Float16)r4;
+         pSrc[2 * i1 + 1] = (_Float16)r1 + (_Float16)r2;
+         pSrc[2 * i5 + 1] = (_Float16)r1 - (_Float16)r2;
+         pSrc[2 * i3]     = (_Float16)t1 + (_Float16)s3;
+         pSrc[2 * i7]     = (_Float16)t1 - (_Float16)s3;
+         pSrc[2 * i3 + 1] = (_Float16)t2 - (_Float16)r3;
+         pSrc[2 * i7 + 1] = (_Float16)t2 + (_Float16)r3;
+         r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
+         r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
+         r2 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
+         s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
+         t1 = (_Float16)r5 - (_Float16)r1;
+         r5 = (_Float16)r5 + (_Float16)r1;
+         r8 = (_Float16)r7 - (_Float16)r6;
+         r7 = (_Float16)r7 + (_Float16)r6;
+         t2 = (_Float16)s5 - (_Float16)r2;
+         s5 = (_Float16)s5 + (_Float16)r2;
+         s8 = (_Float16)s7 - (_Float16)s6;
+         s7 = (_Float16)s7 + (_Float16)s6;
+         pSrc[2 * i2]     = (_Float16)r5 + (_Float16)s7;
+         pSrc[2 * i8]     = (_Float16)r5 - (_Float16)s7;
+         pSrc[2 * i6]     = (_Float16)t1 + (_Float16)s8;
+         pSrc[2 * i4]     = (_Float16)t1 - (_Float16)s8;
+         pSrc[2 * i2 + 1] = (_Float16)s5 - (_Float16)r7;
+         pSrc[2 * i8 + 1] = (_Float16)s5 + (_Float16)r7;
+         pSrc[2 * i6 + 1] = (_Float16)t2 - (_Float16)r8;
+         pSrc[2 * i4 + 1] = (_Float16)t2 + (_Float16)r8;
 
          i1 += n1;
       } while (i1 < fftLen);
@@ -181,100 +181,100 @@
             i6 = i5 + n2;
             i7 = i6 + n2;
             i8 = i7 + n2;
-            r1 = pSrc[2 * i1] + pSrc[2 * i5];
-            r5 = pSrc[2 * i1] - pSrc[2 * i5];
-            r2 = pSrc[2 * i2] + pSrc[2 * i6];
-            r6 = pSrc[2 * i2] - pSrc[2 * i6];
-            r3 = pSrc[2 * i3] + pSrc[2 * i7];
-            r7 = pSrc[2 * i3] - pSrc[2 * i7];
-            r4 = pSrc[2 * i4] + pSrc[2 * i8];
-            r8 = pSrc[2 * i4] - pSrc[2 * i8];
-            t1 = r1 - r3;
-            r1 = r1 + r3;
-            r3 = r2 - r4;
-            r2 = r2 + r4;
-            pSrc[2 * i1] = r1 + r2;
-            r2 = r1 - r2;
-            s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
-            s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
-            s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
-            s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
-            s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
-            s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
-            s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
-            s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
-            t2 = s1 - s3;
-            s1 = s1 + s3;
-            s3 = s2 - s4;
-            s2 = s2 + s4;
-            r1 = t1 + s3;
-            t1 = t1 - s3;
-            pSrc[2 * i1 + 1] = s1 + s2;
-            s2 = s1 - s2;
-            s1 = t2 - r3;
-            t2 = t2 + r3;
-            p1 = co5 * r2;
-            p2 = si5 * s2;
-            p3 = co5 * s2;
-            p4 = si5 * r2;
-            pSrc[2 * i5]     = p1 + p2;
-            pSrc[2 * i5 + 1] = p3 - p4;
-            p1 = co3 * r1;
-            p2 = si3 * s1;
-            p3 = co3 * s1;
-            p4 = si3 * r1;
-            pSrc[2 * i3]     = p1 + p2;
-            pSrc[2 * i3 + 1] = p3 - p4;
-            p1 = co7 * t1;
-            p2 = si7 * t2;
-            p3 = co7 * t2;
-            p4 = si7 * t1;
-            pSrc[2 * i7]     = p1 + p2;
-            pSrc[2 * i7 + 1] = p3 - p4;
-            r1 = (r6 - r8) * C81;
-            r6 = (r6 + r8) * C81;
-            s1 = (s6 - s8) * C81;
-            s6 = (s6 + s8) * C81;
-            t1 = r5 - r1;
-            r5 = r5 + r1;
-            r8 = r7 - r6;
-            r7 = r7 + r6;
-            t2 = s5 - s1;
-            s5 = s5 + s1;
-            s8 = s7 - s6;
-            s7 = s7 + s6;
-            r1 = r5 + s7;
-            r5 = r5 - s7;
-            r6 = t1 + s8;
-            t1 = t1 - s8;
-            s1 = s5 - r7;
-            s5 = s5 + r7;
-            s6 = t2 - r8;
-            t2 = t2 + r8;
-            p1 = co2 * r1;
-            p2 = si2 * s1;
-            p3 = co2 * s1;
-            p4 = si2 * r1;
-            pSrc[2 * i2]     = p1 + p2;
-            pSrc[2 * i2 + 1] = p3 - p4;
-            p1 = co8 * r5;
-            p2 = si8 * s5;
-            p3 = co8 * s5;
-            p4 = si8 * r5;
-            pSrc[2 * i8]     = p1 + p2;
-            pSrc[2 * i8 + 1] = p3 - p4;
-            p1 = co6 * r6;
-            p2 = si6 * s6;
-            p3 = co6 * s6;
-            p4 = si6 * r6;
-            pSrc[2 * i6]     = p1 + p2;
-            pSrc[2 * i6 + 1] = p3 - p4;
-            p1 = co4 * t1;
-            p2 = si4 * t2;
-            p3 = co4 * t2;
-            p4 = si4 * t1;
-            pSrc[2 * i4]     = p1 + p2;
-            pSrc[2 * i4 + 1] = p3 - p4;
+            r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
+            r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
+            r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
+            r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
+            r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
+            r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
+            r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
+            r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
+            t1 = (_Float16)r1 - (_Float16)r3;
+            r1 = (_Float16)r1 + (_Float16)r3;
+            r3 = (_Float16)r2 - (_Float16)r4;
+            r2 = (_Float16)r2 + (_Float16)r4;
+            pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
+            r2 = (_Float16)r1 - (_Float16)r2;
+            s1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
+            s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
+            s2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
+            s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
+            s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
+            s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
+            s4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
+            s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
+            t2 = (_Float16)s1 - (_Float16)s3;
+            s1 = (_Float16)s1 + (_Float16)s3;
+            s3 = (_Float16)s2 - (_Float16)s4;
+            s2 = (_Float16)s2 + (_Float16)s4;
+            r1 = (_Float16)t1 + (_Float16)s3;
+            t1 = (_Float16)t1 - (_Float16)s3;
+            pSrc[2 * i1 + 1] = (_Float16)s1 + (_Float16)s2;
+            s2 = (_Float16)s1 - (_Float16)s2;
+            s1 = (_Float16)t2 - (_Float16)r3;
+            t2 = (_Float16)t2 + (_Float16)r3;
+            p1 = (_Float16)co5 * (_Float16)r2;
+            p2 = (_Float16)si5 * (_Float16)s2;
+            p3 = (_Float16)co5 * (_Float16)s2;
+            p4 = (_Float16)si5 * (_Float16)r2;
+            pSrc[2 * i5]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i5 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co3 * (_Float16)r1;
+            p2 = (_Float16)si3 * (_Float16)s1;
+            p3 = (_Float16)co3 * (_Float16)s1;
+            p4 = (_Float16)si3 * (_Float16)r1;
+            pSrc[2 * i3]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i3 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co7 * (_Float16)t1;
+            p2 = (_Float16)si7 * (_Float16)t2;
+            p3 = (_Float16)co7 * (_Float16)t2;
+            p4 = (_Float16)si7 * (_Float16)t1;
+            pSrc[2 * i7]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i7 + 1] = (_Float16)p3 - (_Float16)p4;
+            r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
+            r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
+            s1 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
+            s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
+            t1 = (_Float16)r5 - (_Float16)r1;
+            r5 = (_Float16)r5 + (_Float16)r1;
+            r8 = (_Float16)r7 - (_Float16)r6;
+            r7 = (_Float16)r7 + (_Float16)r6;
+            t2 = (_Float16)s5 - (_Float16)s1;
+            s5 = (_Float16)s5 + (_Float16)s1;
+            s8 = (_Float16)s7 - (_Float16)s6;
+            s7 = (_Float16)s7 + (_Float16)s6;
+            r1 = (_Float16)r5 + (_Float16)s7;
+            r5 = (_Float16)r5 - (_Float16)s7;
+            r6 = (_Float16)t1 + (_Float16)s8;
+            t1 = (_Float16)t1 - (_Float16)s8;
+            s1 = (_Float16)s5 - (_Float16)r7;
+            s5 = (_Float16)s5 + (_Float16)r7;
+            s6 = (_Float16)t2 - (_Float16)r8;
+            t2 = (_Float16)t2 + (_Float16)r8;
+            p1 = (_Float16)co2 * (_Float16)r1;
+            p2 = (_Float16)si2 * (_Float16)s1;
+            p3 = (_Float16)co2 * (_Float16)s1;
+            p4 = (_Float16)si2 * (_Float16)r1;
+            pSrc[2 * i2]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i2 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co8 * (_Float16)r5;
+            p2 = (_Float16)si8 * (_Float16)s5;
+            p3 = (_Float16)co8 * (_Float16)s5;
+            p4 = (_Float16)si8 * (_Float16)r5;
+            pSrc[2 * i8]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i8 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co6 * (_Float16)r6;
+            p2 = (_Float16)si6 * (_Float16)s6;
+            p3 = (_Float16)co6 * (_Float16)s6;
+            p4 = (_Float16)si6 * (_Float16)r6;
+            pSrc[2 * i6]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i6 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co4 * (_Float16)t1;
+            p2 = (_Float16)si4 * (_Float16)t2;
+            p3 = (_Float16)co4 * (_Float16)t2;
+            p4 = (_Float16)si4 * (_Float16)t1;
+            pSrc[2 * i4]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i4 + 1] = (_Float16)p3 - (_Float16)p4;
 
             i1 += n1;
          } while (i1 < fftLen);
@@ -286,4 +286,4 @@
    } while (n2 > 7);
 }
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
index f5e6f15..9048f83 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
@@ -72,15 +72,15 @@
    twI = *pCoeff++ ;
 
    // U1 = XA(1) + XB(1); % It is real
-   t1a = xBR + xAR  ;
+   t1a = (_Float16)xBR + (_Float16)xAR  ;
 
    // U2 = XB(1) - XA(1); % It is imaginary
-   t1b = xBI + xAI  ;
+   t1b = (_Float16)xBI + (_Float16)xAI  ;
 
    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-   *pOut++ = 0.5f * ( t1a + t1b );
-   *pOut++ = 0.5f * ( t1a - t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a + (_Float16)t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a - (_Float16)t1b );
 
    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
    pB  = p + 2*k - 14;
@@ -174,18 +174,18 @@
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xBR - xAR ;
-      t1b = xBI + xAI ;
+      t1a = (_Float16)xBR - (_Float16)xAR ;
+      t1b = (_Float16)xBI + (_Float16)xAI ;
 
       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-      p0 = twR * t1a;
-      p1 = twI * t1a;
-      p2 = twR * t1b;
-      p3 = twI * t1b;
+      p0 = (_Float16)twR * (_Float16)t1a;
+      p1 = (_Float16)twI * (_Float16)t1a;
+      p2 = (_Float16)twR * (_Float16)t1b;
+      p3 = (_Float16)twI * (_Float16)t1b;
 
-      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR + (_Float16)p0 + (_Float16)p3 ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)p1 - (_Float16)p2 ); //xAI
 
       pA += 2;
       pB -= 2;
@@ -223,8 +223,8 @@
 
    pCoeff += 2 ;
 
-   *pOut++ = 0.5f * ( xAR + xAI );
-   *pOut++ = 0.5f * ( xAR - xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR + (_Float16)xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR - (_Float16)xAI );
 
    pB  =  p + 2*k - 14;
    pA +=  2    ;
@@ -293,18 +293,18 @@
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xAR - xBR ;
-      t1b = xAI + xBI ;
+      t1a = (_Float16)xAR - (_Float16)xBR ;
+      t1b = (_Float16)xAI + (_Float16)xBI ;
 
-      r = twR * t1a;
-      s = twI * t1b;
-      t = twI * t1a;
-      u = twR * t1b;
+      r = (_Float16)twR * (_Float16)t1a;
+      s = (_Float16)twI * (_Float16)t1b;
+      t = (_Float16)twI * (_Float16)t1a;
+      u = (_Float16)twR * (_Float16)t1b;
 
       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
-      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR - (_Float16)r - (_Float16)s ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)t - (_Float16)u ); //xAI
 
       pA += 2;
       pB -= 2;
@@ -342,15 +342,15 @@
 
 
    // U1 = XA(1) + XB(1); % It is real
-   t1a = xBR + xAR  ;
+   t1a = (_Float16)xBR + (_Float16)xAR  ;
 
    // U2 = XB(1) - XA(1); % It is imaginary
-   t1b = xBI + xAI  ;
+   t1b = (_Float16)xBI + (_Float16)xAI  ;
 
    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-   *pOut++ = 0.5f * ( t1a + t1b );
-   *pOut++ = 0.5f * ( t1a - t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a + (_Float16)t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a - (_Float16)t1b );
 
    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
    pB  = p + 2*k;
@@ -381,18 +381,18 @@
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xBR - xAR ;
-      t1b = xBI + xAI ;
+      t1a = (_Float16)xBR - (_Float16)xAR ;
+      t1b = (_Float16)xBI + (_Float16)xAI ;
 
       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-      p0 = twR * t1a;
-      p1 = twI * t1a;
-      p2 = twR * t1b;
-      p3 = twI * t1b;
+      p0 = (_Float16)twR * (_Float16)t1a;
+      p1 = (_Float16)twI * (_Float16)t1a;
+      p2 = (_Float16)twR * (_Float16)t1b;
+      p3 = (_Float16)twI * (_Float16)t1b;
 
-      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR + (_Float16)p0 + (_Float16)p3 ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)p1 - (_Float16)p2 ); //xAI
 
 
       pA += 2;
@@ -422,8 +422,8 @@
 
    pCoeff += 2 ;
 
-   *pOut++ = 0.5f * ( xAR + xAI );
-   *pOut++ = 0.5f * ( xAR - xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR + (_Float16)xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR - (_Float16)xAI );
 
    pB  =  p + 2*k ;
    pA +=  2	   ;
@@ -441,18 +441,18 @@
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xAR - xBR ;
-      t1b = xAI + xBI ;
+      t1a = (_Float16)xAR - (_Float16)xBR ;
+      t1b = (_Float16)xAI + (_Float16)xBI ;
 
-      r = twR * t1a;
-      s = twI * t1b;
-      t = twI * t1a;
-      u = twR * t1b;
+      r = (_Float16)twR * (_Float16)t1a;
+      s = (_Float16)twI * (_Float16)t1b;
+      t = (_Float16)twI * (_Float16)t1a;
+      u = (_Float16)twR * (_Float16)t1b;
 
       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
-      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR - (_Float16)r - (_Float16)s ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)t - (_Float16)u ); //xAI
 
       pA += 2;
       pB -= 2;
diff --git a/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c b/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
index c74f1ff..c6790cb 100755
--- a/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
+++ b/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
@@ -354,4 +354,4 @@
   @} end of RealFFT group
  */
 
-#endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
\ No newline at end of file
+#endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
diff --git a/CMSIS/DoxyGen/DSP/src/history.txt b/CMSIS/DoxyGen/DSP/src/history.txt
index 6361a5e..8ca5d9e 100644
--- a/CMSIS/DoxyGen/DSP/src/history.txt
+++ b/CMSIS/DoxyGen/DSP/src/history.txt
@@ -7,6 +7,13 @@
     <th>Description</th>
   </tr>
   <tr>
+    <td>V1.10.0</td>
+    <td>
+      Float promotion issues:
+      - Removed remaining issues of implicit float promotion (mainly in f16 code)
+    </td>
+  </tr>
+  <tr>
     <td>V1.9.0</td>
     <td>
       Re-organization of arm_math.h