CMSIS-DSP: Added support for Helium.
Only arm_dot_prod_f32 currently provides a Helium implementation.
diff --git a/CMSIS/DSP/Include/arm_helium_utils.h b/CMSIS/DSP/Include/arm_helium_utils.h
new file mode 100755
index 0000000..fd4c707
--- /dev/null
+++ b/CMSIS/DSP/Include/arm_helium_utils.h
@@ -0,0 +1,73 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_helium_utils.h
+ * Description: Utility functions for Helium development
+ *
+ * $Date: 09. September 2019
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_UTILS_HELIUM_H_
+#define _ARM_UTILS_HELIUM_H_
+
+#if defined (ARM_MATH_HELIUM)
+
+#define nbLanes(sz) (128/(sz)) /* number of sz-bit lanes in a 128-bit vector; sz parenthesized so expressions expand safely */
+/* Lane counts per element type, derived from the element width in bits. */
+#define VEC_LANES_F32 nbLanes(32)
+#define VEC_LANES_F16 nbLanes(16)
+#define VEC_LANES_Q63 nbLanes(64)
+#define VEC_LANES_Q31 nbLanes(32)
+#define VEC_LANES_Q15 nbLanes(16)
+#define VEC_LANES_Q7 nbLanes(8)
+
+#define nb_vec_lanes(ptr) _Generic((ptr), \
+ uint32_t *: VEC_LANES_Q31, \
+ uint16_t *: VEC_LANES_Q15, \
+ uint8_t *: VEC_LANES_Q7, \
+ q31_t *: VEC_LANES_Q31, \
+ q15_t *: VEC_LANES_Q15, \
+ q7_t *: VEC_LANES_Q7, \
+ float32_t*: VEC_LANES_F32, \
+ float16_t*: VEC_LANES_F16, \
+ const q31_t *: VEC_LANES_Q31, \
+ const q15_t *: VEC_LANES_Q15, \
+ const q7_t *: VEC_LANES_Q7, \
+ const float32_t*: VEC_LANES_F32, \
+ const float16_t*: VEC_LANES_F16, \
+ default: "err") /* maps a pointer type to its lane count; unlisted types yield a string - presumably so arithmetic use fails to compile (confirm) */
+
+/* Horizontal add: returns the sum of the four lanes of `in`, accumulated from lane 0 to lane 3. */
+__STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in)
+{
+    float32_t acc = vgetq_lane(in, 0);
+    acc += vgetq_lane(in, 1);
+    acc += vgetq_lane(in, 2);
+    acc += vgetq_lane(in, 3);
+    return acc;
+}
+
+#define post_incr_vec_size(ptr) ((ptr) += nb_vec_lanes(ptr)) /* advance ptr by the lane count selected for its pointer type */
+
+#endif
+
+#endif
\ No newline at end of file
diff --git a/CMSIS/DSP/Platforms/FVP/ARMv81MML/Startup/AC6/startup_ARMv81MML.c b/CMSIS/DSP/Platforms/FVP/ARMv81MML/Startup/AC6/startup_ARMv81MML.c
index b373ffc..eccf725 100755
--- a/CMSIS/DSP/Platforms/FVP/ARMv81MML/Startup/AC6/startup_ARMv81MML.c
+++ b/CMSIS/DSP/Platforms/FVP/ARMv81MML/Startup/AC6/startup_ARMv81MML.c
@@ -128,6 +128,7 @@
__set_MSPLIM((uint32_t)(&__STACK_LIMIT));
SystemInit(); /* CMSIS System Initialization */
+
__PROGRAM_START(); /* Enter PreMain (C library entry point) */
}
diff --git a/CMSIS/DSP/Platforms/FVP/ARMv81MML/system_ARMv81MML.c b/CMSIS/DSP/Platforms/FVP/ARMv81MML/system_ARMv81MML.c
index 2919ec0..75e5164 100644
--- a/CMSIS/DSP/Platforms/FVP/ARMv81MML/system_ARMv81MML.c
+++ b/CMSIS/DSP/Platforms/FVP/ARMv81MML/system_ARMv81MML.c
@@ -40,6 +40,11 @@
#define SYSTEM_CLOCK (5U * XTAL)
+#define DEBUG_DEMCR (*((volatile unsigned int *)0xE000EDFC)) /* memory-mapped register: volatile so accesses are not optimized away */
+#define DEBUG_TRCENA (1U << 24) /* DEMCR.TRCENA (bit 24): enable bit for the DWT/ITM trace features */
+
+#define CCR (*((volatile unsigned int *)0xE000ED14)) /* Configuration and Control Register */
+#define CCR_DL (1U << 19) /* bit 19: branch cache enable (presumably CCR.LOB on Armv8.1-M; confirm) */
/*----------------------------------------------------------------------------
Externals
@@ -88,4 +93,10 @@
SystemCoreClock = SYSTEM_CLOCK;
+ // Clear the TRCENA bit (bit 24) in DEMCR
+ DEBUG_DEMCR &=~ DEBUG_TRCENA;
+
+ // Set CCR bit 19 (CCR_DL) to enable the branch cache
+ CCR |= CCR_DL;
+
}
diff --git a/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c b/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
index 3eee3b9..99ca7dc 100644
--- a/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
+++ b/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
@@ -59,6 +59,46 @@
@return none
*/
+#if defined (ARM_MATH_HELIUM)
+
+#include "arm_mve.h"
+#include "arm_helium_utils.h"
+
+void arm_dot_prod_f32(
+ const float32_t * pSrcA,
+ const float32_t * pSrcB,
+ uint32_t blockSize,
+ float32_t * result)
+{
+ float32x4_t vecA, vecB;
+ float32x4_t vecSum;
+ vecSum = vdupq_n_f32(0.0);
+
+ do {
+ /*
+ * C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
+ * Accumulate one vector of products per pass; p predicates the loads and the accumulate.
+ */
+ mve_pred16_t p = vctp32q(blockSize);
+
+ vecA = vldrwq_z_f32(pSrcA, p);
+ vecB = vldrwq_z_f32(pSrcB, p);
+ vecSum = vfmaq_m(vecSum, vecA, vecB, p);
+ /*
+ * Advance both source pointers by one vector, then
+ * decrement the blockSize loop counter by the same number of lanes.
+ */
+ post_incr_vec_size(pSrcA);
+ post_incr_vec_size(pSrcB);
+ blockSize -= VEC_LANES_F32;
+ }
+ while ((int32_t) blockSize > 0);
+ /* Reduce the per-lane partial sums in vecSum to the scalar result. */
+ *result = vecAddAcrossF32Mve(vecSum);
+}
+
+#else
+
void arm_dot_prod_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
@@ -158,6 +198,7 @@
*result = sum;
}
+#endif /* ARM_MATH_HELIUM */
/**
@} end of BasicDotProd group
*/
diff --git a/CMSIS/DSP/Source/CMakeLists.txt b/CMSIS/DSP/Source/CMakeLists.txt
index ac56523..8248c21 100755
--- a/CMSIS/DSP/Source/CMakeLists.txt
+++ b/CMSIS/DSP/Source/CMakeLists.txt
@@ -17,6 +17,7 @@
option(LOOPUNROLL "Loop unrolling" ON)
option(ROUNDING "Rounding" OFF)
option(MATRIXCHECK "Matrix Checks" OFF)
+option(HELIUM "Helium acceleration" OFF)
# Select which parts of the CMSIS-DSP must be compiled.
# There are some dependencies between the parts but they are not tracked
diff --git a/CMSIS/DSP/Testing/CMakeLists.txt b/CMSIS/DSP/Testing/CMakeLists.txt
index de00fe2..be96fef 100644
--- a/CMSIS/DSP/Testing/CMakeLists.txt
+++ b/CMSIS/DSP/Testing/CMakeLists.txt
@@ -7,7 +7,7 @@
function(writeConfig path)
set(output "")
- list(APPEND output "OPTIMIZED,HARDFP,FASTMATH,NEON,UNROLL,ROUNDING,PLATFORM,CORE,COMPILER,VERSION\n")
+ list(APPEND output "OPTIMIZED,HARDFP,FASTMATH,NEON,HELIUM,UNROLL,ROUNDING,PLATFORM,CORE,COMPILER,VERSION\n")
if (OPTIMIZED)
list(APPEND output "1")
@@ -33,6 +33,12 @@
list(APPEND output ",0")
endif()
+ if (HELIUM)
+ list(APPEND output ",1")
+ else()
+ list(APPEND output ",0")
+ endif()
+
if (LOOPUNROLL)
list(APPEND output ",1")
else()
diff --git a/CMSIS/DSP/Testing/addToDB.py b/CMSIS/DSP/Testing/addToDB.py
index 514df54..bdf570a 100755
--- a/CMSIS/DSP/Testing/addToDB.py
+++ b/CMSIS/DSP/Testing/addToDB.py
@@ -19,7 +19,7 @@
# For table creation
MKSTRFIELD=['NAME']
-MKBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'UNROLL', 'ROUNDING','OPTIMIZED']
+MKBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'HELIUM','UNROLL', 'ROUNDING','OPTIMIZED']
MKINTFIELD=['ID', 'CYCLES']
MKDATEFIELD=['DATE']
MKKEYFIELD=['CATEGORY', 'PLATFORM', 'CORE', 'COMPILER','TYPE']
@@ -31,7 +31,7 @@
# For table value extraction
VALSTRFIELD=['NAME','VERSION']
-VALBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'UNROLL', 'ROUNDING','OPTIMIZED']
+VALBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'HELIUM','UNROLL', 'ROUNDING','OPTIMIZED']
VALINTFIELD=['ID', 'CYCLES']
VALDATEFIELD=['DATE']
VALKEYFIELD=['CATEGORY', 'PLATFORM', 'CORE', 'COMPILER','TYPE']
@@ -246,16 +246,17 @@
conn.commit()
def addOneBenchmark(elem,fullPath,db,group):
- full=pd.read_csv(fullPath,dtype={'OLDID': str} ,keep_default_na = False)
- full['DATE'] = datetime.datetime.now()
- if group:
- tableName = group
- else:
- tableName = elem.data["class"]
- conn = sqlite3.connect(db)
- createTableIfMissing(conn,elem,tableName,full)
- addRows(conn,elem,tableName,full)
- conn.close()
+ if os.path.isfile(fullPath):
+ full=pd.read_csv(fullPath,dtype={'OLDID': str} ,keep_default_na = False)
+ full['DATE'] = datetime.datetime.now()
+ if group:
+ tableName = group
+ else:
+ tableName = elem.data["class"]
+ conn = sqlite3.connect(db)
+ createTableIfMissing(conn,elem,tableName,full)
+ addRows(conn,elem,tableName,full)
+ conn.close()
def addToDB(benchmark,dbpath,elem,group):
diff --git a/CMSIS/DSP/Testing/addToRegDB.py b/CMSIS/DSP/Testing/addToRegDB.py
index 50c3e76..acbf3fd 100755
--- a/CMSIS/DSP/Testing/addToRegDB.py
+++ b/CMSIS/DSP/Testing/addToRegDB.py
@@ -19,7 +19,7 @@
# For table creation
MKSTRFIELD=['NAME','Regression']
-MKBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'UNROLL', 'ROUNDING','OPTIMIZED']
+MKBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'HELIUM','UNROLL', 'ROUNDING','OPTIMIZED']
MKINTFIELD=['ID','MAX']
MKREALFIELD=['MAXREGCOEF']
MKDATEFIELD=['DATE']
@@ -32,7 +32,7 @@
# For table value extraction
VALSTRFIELD=['NAME','VERSION','Regression']
-VALBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'UNROLL', 'ROUNDING','OPTIMIZED']
+VALBOOLFIELD=['HARDFP', 'FASTMATH', 'NEON', 'HELIUM','UNROLL', 'ROUNDING','OPTIMIZED']
VALINTFIELD=['ID', 'MAX']
VALREALFIELD=['MAXREGCOEF']
VALDATEFIELD=['DATE']
@@ -257,16 +257,17 @@
conn.commit()
def addOneBenchmark(elem,fullPath,db,group):
- full=pd.read_csv(fullPath,dtype={'OLDID': str} ,keep_default_na = False)
- full['DATE'] = datetime.datetime.now()
- if group:
- tableName = group
- else:
- tableName = elem.data["class"]
- conn = sqlite3.connect(db)
- createTableIfMissing(conn,elem,tableName,full)
- addRows(conn,elem,tableName,full)
- conn.close()
+ if os.path.isfile(fullPath):
+ full=pd.read_csv(fullPath,dtype={'OLDID': str} ,keep_default_na = False)
+ full['DATE'] = datetime.datetime.now()
+ if group:
+ tableName = group
+ else:
+ tableName = elem.data["class"]
+ conn = sqlite3.connect(db)
+ createTableIfMissing(conn,elem,tableName,full)
+ addRows(conn,elem,tableName,full)
+ conn.close()
def addToDB(benchmark,dbpath,elem,group):
diff --git a/CMSIS/DSP/Testing/createDb.sql b/CMSIS/DSP/Testing/createDb.sql
index 64737b3..2c9b161 100755
--- a/CMSIS/DSP/Testing/createDb.sql
+++ b/CMSIS/DSP/Testing/createDb.sql
@@ -70,3 +70,4 @@
INSERT INTO CORE VALUES(10,"a7","ARMCA7");
INSERT INTO CORE VALUES(11,"a9","ARMCA9");
INSERT INTO CORE VALUES(12,"a15","ARMCA15");
+INSERT INTO CORE VALUES(13,"helium","ARMv81MML_DSP_DP_MVE_FP");
diff --git a/CMSIS/DSP/Testing/summaryBench.py b/CMSIS/DSP/Testing/summaryBench.py
index 14cb2be..ff758bc 100644
--- a/CMSIS/DSP/Testing/summaryBench.py
+++ b/CMSIS/DSP/Testing/summaryBench.py
@@ -57,38 +57,40 @@
def summaryBenchmark(resultPath,elem,path):
regressionPath=os.path.join(os.path.dirname(path),"regression.csv")
- print(" Generating %s" % regressionPath)
- full=pd.read_csv(path,dtype={'OLDID': str} ,keep_default_na = False)
- #print(full)
-
- csvheaders = []
- with open(os.path.join(resultPath,'currentConfig.csv'), 'r') as f:
- reader = csv.reader(f)
- csvheaders = next(reader, None)
- groupList = list(set(elem.params.full) - set(elem.params.summary))
- #grouped=full.groupby(list(elem.params.summary) + ['ID','CATEGORY']).max()
- #grouped.reset_index(level=grouped.index.names, inplace=True)
- #print(grouped)
- #print(grouped.columns)
+ if os.path.isfile(path):
+ print(" Generating %s" % regressionPath)
+ full=pd.read_csv(path,dtype={'OLDID': str} ,keep_default_na = False)
+ #print(full)
+
+ csvheaders = []
+ with open(os.path.join(resultPath,'currentConfig.csv'), 'r') as f:
+ reader = csv.reader(f)
+ csvheaders = next(reader, None)
+
+ groupList = list(set(elem.params.full) - set(elem.params.summary))
+ #grouped=full.groupby(list(elem.params.summary) + ['ID','CATEGORY']).max()
+ #grouped.reset_index(level=grouped.index.names, inplace=True)
+ #print(grouped)
+ #print(grouped.columns)
- def reg(d):
- m=d["CYCLES"].max()
- results = smf.ols('CYCLES ~ ' + elem.params.formula, data=d).fit()
- f=joinit([formatProd(a,b) for (a,b) in zip(results.params.index,results.params.values)]," + ")
- f="".join(f)
- f = re.sub(r':','*',f)
- #print(results.summary())
- return(pd.Series({'Regression':"%s" % f,'MAX' : m,'MAXREGCOEF' : results.params.values[-1]}))
-
- regList = ['ID','OLDID','CATEGORY','NAME'] + csvheaders + groupList
+ def reg(d):
+ m=d["CYCLES"].max()
+ results = smf.ols('CYCLES ~ ' + elem.params.formula, data=d).fit()
+ f=joinit([formatProd(a,b) for (a,b) in zip(results.params.index,results.params.values)]," + ")
+ f="".join(f)
+ f = re.sub(r':','*',f)
+ #print(results.summary())
+ return(pd.Series({'Regression':"%s" % f,'MAX' : m,'MAXREGCOEF' : results.params.values[-1]}))
- regression=full.groupby(regList).apply(reg)
- regression.reset_index(level=regression.index.names, inplace=True)
- renamingDict = { a : b for (a,b) in zip(elem.params.full,elem.params.paramNames)}
- regression = regression.rename(columns=renamingDict)
- regression.to_csv(regressionPath,index=False,quoting=csv.QUOTE_NONNUMERIC)
+ regList = ['ID','OLDID','CATEGORY','NAME'] + csvheaders + groupList
+
+ regression=full.groupby(regList).apply(reg)
+ regression.reset_index(level=regression.index.names, inplace=True)
+ renamingDict = { a : b for (a,b) in zip(elem.params.full,elem.params.paramNames)}
+ regression = regression.rename(columns=renamingDict)
+ regression.to_csv(regressionPath,index=False,quoting=csv.QUOTE_NONNUMERIC)
def extractBenchmarks(resultPath,benchmark,elem):
diff --git a/CMSIS/DSP/Testing/testmain.cpp b/CMSIS/DSP/Testing/testmain.cpp
index eb5014a..51d7dd1 100644
--- a/CMSIS/DSP/Testing/testmain.cpp
+++ b/CMSIS/DSP/Testing/testmain.cpp
@@ -18,9 +18,13 @@
#include "Patterns.h"
+
int testmain()
{
char *memoryBuf=NULL;
+
+
+
memoryBuf = (char*)malloc(MEMSIZE);
if (memoryBuf !=NULL)
diff --git a/CMSIS/DSP/configCore.cmake b/CMSIS/DSP/configCore.cmake
index eda57ec..f7a24f6 100644
--- a/CMSIS/DSP/configCore.cmake
+++ b/CMSIS/DSP/configCore.cmake
@@ -180,7 +180,6 @@
#
if (NEON AND NOT CORTEXM)
- #target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_NEON __FPU_PRESENT)
target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_NEON)
endif()
@@ -189,6 +188,10 @@
target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_NEON_EXPERIMENTAL)
endif()
+ if (HELIUM AND CORTEXM)
+ target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_HELIUM)
+ endif()
+
compilerSpecificCompileOptions(${PROJECTNAME} ${ROOT})
endfunction()
\ No newline at end of file