CMSIS-DSP: Improvement to testing scripts
diff --git a/CMSIS/DSP/Testing/extractDb.py b/CMSIS/DSP/Testing/extractDb.py
new file mode 100755
index 0000000..13595bd
--- /dev/null
+++ b/CMSIS/DSP/Testing/extractDb.py
@@ -0,0 +1,283 @@
+import argparse
+import sqlite3
+import re
+import pandas as pd
+import numpy as np
+
+# SQL query returning the most recent run: the highest runid of the RUN table
+lastID="""SELECT runid FROM RUN ORDER BY runid DESC LIMIT 1
+"""
+
+def getLastRunID():
+  """Return, as an int, the highest runid found in the RUN table (read through `c`)."""
+  return(int(c.execute(lastID).fetchone()[0]))
+
+
+runid = 1  # placeholder: assigned again below in both branches of the if/else
+# Command line: database to read (-b), markdown report to write (-o), regression mode (-r)
+parser = argparse.ArgumentParser(description='Generate summary benchmarks')
+
+parser.add_argument('-b', nargs='?',type = str, default="bench.db", help="Benchmark database")
+parser.add_argument('-o', nargs='?',type = str, default="full.md", help="Full summary")
+parser.add_argument('-r', action='store_true', help="Regression database")
+
+# Remaining arguments: only the first one is read below, as the runid to report
+parser.add_argument('others', nargs=argparse.REMAINDER)
+
+args = parser.parse_args()
+# Connection shared by all the query helpers of this script
+c = sqlite3.connect(args.b)
+# Report the runid given on the command line, or the latest run when none is given
+if args.others:
+   runid=int(args.others[0])
+else:
+   runid=getLastRunID()
+
+# Data is only extracted from the benchmark data tables.
+# The tables below are used for descriptions and are skipped.
+REMOVETABLES=['RUN','CORE', 'PLATFORM', 'COMPILERKIND', 'COMPILER', 'TYPE', 'CATEGORY', 'CONFIG']
+
+# This assumes that the database is generated by the regression script,
+# so the platform is the same for all benchmarks.
+# Category and type come from the test name in the yaml,
+# so there is no need to add this information here.
+# NAME is removed here because it is added back as the first column.
+REMOVECOLUMNS=['runid','NAME','type','platform','category','coredef','OPTIMIZED','HARDFP','FASTMATH','NEON','HELIUM','UNROLL','ROUNDING','DATE','compilerkindid','date','categoryid', 'ID', 'platformid', 'coreid', 'compilerid', 'typeid']
+
+def getBenchTables():
+    """List the benchmark data tables of the database.
+
+    Table names are read from sqlite_master through the global connection `c`;
+    the description tables listed in REMOVETABLES are left out.
+    """
+    tables=c.execute("SELECT name FROM sqlite_master WHERE type='table'")
+    return([row[0] for row in tables if row[0] not in REMOVETABLES])
+
+def getExistingTypes(benchTable):
+    """Return the list of the distinct typeid values found in table benchTable.
+    The table name is pasted as-is into the SQL text sent through `c`.
+    """
+    return([row[0] for row in c.execute("select distinct typeid from %s" % benchTable)])
+
+# Query for the distinct (compiler, version) pairs of a table (%s) for one typeid
+versioncompiler="""select distinct compiler,version from %s 
+  INNER JOIN COMPILER USING(compilerid)
+  INNER JOIN COMPILERKIND USING(compilerkindid) WHERE typeid=?"""
+
+def getExistingCompiler(benchTable,typeid):
+    """Return the distinct (compiler, version) rows of table benchTable for type typeid."""
+    compilersQuery=versioncompiler % benchTable
+    found=c.execute(compilersQuery,(typeid,))
+    return(found.fetchall())
+
+def getTypeName(typeid):
+    """Return the type name that the TYPE table associates with typeid."""
+    (typeName,)=c.execute("select type from TYPE where typeid=?",(typeid,)).fetchone()
+    return(typeName)
+ 
+def diff(first, second):
+    """Return the items of first absent from second (order and duplicates of first kept)."""
+    excluded = frozenset(second)
+    return list(filter(lambda item: item not in excluded, first))
+
+
+# Query returning the columns listed in the first %s from the table named by the
+# second %s, for one compiler, version, typeid and runid
+benchCmd="""select %s from %s
+  INNER JOIN CATEGORY USING(categoryid)
+  INNER JOIN PLATFORM USING(platformid)
+  INNER JOIN CORE USING(coreid)
+  INNER JOIN COMPILER USING(compilerid)
+  INNER JOIN COMPILERKIND USING(compilerkindid)
+  INNER JOIN TYPE USING(typeid)
+  WHERE compiler=? AND VERSION=? AND typeid = ? AND runid = ?
+  """
+
+# Query returning the distinct test names (NAME) of the table named by %s,
+# for one compiler, version, typeid and runid
+benchNames="""select distinct NAME from %s
+  INNER JOIN COMPILER USING(compilerid)
+  INNER JOIN COMPILERKIND USING(compilerkindid)
+  INNER JOIN TYPE USING(typeid)
+  WHERE compiler=? AND VERSION=? AND typeid = ? AND runid = ?
+  """
+
+# Query joining the table named by %s with all its description tables
+benchCmdColumns="""select * from %s
+  INNER JOIN CATEGORY USING(categoryid)
+  INNER JOIN PLATFORM USING(platformid)
+  INNER JOIN CORE USING(coreid)
+  INNER JOIN COMPILER USING(compilerid)
+  INNER JOIN COMPILERKIND USING(compilerkindid)
+  INNER JOIN TYPE USING(typeid)
+  """
+
+def joinit(iterable, delimiter):
+    """Yield the items of iterable with delimiter between consecutive items (nothing if empty)."""
+    for position, item in enumerate(iterable):  # no bare next(): an empty input must not raise
+        if position:
+            yield delimiter
+        yield item
+
+def isNotIDColumn(col):
+    """Tell whether col is NOT an identifier column.
+
+    Identifier columns are those whose name ends with a lowercase "id"
+    (often the primary key of the table); the check is case sensitive.
+    """
+    return(re.match(r'^.*id$',col) is None)
+    
+def getTestNames(benchTable,comp,typeid):
+    """Return the distinct test names of benchTable for one compiler and type.
+
+    comp is a (compiler, version) pair; rows are restricted to the global runid."""
+    rows=c.execute(benchNames % benchTable,(comp[0],comp[1],typeid,runid))
+    return([row[0] for row in rows])
+
+def getColNamesAndData(benchTable,comp,typeid):
+    """Return (column names, data) of benchTable for one compiler and type.
+
+    comp is a (compiler, version) pair; rows are restricted to the global runid.
+    Columns: 'NAME', then the joined columns that are neither in REMOVECOLUMNS nor
+    identifier columns. Data: a numpy array with one row per selected record."""
+    cursor=c.cursor()
+    cursor.execute(benchCmdColumns % (benchTable))   # run only to read the column names
+    allCols=[desc[0] for desc in cursor.description]
+    keepCols=['NAME'] + [col for col in diff(allCols , REMOVECOLUMNS) if isNotIDColumn(col)]
+    records=cursor.execute(benchCmd % (",".join(keepCols),benchTable),(comp[0],comp[1],typeid,runid))
+    return(keepCols,np.array([list(record) for record in records]))
+
+def writeColumns(f,cols):
+    """Write the two header lines of a markdown table to f.
+
+    f    : writable text stream
+    cols : column titles (strings)
+
+    The titles line is followed by a line of centered (":-:") column separators.
+    """
+    f.write("|%s|\n" % "|".join(cols))
+    f.write("|%s|\n" % "|".join(":-:" for _ in cols))
+
+def writeRow(f,row):
+    """Write one row of a markdown table to f.
+
+    f   : writable text stream
+    row : cell values; each one is converted with str()
+    """
+    f.write("|%s|\n" % "|".join(map(str,row)))
+
+# Columns holding numeric test parameters: converted to numbers and used as sort keys
+PARAMS=["NB","NumTaps", "NBA", "NBB", "Factor", "NumStages","VECDIM","NBR","NBC","NBI","IFFT", "BITREV"]
+
+def _pivotByCore(ref,toSort,indexCols,field):
+    """Pivot ref so that field gets one column per core; rows are indexed by indexCols, sorted by toSort."""
+    data=ref.pivot_table(index=indexCols, columns='core',
+    values=[field], aggfunc='first')
+    return(data.sort_values(toSort))
+
+def _writeTableFor(name,output,data,indexCols):
+    """Write to output the markdown table of the rows of the pivoted frame data that belong to test name."""
+    # The pivot columns are (field, core) pairs: the header only shows the core
+    cores=[col[1] for col in data.columns]
+    writeColumns(output,diff(indexCols,['NAME']) + cores)
+
+    dataForFunc=data.loc[name]
+    if not isinstance(dataForFunc,pd.DataFrame):
+        # NAME was the only index level: a single row is left
+        writeRow(output,dataForFunc)
+        return
+
+    for row in dataForFunc.itertuples():
+        # row[0] is the remaining index key: a tuple when several parameter levels
+        # are left, otherwise one scalar. Testing for tuple (rather than for int)
+        # keeps float, numpy or string keys in a single cell.
+        key=row[0]
+        cells=list(key) if isinstance(key,tuple) else [key]
+        writeRow(output,cells + list(row[1:]))
+
+def regressionTableFor(name,output,ref,toSort,indexCols,field):
+    """Write the markdown table of one value column for one test.
+
+    name      : test name, looked up in the pivot index
+    output    : writable text stream
+    ref       : pandas DataFrame of the benchmark records (with 'core' and field columns)
+    toSort    : names used to sort the rows
+    indexCols : columns used as the pivot index (NAME plus the other row identifiers)
+    field     : value column displayed for each core
+    """
+    _writeTableFor(name,output,_pivotByCore(ref,toSort,indexCols,field),indexCols)
+
+def formatTableByCore(output,testNames,cols,vals):
+    """Write, for each test, its results as markdown tables with one column per core.
+
+    output    : writable text stream
+    testNames : names of the tests to report
+    cols      : column names of vals
+    vals      : numpy array of records, one row per record; nothing is written when empty
+
+    With the -r option (args.r) each test gets three tables (Regression, MAX,
+    MAXREGCOEF); otherwise it gets a single table of CYCLES.
+    """
+    if vals.size == 0:
+        return
+
+    ref=pd.DataFrame(vals,columns=cols)
+    toSort=["NAME"]
+    for param in PARAMS:
+        if param in ref.columns:
+            ref[param]=pd.to_numeric(ref[param])
+            toSort.append(param)
+
+    if args.r:
+        # (section title, value column) of each table written for a test
+        sections=[("##### Regression\n",'Regression'),
+                  ("##### Max cycles\n",'MAX'),
+                  ("##### Max Reg Coef\n",'MAXREGCOEF')]
+        ref['MAX']=pd.to_numeric(ref['MAX'])
+        ref['MAXREGCOEF']=pd.to_numeric(ref['MAXREGCOEF'])
+        indexCols=diff(cols,['core','Regression','MAXREGCOEF','MAX','version','compiler'])
+    else:
+        sections=[(None,'CYCLES')]
+        ref['CYCLES']=pd.to_numeric(ref['CYCLES'])
+        indexCols=diff(cols,['core','CYCLES','version','compiler'])
+
+    # The pivots do not depend on the test name: build each of them once, not per test
+    pivots=[(title,_pivotByCore(ref,toSort,indexCols,field)) for title,field in sections]
+    for name in testNames:
+        output.write("#### %s\n" % name)
+        for title,data in pivots:
+            if title is not None:
+                output.write(title)
+            _writeTableFor(name,output,data,indexCols)
+
+def addReportFor(output,benchName):
+    """Write the report of one benchmark table to output.
+
+    output    : writable text stream receiving the markdown
+    benchName : name of the benchmark table
+
+    A "# table" title is followed by one "## type" section per type found in the
+    table and, inside it, one "### compiler (version)" section per compiler.
+    """
+    print("Process %s\n" % benchName)
+    output.write("# %s\n" % benchName)
+    for typeID in getExistingTypes(benchName):
+        output.write("## %s\n" % getTypeName(typeID))
+        for comp in getExistingCompiler(benchName,typeID):
+            output.write("### %s (%s)\n" % comp)
+            header,records=getColNamesAndData(benchName,comp,typeID)
+            formatTableByCore(output,getTestNames(benchName,comp,typeID),header,records)
+           
+
+
+# Script body: write the report of every benchmark table to the file args.o;
+# the database connection is closed even when the report fails.
+try:
+  with open(args.o,"w") as output:
+      benchtables=getBenchTables()
+      for bench in benchtables:
+          addReportFor(output,bench)
+finally:
+     c.close()
+
+