refactor: miscellaneous fixes to visualizations

This change introduces some minor refactoring work to both
visualization scripts, namely:

- Comments reflowed to reach 80 characters per line
- Fixes to the category names for more recent versions of Tokei
- GNUPlot scripts no longer require a fixed number of columns
- Whitespace inconsistency fixes

SLOC-specific changes:

- Updated extended color palette to prevent color cycling
- Search directories can now be passed via arguments
- Parent directories no longer include their children's stats

Change-Id: I3f0ecb849a97f1f8008423772ddde4ca8c4771c2
Signed-off-by: Chris Kay <chris.kay@arm.com>
Co-authored-by: Weronika Wiesiolek <weronika.wiesiolek@arm.com>
diff --git a/script/graphs/README.rst b/script/graphs/README.rst
index f9db4a1..0860cb2 100644
--- a/script/graphs/README.rst
+++ b/script/graphs/README.rst
@@ -6,35 +6,41 @@
 
 All scripts produce a PNG graph on stdout and the data on stderr.
 
-Test Runs by category
----------------------
+Tests by Category
+-----------------
 
 The script `categorize-tests.bash`, and its associated awk and plot scripts,
-generate a stacked bar chart with bars representing groups of tests (l1 l2,
-etc.) and segments of the bars representing types. `categorize-tests.bash`
+generate a stacked bar chart with bars representing groups of tests (L1, L2,
+etc.) and segments of the bars representing types. ``categorize-tests.bash``
 accepts an argument to filter the tests included with grep.
 
-For example, the following will produce a graph of the juno-specific tests:
+For example, the following will produce a graph of the Juno-specific tests:
+
+.. code-block:: bash
 
     bash categorize-tests.bash juno > juno-tests.png 2> juno-tests.txt
 
 Lines of Code by Module
 -----------------------
 
-The script `sloc-viz.bash`, and its associated plot script, generate a stacked
-bar chart where each bar is a module and the bars' segments represent programming
-languages (or documentation languages). This script will produce a graph for
-whatever directory it's run within, and has special logic that includes more
-detail when run from the Trusted Firmware - A project's root directory.
+The script ``sloc-viz.bash``, and its associated plot script, generate a stacked
+bar chart where each bar is a module and the bars' segments represent
+programming languages (or documentation languages). This script will produce a
+graph for whatever directory it's run within, and has special logic that
+includes more detail when run from the Trusted Firmware - A project's root
+directory.
 
 This script has additional requirements:
+
 * ``tokei`` - a quick source lines of code counting tool
 * ``jq`` - a JSON query language for the command line, version 1.6 or later
   as the ``--jsonargs`` option is required
 
-For example, when run from the root of TF-A, the following commandline will graph
-sloc of TF-A:
+For example, when run from the root of TF-A, the following command line will
+graph SLOC of TF-A:
+
+.. code-block:: bash
 
     bash ../<this-repo>/script/graph/sloc-viz.bash > sloc.png 2> sloc.tsv
 
-*Copyright (c) 2021, Arm Limited. All rights reserved.*
+*Copyright (c) 2021-2022, Arm Limited. All rights reserved.*
diff --git a/script/graphs/categorize-tests.awk b/script/graphs/categorize-tests.awk
index b590c65..9f72eba 100644
--- a/script/graphs/categorize-tests.awk
+++ b/script/graphs/categorize-tests.awk
@@ -1,10 +1,11 @@
+#!/usr/bin/awk -f
+
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-#!/usr/bin/env awk
-#
+
 # This is a script to categorize tests within this repo by type. This script is
 # intended to be run with the output of `find group -type f`, run from within
 # the root directory of this repo piped into it. See the bash script with the
@@ -14,37 +15,54 @@
 	# We're breaking records upon the "/" character so that we can have an
 	# aggregation that's keyed by test group, if we want to.
 	FS = "/";
+
+	categories[0] = "\"L1\"";
+	categories[1] = "\"L2\"";
+	categories[2] = "\"L3\"";
+	categories[3] = "\"Release\"";
+	categories[4] = "\"Disabled\"";
 }
 
 # Here we filter out any records without exactly 3 fields (i.e. 3-level paths)
 # and categorize the rest.
 NF == 3 {
-	if (/-l1/) category = "\"l1 - Every Patch\"";
-	else if (/-l2/) category = "\"l2 - Risky or Big Patches\"";
-	else if (/-l3/) category = "\"l3 - Daily\"";
-	else if (/-manual/ || /-release/ ) category = "\"remainder - Every Release\"";
-	else if (/-unstable/) category = "\"unstable - Never Run\"";
-	else category = "\"remainder - Every Release\"";
-	cats[category] = 1
+	if (/-l1/) {
+		category = 0;
+	} else if (/-l2/) {
+		category = 1;
+	} else if (/-l3/) {
+		category = 2;
+	} else if (/-unstable/) {
+		category = 4;
+	} else {
+		category = 3;
+	}
+
 	# Each of these categorizes a test into a category, based on a regular
 	# expression. When you add another test category, you should also add
 	# printing to the print group loop below.
-	if (/linux/ || /uboot/ || /edk2/ || /:fvp-([a-z0-9.]-)*spm/ || /:juno-([a-z0-9.]-)*scmi/) integration[category] += 1;
-	else if (/tftf/) component[category] += 1;
-	else if (/coverity/ || /misra/ || /scan_build/) static[category] += 1;
-	else if (/:nil/ || /norun/) build[category] += 1;
-	else print $0 " No test category; excluding from data" >> "/dev/stderr";
+	if (/linux/ || /uboot/ || /edk2/ || /:fvp-([a-z0-9.]-)*spm/ || /:juno-([a-z0-9.]-)*scmi/) {
+		integration[category] += 1;
+	} else if (/tftf/) {
+		component[category] += 1;
+	} else if (/coverity/ || /misra/ || /scan_build/) {
+		static[category] += 1;
+	} else if (/:nil/ || /norun/) {
+		build[category] += 1;
+	} else {
+		print $0 " No test category; excluding from data" >> "/dev/stderr";
+	}
 }
 
-
 END {
-	for (name in cats)
+	for (category = 0; category in categories; category++) {
 		# This prints a single test group, by name. When you add another
 		# category (with another map), add another field to this print.
-		printf("%s %d %d %d %d\n",
-			name,
-			build[name],
-			static[name],
-			component[name],
-			integration[name]);
+		printf("%s	%d	%d	%d	%d\n",
+			categories[category],
+			build[category],
+			static[category],
+			component[category],
+			integration[category]);
+	}
 }
diff --git a/script/graphs/categorize-tests.bash b/script/graphs/categorize-tests.bash
index 92d286c..de5aed1 100644
--- a/script/graphs/categorize-tests.bash
+++ b/script/graphs/categorize-tests.bash
@@ -1,9 +1,11 @@
+#!/usr/bin/env bash
+
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-#!/usr/bin/env bash
+
 set -euo pipefail
 
 # This script plots the categories of tests by group. It does this by combining
@@ -13,12 +15,7 @@
 # stdout.
 
 # Variables
-# ^^^^^^^^^
-#
-# We are located in a specific location with this repo, so we can take
-# advantage of that to avoid any issues with running this from an unexpected
-# directory.
-rootdir=$(realpath $(dirname $(realpath $0))/../..)
+# =========
 
 # I would like to use process-substitution for this, so that we can avoid
 # making a file on disk and keep everything in memory, removing the need to
@@ -30,35 +27,27 @@
 # removed on success.
 categories=$(mktemp "XXXXXXX-test-categories.dat")
 
-# We change a portion of the title for our graph based on the argument passed
-# to this script.
-title=$(if [[ $# -ge 1 ]] ; then echo $1 ; else echo "All Tests" ; fi)
+# We change a portion of the title for our graph based on the argument passed to
+# this script.
+subtitle=$([[ $# -ge 1 ]] && echo " (Filter: \"$1\")" || true)
 
-# Generate Data into the $categories file
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# Generate Data into the ${categories} file
+# =========================================
 #
 # The following pipeline is the heart of the implementation, and has four
 # stages: find, ???, awk, and sort. The ??? stage of the pipeline is determined
 # by the bash if statement, which switches between a filter, when an argument
-# is passed, and a passthrough, implemented as `cat -`, when no filter
-# argument is passed.
-#
-# Note that the env -C before the find is to enforce that it produces
-# directories relative to the $rootdir, so that it does not trip up the awk
-# script.
-env -C $rootdir find group -type f |\
-	if [[ $# -ge 1 ]] ; then
-		grep -e "$1" -
-	else
-		cat -
-	fi | awk -f ${0%bash}awk | sort > $categories
+# is passed, and a passthrough, implemented as `cat -`, when no filter argument
+# is passed.
+echo '"Name"	"Build-only tests"	"Static checks (MISRA, etc.)"	"Component tests"	"Integration tests (Linux boot, etc.)"' > "${categories}"
+find group -type f | ([[ $# -ge 1 ]] && grep -e "$1" - || cat -) |
+	awk -f "${0%bash}awk" >> "${categories}"
 
 # Generate a Plot (on stdout)
-gnuplot -e "subtitle='$title'" -c ${0%bash}plot $categories
+gnuplot -e "subtitle='${subtitle}'" -c "${0%bash}plot" "${categories}"
 
 # Dump data to stderr
-echo name build static component inegration 1>&2
-cat $categories 1>&2
+cat "${categories}" 1>&2
 
 # Clean up temporary files
-rm $categories
+rm "${categories}"
diff --git a/script/graphs/categorize-tests.plot b/script/graphs/categorize-tests.plot
index 05fe5b2..9286872 100644
--- a/script/graphs/categorize-tests.plot
+++ b/script/graphs/categorize-tests.plot
@@ -1,15 +1,21 @@
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-set terminal png enhanced font ",18" size 1920, 1080
+
+set title "Incremental Tests Enabled at Each CI Level".subtitle
+set terminal png enhanced font ",16" size 1920, 1080
+
+set datafile separator tab
+set key autotitle columnheader
+
 set style data histograms
 set style histogram rowstacked
+set style fill solid border -1
+
 set boxwidth 0.5 relative
-set style fill solid 1.0 border -1
-set title "Incremental Tests Enabled at each CI level for ".subtitle
-plot ARG1 using 2:xtic(1) title "Build-only",\
-	  '' using 3 title "Static (MISRA, etc.)",\
-	  '' using 4 title "Component",\
-	  '' using 5 title "Integration (boot Linux, etc.)"
+
+stats ARG1 matrix rowheader columnheader nooutput
+
+plot ARG1 using 2:xtic(1), for [i=3:(STATS_columns + 1)] "" using i
diff --git a/script/graphs/sloc-viz.bash b/script/graphs/sloc-viz.bash
index 6bef76f..bc05da2 100644
--- a/script/graphs/sloc-viz.bash
+++ b/script/graphs/sloc-viz.bash
@@ -1,9 +1,11 @@
+#!/usr/bin/env bash
+
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-#!/usr/bin/env bash
+
 set -euo pipefail
 
 # Overview
@@ -16,69 +18,101 @@
 #
 # This script generates information about the directory that it's run in,
 # aggregated by subdirectory.
+#
 # It is recommended that you run it from within the TF-A root directory for
 # best results.
 
-# Variables
+# Functions
 # =========
 
-# convert newlines to tabs
-n2t="tr \n \t"
+# Convert newlines to tabs
+n2t() {
+	tr "\n" "\t"
+}
+
+# Strip trailing tabs
+strip() {
+	sed 's/\t$//'
+}
+
+# Variables
+# =========
 
 # We will build the final data file incrementally throughout the script. We need
 # A place to store this data, temporarily, so mktemp fills the role.
 data=$(mktemp XXXXXX-sloc.tsv)
 
-# Top level TF-A directories that we consider by themselves.
-toplevel=$(find -mindepth 1 -maxdepth 1 -type d -and ! -name ".*" | sed "s|./||g")
+# Directories to analyze: script arguments, or the TF-A defaults if none
+analyze=("$@"); [[ $# -gt 0 ]] || analyze=("." "./drivers" "./plat" "./tools")
 
-# Second level TF-A directories that we consider separately.
-secondlevel=$(find drivers plat -mindepth 1 -maxdepth 1 -type d || true)
+# Top-level directories that we will analyze
+readarray -t dirs < <(find "${analyze[@]}" -maxdepth 1 -type d -not -path '*/\.*' | sort -u)
 
 # We want to be sure that we always put the data in the same order, with the
 # same keys in the resulting TSV file. To ensure this, we keep a json-encoded
 # array of the categories we would like to show in the graph.
-# This was generated by taking the output of `tokei --output json | jq keys`
-# and trimming out things that we don't really need like "Svg"
-categories='["AssemblyGAS", "C", "CHeader", "DeviceTree", "Makefile", "Python", "ReStructuredText"]'
+#
+# This was generated by taking the output of `tokei --output json | jq keys` and
+# trimming out things that we don't really need like "Svg"
+categories=$(tokei --output json | jq 'keys - ["Total", "Autoconf", "CSS", "JSON", "Module-Definition", "Plain Text", "SVG", "SWIG", "XML" ]')
 
 # Data File Generation
 # ====================
 #
-# Below we generate the data file used for the graph. The table is a
-# tab separated value(TSV) matrix with columns of code language (Bash, C, etc),
+# Below we generate the data file used for the graph. The table is a tab
+# separated value (TSV) matrix with columns of code language (Bash, C, etc.),
 # and rows of subdirectories of TF-A that contain the code.
 
 # Column headers
 # --------------
-(echo module; echo $categories | jq ".[]" ) | $n2t  > $data
-# add a newline
-echo >> $data
+(echo "Module"; echo ${categories} | jq ".[]" ) | n2t  > "${data}"
+echo >> "${data}"
 
 # Build Each Row
 # --------------
-for dir in $toplevel $secondlevel; do
+for dir in "${dirs[@]}"; do
+	# Don't process directories that are ignored by Git
+	if git check-ignore -q "${dir}"; then
+		continue
+	fi
+
 	# Gnuplot likes to treat underscores as a syntax for subscripts. This
 	# looks weird, as module names are not named with this syntax in mind.
-	# Further, it turns out that we go through 3 expansions, so we need 8 (2^3)
-	# backslashes.
-	echo $dir | sed -e "s/_/\\\\\\\\_/g" | $n2t >> $data
+	# Further, it turns out that we go through 3 expansions, so we need 8
+	# (2^3) backslashes.
+	echo "${dir}" | sed -e "s/_/\\\\\\\\_/g" | n2t >> "${data}"
+
+	# Additional arguments to Tokei
+	args=()
+
+	# Don't include the statistics of this directory's children in its own
+	# statistics if they are going to be analyzed separately.
+	readarray -t excludes < <(printf '%s\n' "${dirs[@]}" | grep -F -- "${dir}/")
+
+	for exclude in "${excludes[@]}"; do
+		# Tokei uses gitignore syntax, so we need to strip the leading
+		# period.
+		args+=(--exclude "${exclude#.}")
+	done
+
 	# This is the heart of the implementation, and probably the most
 	# complicated line in this script. First, we generate the subdirectory
-	# sloc with tokei, in json format. We then filter it with jq. The jq
+	# sloc with tokei, in JSON format. We then filter it with jq. The jq
 	# filter iterates over the column names as saved in the categories
 	# variable. Each iteration through the loop, we print out the code
 	# value, when it exists, or null + 0. This takes advantage of the
-	# property of null:
-	#  > null can be added to any value, and returns the other value
-	#  > unchanged.
-	tokei --output json $dir \
-	        | jq " .[$categories[]].code + 0" \
-		| $n2t >> $data
-	echo  >> $data
+	# property of null:
+	#
+	# > null can be added to any value, and returns the other value
+	# > unchanged.
+	tokei "${dir}" --output json "${args[@]}" \
+		| jq " .[${categories}[]].code + 0" \
+		| n2t | strip >> "${data}"
+
+	echo >> "${data}"
 done
 
-cat $data 1>&2
-gnuplot -c ${0%bash}plot $data
+cat "${data}" 1>&2
+gnuplot -c "${0%bash}plot" "${data}"
 
-rm $data
+rm "${data}"
diff --git a/script/graphs/sloc-viz.plot b/script/graphs/sloc-viz.plot
index a334b89..24633ce 100644
--- a/script/graphs/sloc-viz.plot
+++ b/script/graphs/sloc-viz.plot
@@ -1,23 +1,38 @@
 #
-# Copyright (c) 2021 Arm Limited. All rights reserved.
+# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
-#
-# Stacked histograms
-#
-set terminal png enhanced font ",18" size 1920, 1080
+
 set title "Source Lines of Code by Module"
-set key invert reverse Left outside
+set terminal png enhanced font ",16" size 1920, 1080
+
+set datafile separator tab
+
 set key autotitle columnheader
-set auto y
-set auto x
-unset xtics
+set key reverse Left outside
+
 set xtics nomirror rotate by -75 scale 0
+
 set style data histogram
 set style histogram rowstacked
 set style fill solid border -1
+
 set boxwidth 0.75
-#
-plot ARG1 using 2:xtic(1), for [i=3:8] '' using i
-#
+
+set lt 1 lc rgb "#C971B2"
+set lt 2 lc rgb "#78D19F"
+set lt 3 lc rgb "#CB9B6B"
+set lt 4 lc rgb "#7696C0"
+set lt 5 lc rgb "#ECEAC6"
+set lt 6 lc rgb "#D2CCDA"
+set lt 7 lc rgb "#766AC9"
+set lt 8 lc rgb "#C86D6A"
+set lt 9 lc rgb "#92CCD7"
+set lt 10 lc rgb "#DEAAB5"
+set lt 11 lc rgb "#BC9FD9"
+set lt 12 lc rgb "#A5B08B"
+
+stats ARG1 matrix rowheader columnheader nooutput
+
+plot ARG1 using 2:xtic(1), for [i=3:(STATS_columns + 1)] "" using i