#!/usr/bin/env bash
#
# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (-o pipefail).
set -euo pipefail
# Overview
# ========
#
# This script reports source lines of code (SLOC) as a tab-separated values
# (TSV) table and as a stacked bar chart. It uses `tokei` to gather the data,
# `jq` to filter it, and `gnuplot` to generate the plot. The data is written
# to stderr and the plot is written to stdout.
#
# This script generates information about the directory that it's run in,
# aggregated by subdirectory.
#
# It is recommended that you run it from within the TF-A root directory for
# best results.
# Functions
# =========
# Replace every newline read on stdin with a tab, joining the input lines into
# one tab-separated record on stdout. The final newline of the input becomes a
# trailing tab.
n2t() {
  tr '\n' '\t'
}
# Remove a single trailing tab from each line read on stdin and write the
# result to stdout.
#
# The tab is handed to sed as a literal character (via ANSI-C quoting) rather
# than as the `\t` escape: `\t` inside a regular expression is a GNU
# extension, and other sed implementations read it as a plain `t`, which would
# eat a trailing "t" instead of a trailing tab.
strip() {
  sed -e $'s/\t$//'
}
# Variables
# =========
# The final data file is built incrementally throughout the script, so we need
# somewhere to keep it in the meantime; mktemp provides that. The template has
# no directory part, so the file is created in the current directory.
data=$(mktemp XXXXXX-sloc.tsv)
# Remove the temporary file on every exit path. With `set -e` in effect any
# failing command ends the script early, which would otherwise leave the file
# behind. `-f` keeps the handler quiet when the file is already gone.
trap 'rm -f -- "${data}"' EXIT
# Directories to analyze: the command-line arguments when any are given,
# otherwise a default set of directories. Each one is kept as its own array
# element so that a path containing whitespace or glob characters reaches
# `find` as a single, unmodified argument.
if (( $# > 0 )); then
  analyze=("$@")
else
  analyze=("." "./drivers" "./plat" "./tools")
fi
# The directories that become rows of the table: every directory named above
# plus its immediate subdirectories, skipping any path that has a component
# starting with a period after a slash. The list is sorted and de-duplicated.
readarray -t dirs < <(
  find "${analyze[@]}" -maxdepth 1 -type d -not -path '*/\.*' | sort -u
)
# The TSV must always carry the same columns, in the same order. To guarantee
# that, the categories to chart are computed once, up front, and kept as a
# JSON-encoded array.
#
# The array holds every key that `tokei --output json` reports when run in the
# current directory, minus the entries we don't really need in the graph, such
# as "SVG".
categories=$(
  tokei --output json \
    | jq 'keys - ["Total", "Autoconf", "CSS", "JSON", "Module-Definition", "Plain Text", "SVG", "SWIG", "XML" ]'
)
# Data File Generation
# ====================
#
# Below we generate the data file used for the graph. The table is a tab
# separated value (TSV) matrix with columns of code language (Bash, C, etc.),
# and rows of subdirectories of TF-A that contain the code.
# Column headers
# --------------
# The first row is "Module" followed by every entry of the categories array,
# tab separated. The JSON is handed to jq through a quoted here-string:
# expanding it unquoted would let the shell word-split it, and glob-expand any
# piece that happens to look like a pattern, before jq ever sees it.
{
  echo "Module"
  jq ".[]" <<< "${categories}"
} | n2t > "${data}"
# n2t turned the final newline into a tab, so terminate the row explicitly.
echo >> "${data}"
# Build Each Row
# --------------
# One row per directory: the directory name, then one lines-of-code figure per
# category, in the order given by the categories array.
for dir in "${dirs[@]}"; do
  # Don't process directories that are ignored by Git
  if git check-ignore -q "${dir}"; then
    continue
  fi

  # Gnuplot likes to treat underscores as a syntax for subscripts. This
  # looks weird, as module names are not named with this syntax in mind.
  # Further, it turns out that we go through 3 expansions, so we need 8
  # (2^3) backslashes.
  printf '%s\n' "${dir}" | sed -e "s/_/\\\\\\\\_/g" | n2t >> "${data}"

  # Additional arguments to Tokei
  args=()

  # Don't include the statistics of this directory's children in its own
  # statistics if they are going to be analyzed separately. A child is any
  # other analyzed directory whose path starts with "<dir>/". The comparison
  # is a literal prefix match, so characters such as "." or "+" in a
  # directory name are never interpreted as pattern syntax.
  for candidate in "${dirs[@]}"; do
    if [[ "${candidate}" == "${dir}/"* ]]; then
      # Tokei uses gitignore syntax, so we need to strip the leading
      # period.
      args+=(--exclude "${candidate#.}")
    fi
  done

  # This is the heart of the implementation, and probably the most
  # complicated line in this script. First, we generate the subdirectory
  # sloc with tokei, in JSON format. We then filter it with jq. The jq
  # filter iterates over the column names as saved in the categories
  # variable. Each iteration through the loop, we print out the code
  # value, when it exists, or null + 0. This takes advantage of the
  # property of null:
  #
  # > null can be added to any value, and returns the other value
  # > unchanged.
  #
  # The ${args[@]+...} form expands to nothing when args is empty; bash
  # releases older than 4.4 treat a plain "${args[@]}" on an empty array as
  # an unset variable and abort under `set -u`.
  tokei "${dir}" --output json ${args[@]+"${args[@]}"} \
    | jq " .[${categories}[]].code + 0" \
    | n2t | strip >> "${data}"

  # The newline that ended the row was turned into a tab and stripped above,
  # so terminate the row explicitly.
  echo >> "${data}"
done
# Output
# ------
# Copy the finished table to stderr, pass it to the gnuplot script whose path
# is this script's own path with a trailing "bash" replaced by "plot", and
# finally delete the temporary data file.
plot_script="${0%bash}plot"
cat "${data}" >&2
gnuplot -c "${plot_script}" "${data}"
rm "${data}"