script/graphs/sloc-viz.bash - next/ci/tf-a-ci-scripts - TrustedFirmware Git Browser

 #!/usr/bin/env bash

 #
 # Copyright (c) 2021-2022 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #

 set -euo pipefail

 # Overview
 # ========
 #
 # This script generates source lines of code as a tab separated values (TSV)
 # file and a stacked bar chart. It uses `tokei` for gathering the data, and
 # `gnuplot` for generating the plot. The data is available on stderr and the
 # plot will be put in stdout.
 #
 # This script generates information about the directory that it's run in,
 # aggregated by subdirectory.
 #
 # It is recommended that you run it from within the TF-A root directory for
 # best results.

 # Functions
 # =========

 # Filter: copy stdin to stdout, turning every newline character into a tab
 # so that a list of lines becomes a single tab-separated record.
 n2t() {
 	tr '\012' '\011'
 }

 # Filter: remove one trailing tab from the end of each line read on stdin.
 #
 # The tab is handed to sed as a literal character through bash ANSI-C quoting
 # ($'...'). The `\t` escape inside a sed regex is a GNU extension; other sed
 # implementations would match a literal `t` instead.
 strip() {
 	sed $'s/\t$//'
 }

 # Variables
 # =========

 # We will build the final data file incrementally throughout the script. We
 # need a place to store this data temporarily, so mktemp fills the role. The
 # template has no directory part, so the file is created in the current
 # directory.
 data=$(mktemp XXXXXX-sloc.tsv)

 # Remove the temporary file on every exit path. Under `set -e` any failing
 # command aborts the script, which would otherwise leave the file behind.
 trap 'rm -f -- "${data}"' EXIT

 # Subdirectories that we will analyze: the command-line arguments when any
 # non-empty ones are given, otherwise a default set. Every path is its own
 # array element, so paths containing whitespace or glob characters survive.
 if [[ -n "$*" ]]; then
 	analyze=("$@")
 else
 	analyze=("." "./drivers" "./plat" "./tools")
 fi

 # Top-level directories that we will analyze: each root plus its immediate
 # subdirectories, skipping hidden ones, sorted and de-duplicated.
 readarray -t dirs < <(find "${analyze[@]}" -maxdepth 1 -type d -not -path '*/\.*' | sort -u)

 # The TSV must always list the same languages in the same order, so the set
 # of columns is computed once and kept as a JSON-encoded array.
 #
 # The array holds every key that `tokei --output json` reports for the
 # current directory, minus "Total" and the entries that we do not want in
 # the graph (such as "SVG").
 categories=$(
 	tokei --output json \
 		| jq 'keys - ["Total", "Autoconf", "CSS", "JSON", "Module-Definition", "Plain Text", "SVG", "SWIG", "XML" ]'
 )

 # Data File Generation
 # ====================
 #
 # Below we generate the data file used for the graph. The table is a tab
 # separated value (TSV) matrix with columns of code language (Bash, C, etc.),
 # and rows of subdirectories of TF-A that contain the code.

 # Column headers
 # --------------
 # The first row is "Module" followed by one JSON-quoted language name per
 # category, tab separated. ${categories} is passed quoted so that the JSON
 # text reaches jq exactly as stored, without word splitting or globbing.
 #
 # NOTE: unlike the data rows, this row is not passed through strip, so it
 # ends with a tab before the newline.
 {
 	printf '%s\n' "Module"
 	jq ".[]" <<<"${categories}"
 } | n2t > "${data}"
 echo >> "${data}"

 # Build Each Row
 # --------------
 # Each row is the (escaped) directory name followed by the lines-of-code
 # count for every category, tab separated.
 for dir in "${dirs[@]}"; do
 	# Don't process directories that are ignored by Git
 	if git check-ignore -q "${dir}"; then
 		continue
 	fi

 	# Gnuplot likes to treat underscores as a syntax for subscripts. This
 	# looks weird, as module names are not named with this syntax in mind.
 	# Further, it turns out that we go through 3 expansions, so we need 8
 	# (2^3) backslashes.
 	printf '%s\n' "${dir}" | sed -e "s/_/\\\\\\\\_/g" | n2t >> "${data}"

 	# Additional arguments to Tokei
 	args=()

 	# Don't include the statistics of this directory's children in its own
 	# statistics if they are going to be analyzed separately. A directory
 	# is a child when its path starts with "${dir}/". The comparison is a
 	# literal prefix match, so characters such as "." in the path are not
 	# treated as wildcards.
 	for candidate in "${dirs[@]}"; do
 		if [[ "${candidate}" == "${dir}/"* ]]; then
 			# Tokei uses gitignore syntax, so we need to strip the
 			# leading period.
 			args+=(--exclude "${candidate#.}")
 		fi
 	done

 	# This is the heart of the implementation, and probably the most
 	# complicated line in this script. First, we generate the subdirectory
 	# sloc with tokei, in JSON format. We then filter it with jq. The jq
 	# filter iterates over the column names as saved in the categories
 	# variable. Each iteration through the loop, we print out the code
 	# value, when it exists, or null + 0. This takes advantage of the
 	# property of null:
 	#
 	# > null can be added to any value, and returns the other value
 	# > unchanged.
 	#
 	# The ${args[@]+...} form expands to nothing when args is empty; a
 	# plain "${args[@]}" is an unbound-variable error under `set -u` on
 	# bash releases older than 4.4.
 	tokei "${dir}" --output json ${args[@]+"${args[@]}"} \
 		| jq " .[${categories}[]].code + 0" \
 		| n2t | strip >> "${data}"

 	echo >> "${data}"
 done

 # Emit the finished table on stderr, then render the chart with the gnuplot
 # script whose path is this script's path with the trailing "bash" replaced
 # by "plot".
 cat "${data}" 1>&2

 # Capture gnuplot's status instead of letting `set -e` abort here, so that
 # the temporary file is removed even when plotting fails.
 plot_status=0
 gnuplot -c "${0%bash}plot" "${data}" || plot_status=$?

 rm -f -- "${data}"

 if (( plot_status != 0 )); then
 	exit "${plot_status}"
 fi
	#!/usr/bin/env bash

	#
	# Copyright (c) 2021-2022 Arm Limited. All rights reserved.
	#
	# SPDX-License-Identifier: BSD-3-Clause
	#

	set -euo pipefail

	# Overview
	# ========
	#
	# This script generates source lines of code as a tab separated values (TSV)
	# file and a stacked bar chart. It uses `tokei` for gathering the data, and
	# `gnuplot` for generating the plot. The data is available on stderr and the
	# plot will be put in stdout.
	#
	# This script generates information about the directory that it's run in,
	# aggregated by subdirectory.
	#
	# It is recommended that you run it from within the TF-A root directory for
	# best results.

	# Functions
	# =========

	# Filter: copy stdin to stdout, turning every newline character into a tab
	# so that a list of lines becomes a single tab-separated record.
	n2t() {
		tr '\012' '\011'
	}

	# Filter: remove one trailing tab from the end of each line read on stdin.
	#
	# The tab is handed to sed as a literal character through bash ANSI-C quoting
	# ($'...'). The `\t` escape inside a sed regex is a GNU extension; other sed
	# implementations would match a literal `t` instead.
	strip() {
		sed $'s/\t$//'
	}

	# Variables
	# =========

	# We will build the final data file incrementally throughout the script. We
	# need a place to store this data temporarily, so mktemp fills the role. The
	# template has no directory part, so the file is created in the current
	# directory.
	data=$(mktemp XXXXXX-sloc.tsv)

	# Remove the temporary file on every exit path. Under `set -e` any failing
	# command aborts the script, which would otherwise leave the file behind.
	trap 'rm -f -- "${data}"' EXIT

	# Subdirectories that we will analyze: the command-line arguments when any
	# non-empty ones are given, otherwise a default set. Every path is its own
	# array element, so paths containing whitespace or glob characters survive.
	if [[ -n "$*" ]]; then
		analyze=("$@")
	else
		analyze=("." "./drivers" "./plat" "./tools")
	fi

	# Top-level directories that we will analyze: each root plus its immediate
	# subdirectories, skipping hidden ones, sorted and de-duplicated. The find
	# output must be piped into sort with a real, unescaped `|`.
	readarray -t dirs < <(find "${analyze[@]}" -maxdepth 1 -type d -not -path '*/\.*' | sort -u)

	# We want to be sure that we always put the data in the same order, with the
	# same keys in the resulting TSV file. To ensure this, we keep a json-encoded
	# array of the categories we would like to show in the graph.
	#
	# This was generated by taking the output of `tokei --output json | jq keys`
	# and trimming out things that we don't really need, like "SVG".
	categories=$(
		tokei --output json \
			| jq 'keys - ["Total", "Autoconf", "CSS", "JSON", "Module-Definition", "Plain Text", "SVG", "SWIG", "XML" ]'
	)

	# Data File Generation
	# ====================
	#
	# Below we generate the data file used for the graph. The table is a tab
	# separated value (TSV) matrix with columns of code language (Bash, C, etc.),
	# and rows of subdirectories of TF-A that contain the code.

	# Column headers
	# --------------
	# The first row is "Module" followed by one JSON-quoted language name per
	# category, tab separated. ${categories} is passed quoted so that the JSON
	# text reaches jq exactly as stored, without word splitting or globbing.
	#
	# NOTE: unlike the data rows, this row is not passed through strip, so it
	# ends with a tab before the newline.
	{
		printf '%s\n' "Module"
		jq ".[]" <<<"${categories}"
	} | n2t > "${data}"
	echo >> "${data}"

	# Build Each Row
	# --------------
	# Each row is the (escaped) directory name followed by the lines-of-code
	# count for every category, tab separated.
	for dir in "${dirs[@]}"; do
		# Don't process directories that are ignored by Git
		if git check-ignore -q "${dir}"; then
			continue
		fi

		# Gnuplot likes to treat underscores as a syntax for subscripts. This
		# looks weird, as module names are not named with this syntax in mind.
		# Further, it turns out that we go through 3 expansions, so we need 8
		# (2^3) backslashes.
		printf '%s\n' "${dir}" | sed -e "s/_/\\\\\\\\_/g" | n2t >> "${data}"

		# Additional arguments to Tokei
		args=()

		# Don't include the statistics of this directory's children in its own
		# statistics if they are going to be analyzed separately. A directory
		# is a child when its path starts with "${dir}/". The comparison is a
		# literal prefix match, so characters such as "." in the path are not
		# treated as wildcards.
		for candidate in "${dirs[@]}"; do
			if [[ "${candidate}" == "${dir}/"* ]]; then
				# Tokei uses gitignore syntax, so we need to strip the
				# leading period.
				args+=(--exclude "${candidate#.}")
			fi
		done

		# This is the heart of the implementation, and probably the most
		# complicated line in this script. First, we generate the subdirectory
		# sloc with tokei, in JSON format. We then filter it with jq. The jq
		# filter iterates over the column names as saved in the categories
		# variable. Each iteration through the loop, we print out the code
		# value, when it exists, or null + 0. This takes advantage of the
		# property of null:
		#
		# > null can be added to any value, and returns the other value
		# > unchanged.
		#
		# The ${args[@]+...} form expands to nothing when args is empty; a
		# plain "${args[@]}" is an unbound-variable error under `set -u` on
		# bash releases older than 4.4.
		tokei "${dir}" --output json ${args[@]+"${args[@]}"} \
			| jq " .[${categories}[]].code + 0" \
			| n2t | strip >> "${data}"

		echo >> "${data}"
	done

	# Emit the finished table on stderr, then render the chart with the gnuplot
	# script whose path is this script's path with the trailing "bash" replaced
	# by "plot".
	cat "${data}" 1>&2

	# Capture gnuplot's status instead of letting `set -e` abort here, so that
	# the temporary file is removed even when plotting fails.
	plot_status=0
	gnuplot -c "${0%bash}plot" "${data}" || plot_status=$?

	rm -f -- "${data}"

	if (( plot_status != 0 )); then
		exit "${plot_status}"
	fi