#!/bin/bash
#
# SPDX-FileCopyrightText: 2012 Christian Babeux <christian.babeux@efficios.com>
# SPDX-FileCopyrightText: 2012 David Goulet <dgoulet@efficios.com>
#
# SPDX-License-Identifier: LGPL-2.1-only

TEST_DESC="Streaming - High throughput with bandwidth limits"

# This test tries to catch races that only show up when bandwidth is scarce.
# Use enough iterations and applications that some events end up dropped.

# Locations, all derived from the directory holding this script.
CURDIR="$(dirname "$0")/"
TESTDIR="${CURDIR}/../../.."
TESTAPP_PATH="${TESTDIR}/utils/testapp"
TESTAPP_NAME="gen-ust-events"
TESTAPP_BIN="${TESTAPP_PATH}/${TESTAPP_NAME}/${TESTAPP_NAME}"

# Workload: NR_APP_ITER application instances, each run with "-i NR_ITER".
NR_APP_ITER=10
NR_ITER=5000

# Tracing session parameters.
SESSION_NAME="high-throughput"
EVENT_NAME="tp:tptest"

# Interface shaped with tc, and the destination ports the tc filters match
# to classify control and data traffic.
DEFAULT_IF="lo"
SESSIOND_CTRL_PORT=5342
SESSIOND_DATA_PORT=5343

# Output directory given to the relay daemon (-o) and read back by babeltrace.
TRACE_PATH="$(mktemp -d -t tmp.test_streaming_high_throughput_limits_trace_path.XXXXXX)"

# Number of TAP results announced to plan_tests.
NUM_TESTS=39

# Pull in the shared test helpers. The path is quoted because TESTDIR is
# derived from the location of this script, which may contain whitespace.
# shellcheck source-path=SCRIPTDIR/../../../
source "$TESTDIR/utils/utils.sh"

# Nothing can be tested without the event-generating application.
if [ ! -x "$TESTAPP_BIN" ]; then
	BAIL_OUT "No UST events binary detected."
fi

# Delete the root qdisc of $DEFAULT_IF, discarding all tc output.
# The function's status is the exit status of the tc command.
reset_bw_limit()
{
	tc qdisc del dev "$DEFAULT_IF" root &>/dev/null
}

# Install an HTB traffic-shaping hierarchy on $DEFAULT_IF.
#
# Arguments:
#   $1 - total bandwidth limit, in kbit
#
# Globals read:
#   DEFAULT_IF, SESSIOND_CTRL_PORT, SESSIOND_DATA_PORT
#
# Returns:
#   0 when every tc command succeeds. As soon as one fails, the remaining
#   commands are skipped, reset_bw_limit is called to undo what was set up,
#   and 1 is returned.
function set_bw_limit
{
	local limit=$1
	local ctrlportlimit=$(( limit / 10 ))
	local dataportlimit

	# failsafe to have at least 1kbit/s for control (in the case where $1 < 10)
	if (( ctrlportlimit == 0 )); then
		ctrlportlimit=1
	fi
	# if $1 < 10, we might bust the limit set here, but the
	# parent qdisc (1:) will always limit us to the right max value
	dataportlimit=$(( 9 * ctrlportlimit ))

	diag "Set bandwidth limits to ${limit}kbits, ${ctrlportlimit} for control and ${dataportlimit} for data"

	# The commands are chained with && so the first failure short-circuits
	# the rest; tc output is discarded for the whole group.
	if {
		tc qdisc add dev "$DEFAULT_IF" root handle 1: htb default 15 &&
		# the total bandwidth is the limit set by the user
		tc class add dev "$DEFAULT_IF" parent 1: classid 1:1 htb \
			rate "${limit}kbit" ceil "${limit}kbit" &&
		# 1/10 of the bandwidth guaranteed and traffic prioritized for the control port
		tc class add dev "$DEFAULT_IF" parent 1:1 classid 1:10 htb \
			rate "${ctrlportlimit}kbit" ceil "${limit}kbit" prio 1 &&
		# 9/10 of the bandwidth guaranteed and can borrow up to the total bandwidth (if unused)
		tc class add dev "$DEFAULT_IF" parent 1:1 classid 1:11 htb \
			rate "${dataportlimit}kbit" ceil "${limit}kbit" prio 2 &&
		# filter to assign control traffic to the 1:10 class
		tc filter add dev "$DEFAULT_IF" parent 1: protocol ip u32 \
			match ip dport "$SESSIOND_CTRL_PORT" 0xffff flowid 1:10 &&
		# filter to assign data traffic to the 1:11 class
		tc filter add dev "$DEFAULT_IF" parent 1: protocol ip u32 \
			match ip dport "$SESSIOND_DATA_PORT" 0xffff flowid 1:11
	} >/dev/null 2>&1; then
		return 0
	fi

	# Roll back whatever part of the hierarchy was installed.
	reset_bw_limit
	return 1
}

# Create a tracing session whose output is directed to a custom URI.
#
# Arguments:
#   $1 - session name
#   $2 - URI handed to "lttng create -U"
#
# Globals read:
#   TESTDIR, LTTNG_BIN
#
# Reports one result through ok, based on the exit status of the lttng client.
function create_lttng_session_with_uri
{
	local sess_name=$1
	local uri=$2

	# Create session with custom URI
	"$TESTDIR/../src/bin/lttng/$LTTNG_BIN" create -U "$uri" "$sess_name" >/dev/null 2>&1
	ok $? "Create session with uri $uri"
}

# Start NR_APP_ITER instances of the test application in the background.
#
# Globals read:
#   NR_APP_ITER, NR_ITER, TESTAPP_BIN
# Globals written:
#   tracee_pids - the PID of every launched instance is appended, so the
#                 caller can wait on them.
function run_apps
{
	local i

	for (( i = 0; i < NR_APP_ITER; i++ )); do
		# With bandwidth limitation, unfortunately, applications easily time
		# out: communication between the consumer and relayd is very slow,
		# which makes the consumer's status reply slow and so delays the
		# "registration done" message. Disable the registration timeout.
		LTTNG_UST_REGISTER_TIMEOUT=-1 "$TESTAPP_BIN" -i "$NR_ITER" >/dev/null 2>&1 &
		tracee_pids+=("$!")
	done
}

# Run one complete tracing cycle streamed over the network: create and start
# the session, run the applications to completion, stop and destroy the
# session, then check the event count.
# The return status is the one of validate_event_count.
test_high_throughput()
{
	NETWORK_URI="net://localhost"

	# Session setup.
	create_lttng_session_with_uri "$SESSION_NAME" "$NETWORK_URI"
	enable_ust_lttng_event_ok "$SESSION_NAME" "$EVENT_NAME"
	start_lttng_tracing_ok "$SESSION_NAME"

	# Workload: launch the applications and block until all have exited.
	run_apps
	diag "Waiting for applications to end"
	wait "${tracee_pids[@]}"
	# Start from an empty PID list on the next call.
	tracee_pids=()
	pass "waiting done"

	# Session teardown.
	stop_lttng_tracing_ok "$SESSION_NAME"
	destroy_lttng_session_ok "$SESSION_NAME"

	validate_event_count
}

# Check that recorded plus dropped events add up to what the applications
# were asked to produce (NR_APP_ITER * NR_ITER).
#
# $BABELTRACE_BIN is run on $TRACE_PATH:
#   - every line it prints on stdout counts as one recorded event;
#   - on stderr, the 4th single-space-separated field of each line is summed
#     as the number of dropped events. Lines whose 4th field is not a plain
#     unsigned integer are ignored rather than fed to shell arithmetic.
#     NOTE(review): presumes the dropped-event count is reported in that
#     position (e.g. "[warning] Tracer discarded N events ...") - confirm
#     against the babeltrace version in use.
#
# Exactly one result is reported through pass or fail.
#
# Returns:
#   0 if the counts match; $TRACE_PATH is then removed.
#   1 otherwise; $TRACE_PATH is left in place.
# The temporary file holding stderr is removed in both cases.
function validate_event_count
{
	local stderr_file traced dropped total wanted

	if ! stderr_file=$(mktemp -t tmp.streaming_high_throughput_limit_file1.XXXXXX); then
		fail "Validate trace event count"
		diag "Unable to create a temporary file"
		return 1
	fi

	traced=$("$BABELTRACE_BIN" "$TRACE_PATH" 2>"$stderr_file" | wc -l)
	# -F'[ ]' splits on every single space, like "cut -d ' '" would.
	dropped=$(awk -F'[ ]' '$4 ~ /^[0-9]+$/ { sum += $4 } END { printf "%d\n", sum }' "$stderr_file")
	rm -f -- "$stderr_file"

	total=$(( dropped + traced ))
	wanted=$(( NR_APP_ITER * NR_ITER ))

	if [ "$dropped" -le 0 ]; then
		diag "No dropped events during test, consider increasing the number of " \
			 "apps or iterations"
	fi

	if [ "$wanted" -ne "$total" ]; then
		fail "Validate trace event count"
		diag "Expected $wanted. Dropped $dropped. Recorded $traced. Total $total... "
		return 1
	fi

	pass "Validate trace event count"
	diag "Expected $wanted. Dropped $dropped. Recorded $traced. Total $total... "

	rm -rf "$TRACE_PATH"

	return 0
}

# Handler installed for SIGTERM/SIGINT: announce the exit, lift the bandwidth
# limit, then run the cleanup routine provided by the test utilities.
interrupt_cleanup()
{
	diag "*** Exiting ***"

	# Remove the traffic shaping before anything else.
	reset_bw_limit

	# Delegate the rest to the utils cleanup.
	full_cleanup
}

plan_tests "$NUM_TESTS"

print_test_banner "$TEST_DESC"

bail_out_if_no_babeltrace

# The body only runs when check_skip_kernel_test returns non-zero, i.e. when
# the tests were not skipped.
if ! check_skip_kernel_test "$NUM_TESTS" "Skipping all tests."; then

	# Catch SIGTERM/SIGINT and try to remove the bandwidth limits
	trap interrupt_cleanup SIGTERM SIGINT

	# Bandwidth limits to exercise, in kbit, one full cycle per value.
	BW_LIMITS=(3200 400 100)
	tracee_pids=()

	for BW in "${BW_LIMITS[@]}"; do
		diag "Test high-throughput with bandwidth limit set to ${BW}kbits"

		set_bw_limit "$BW"
		ok $? "Setting bandwidth limit"

		# Bring up the daemons, run the scenario, tear the daemons down.
		# shellcheck disable=SC2119
		start_lttng_sessiond
		start_lttng_relayd "-o $TRACE_PATH"
		test_high_throughput
		# Status of the scenario; not read again in the visible script.
		result=$?
		# shellcheck disable=SC2119
		stop_lttng_relayd
		# shellcheck disable=SC2119
		stop_lttng_sessiond

		reset_bw_limit
		ok $? "Reset bandwith limits"
	done
fi
