zeek/auxil/btest/btest-diff

#! /usr/bin/env bash
#
# Usage:  btest-diff [options] <filename>
#
# These environment variables are set by btest:
#   TEST_MODE={TEST|UPDATE|UPDATE_INTERACTIVE}
#   TEST_BASELINE
#   TEST_DIAGNOSTICS
#   TEST_NAME
#
# A test can optionally set these environment variables:
#   TEST_DIFF_CANONIFIER
#   TEST_DIFF_BRIEF
#   TEST_DIFF_FILE_MAX_LINES
#
# This script has the following exit codes:
#
# When TEST_MODE is TEST:
# 0 - Comparison succeeded, files are the same
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER, or file contents differ
# 2 - Other diffing trouble (inherited from diff)
# 100 - No baseline to compare to available
#
# When TEST_MODE is UPDATE:
# 0 - Baseline updated
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER
#
# When TEST_MODE is UPDATE_INTERACTIVE:
# 0 - Baseline updated, or nothing to update
# 1 - Problems with input file/args or running TEST_DIFF_CANONIFIER, or user skips a deviating baseline
# 200 - User asks to abort after a deviating baseline
#
# Otherwise: exits with 1

# It's okay to check $? explicitly:
# shellcheck disable=SC2181

# Maximum number of lines to show from mismatching input file by default.
MAX_LINES=100

# Header line we tack onto new baselines generated by
# btest-diff. Serves both as a warning and as an indicator that the
# baseline has been run through the TEST_DIFF_CANONIFIER (if any).
HEADER="### BTest baseline data generated by btest-diff. Do not edit. Use \"btest -U/-u\" to update. Requires BTest >= 0.63."

# btest-diff supports a binary mode to simplify the handling of files
# that are better treated as binary blobs rather than text files. In
# binary mode, we treat the input file as-is, meaning:
#
# - only check whether input and baseline are identical
# - don't prepend our btest header line when updating baseline
# - don't canonify when updating baseline
#
BINARY_MODE=

# Predicate, succeeds when binary mode is active, i.e. when BINARY_MODE
# holds a non-empty value.
is_binary_mode() {
    [[ -n "$BINARY_MODE" ]]
}

# Predicate, succeeds if the given baseline is canonicalized.
#
# Arguments: $1 - path of the baseline file to inspect
# Globals:   HEADER (read)
# Returns:   0 if the first line of the file carries our header, 1 otherwise
is_canon_baseline() {
    local baseline_file="$1"

    # A baseline counts as canonicalized when its first line contains
    # our header. Only the part of the header up to and including the
    # "Do not edit." sentence is compared, so the wording after it can
    # change in future versions without invalidating older baselines.
    local marker="Do not edit."
    local prefix="$HEADER"

    if [[ "$prefix" == *"$marker"* ]]; then
        prefix=${prefix%%"$marker"*}$marker
    fi

    head -n 1 "$baseline_file" | grep -q -F "$prefix" 2>/dev/null || return 1
    return 0
}

# Writes the content of the requested baseline to standard out.
#
# In binary mode the file is emitted verbatim. Otherwise the baseline
# must be canonicalized (i.e. start with our header line); the header is
# then dropped and only the lines following it are printed. For a
# non-canonicalized baseline nothing is printed and the function fails.
#
# Arguments: $1 - path of the baseline file
# Returns:   0 in binary mode; 1 for a non-canonicalized baseline;
#            otherwise the exit status of tail
get_baseline() {
    local baseline_file="$1"

    if ! is_binary_mode; then
        if is_canon_baseline "$baseline_file"; then
            # Skip the header line, pass through everything after it.
            tail -n +2 "$baseline_file"
            return $?
        fi
        return 1
    fi

    cat "$baseline_file"
    return 0
}

# Stores a new baseline inside the *first* baseline directory.
#
# In binary mode the source file is copied as-is. Otherwise our header
# line is written first, followed by the content of the source file.
#
# Arguments: $1 - file holding the new baseline content
#            $2 - file name of the baseline inside the baseline directory
# Globals:   baseline_dirs (read), HEADER (read)
update_baseline() {
    local src="$1"
    local dest="${baseline_dirs[0]}/$2"

    if is_binary_mode; then
        cat "$src" >"$dest"
    else
        printf '%s\n' "$HEADER" >"$dest"
        cat "$src" >>"$dest"
    fi
}

# ---- Main program ----------------------------------------------------

# Consume leading option arguments. Parsing stops at the first argument
# that is empty or not a known option; that argument and everything
# after it stay in the positional parameters.
while [[ -n "$1" ]]; do
    case "$1" in
        --binary)
            BINARY_MODE=1
            ;;
        *)
            break
            ;;
    esac
    shift
done

# A line limit supplied by the test overrides the built-in default.
MAX_LINES=${TEST_DIFF_FILE_MAX_LINES:-$MAX_LINES}

# Without an explicit diagnostics destination, report to standard output.
: "${TEST_DIAGNOSTICS:=/dev/stdout}"

if [ "$#" -lt 1 ]; then
    echo "btest-diff: wrong number of arguments" >"$TEST_DIAGNOSTICS"
    exit 1
fi

# Split string with baseline directories into array. The list is
# separated by ';' when uname reports a MINGW system and by ':'
# everywhere else.
case "$(uname -s)" in
    MINGW*) IFS=';' read -ra baseline_dirs <<<"$TEST_BASELINE" ;;
    *) IFS=':' read -ra baseline_dirs <<<"$TEST_BASELINE" ;;
esac

# The first remaining argument is the file to compare; everything after
# it stays in "$@".
input="$1"

# Name of the baseline file: the input path with every '/' replaced by
# '.'. Done with parameter expansion rather than "echo | sed" so that no
# extra processes are spawned and inputs that echo would treat as
# options (such as "-n") are preserved.
canon=${input//\//.}
shift

if [ ! -f "$input" ]; then
    echo "btest-diff: input $input does not exist." >"$TEST_DIAGNOSTICS"
    exit 1
fi

# Space-separated list of temporary files to remove when the script
# exits.
tmpfiles=""

# Exit handler: removes every file recorded in $tmpfiles, silently
# ignoring any failure.
delete_tmps() {
    # $tmpfiles is left unquoted on purpose so that it splits into one
    # argument per recorded file.
    # shellcheck disable=SC2086,SC2317
    rm -f $tmpfiles 2>/dev/null
}

# Run the cleanup whenever the script exits.
trap delete_tmps EXIT

# First available baseline across directories.
baseline=""
for dir in "${baseline_dirs[@]}"; do
    if [ -f "$dir/$canon" ]; then
        baseline="$dir/$canon"
        break
    fi
done

# Outcome of the comparison. Stays non-zero until a diff has actually
# run and found the files to be identical.
result=2

# Start from a fresh diagnostics file. Never try to unlink the standard
# device aliases, though: TEST_DIAGNOSTICS defaults to /dev/stdout, and
# when running as root the rm would succeed, remove the alias, and make
# the redirections below create a regular file in /dev instead.
case "$TEST_DIAGNOSTICS" in
    /dev/stdout | /dev/stderr | /dev/null | /dev/tty | /dev/fd/*) ;;
    *) rm -f "$TEST_DIAGNOSTICS" 2>/dev/null ;;
esac

echo "== File ===============================" >>"$TEST_DIAGNOSTICS"

if [ -z "$baseline" ]; then
    # Nothing to compare against: always show the complete input.
    cat "$input" >>"$TEST_DIAGNOSTICS"
elif [ -n "$TEST_DIFF_BRIEF" ]; then
    echo "<Content not shown>" >>"$TEST_DIAGNOSTICS"
else
    # Show the input, limited to its first MAX_LINES lines.
    if [ "$(wc -l "$input" | awk '{print $1}')" -le "$MAX_LINES" ]; then
        cat "$input" >>"$TEST_DIAGNOSTICS"
    else
        head -n "$MAX_LINES" "$input" >>"$TEST_DIAGNOSTICS"
        echo "[... File too long, truncated ...]" >>"$TEST_DIAGNOSTICS"
    fi
fi

# If no canonifier is defined, just copy. Simplifies code layout.
# In binary mode, always just copy.
if [ -z "$TEST_DIFF_CANONIFIER" ] || is_binary_mode; then
    TEST_DIFF_CANONIFIER="cat"
fi

canon_output=$(mktemp -t "test-diff.$canon.tmp.XXXXXX")
tmpfiles="$tmpfiles $canon_output"
error=0

# Canonicalize the new test output. The canonifier is a shell command
# line and therefore goes through eval; it receives the file name as
# trailing argument and the file content on stdin. The file name is
# passed as a literal "$input" reference that eval expands only after
# parsing the command line, so names containing spaces, globs or shell
# metacharacters reach the canonifier as a single, unmodified argument.
# shellcheck disable=SC2016,SC2094
if ! eval "$TEST_DIFF_CANONIFIER" '"$input"' <"$input" >"$canon_output"; then
    echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
    echo "btest-diff: TEST_DIFF_CANONIFIER failed on file '$input'" >>"$TEST_DIAGNOSTICS"
    error=1
    result=1
fi

if [ -n "$baseline" ]; then
    canon_baseline=$(mktemp -t "test-diff.$canon.baseline.tmp.XXXXXX")
    tmpfiles="$tmpfiles $canon_baseline"

    # Prepare the baseline. When created by a recent btest-diff, we
    # don't need to re-canonicalize, otherwise we do.
    if ! get_baseline "$baseline" >"$canon_baseline"; then
        # It's an older uncanonicalized baseline, so canonicalize
        # it now prior to comparison. Future updates via btest
        # -U/-u will then store it canonicalized. As above, the file
        # name is expanded by eval itself to keep it a single argument.
        # shellcheck disable=SC2016,SC2094
        if ! eval "$TEST_DIFF_CANONIFIER" '"$baseline"' <"$baseline" >"$canon_baseline"; then
            echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
            echo "btest-diff: TEST_DIFF_CANONIFIER failed on file '$baseline'" >>"$TEST_DIAGNOSTICS"
            error=1
            result=1
        fi
    fi

    # Only compare when both canonicalization steps went through. Any
    # remaining command-line arguments are handed to diff as options.
    if [ "$error" -eq 0 ]; then
        echo "== Diff ===============================" >>"$TEST_DIAGNOSTICS"
        if is_binary_mode; then
            diff -s "$@" "$canon_baseline" "$canon_output" >>"$TEST_DIAGNOSTICS"
            result=$?
        else
            # We'd use --strip-trailing-cr in the following, but it's not guaranteed.
            diff -au "$@" \
                <(sed 's/\r$//' "$canon_baseline") \
                <(sed 's/\r$//' "$canon_output") >>"$TEST_DIAGNOSTICS"
            result=$?
        fi
    fi
elif [ "$TEST_MODE" = "TEST" ]; then
    # A missing baseline is only an error when testing; the update
    # modes go on to create it.
    echo "== Error ==============================" >>"$TEST_DIAGNOSTICS"
    echo "test-diff: no baseline found." >>"$TEST_DIAGNOSTICS"
    result=100
fi

echo "=======================================" >>"$TEST_DIAGNOSTICS"

# Act on the outcome according to TEST_MODE. Every known mode exits
# inside its branch.
case "$TEST_MODE" in
    TEST)
        exit "$result"
        ;;

    UPDATE_INTERACTIVE)
        # We had a problem running the canonifier
        if [ "$error" != 0 ]; then
            exit 1
        fi

        # There's no change to the baseline, so skip user interaction
        if [ "$result" = 0 ]; then
            exit 0
        fi

        # External helper; its exit status decides whether we update
        # (0) or pass the status on to our caller (anything else).
        btest-ask-update
        rc=$?

        echo -n "$TEST_NAME ..." >/dev/tty

        if [ "$rc" != 0 ]; then
            exit "$rc"
        fi

        # Don't report success when the baseline could not be written.
        if ! update_baseline "$canon_output" "$canon"; then
            echo "btest-diff: failed to update baseline '$canon'" >>"$TEST_DIAGNOSTICS"
            exit 1
        fi
        exit 0
        ;;

    UPDATE)
        # We had a problem running the canonifier
        if [ "$error" != 0 ]; then
            exit 1
        fi

        # Don't report success when the baseline could not be written.
        if ! update_baseline "$canon_output" "$canon"; then
            echo "btest-diff: failed to update baseline '$canon'" >>"$TEST_DIAGNOSTICS"
            exit 1
        fi
        exit 0
        ;;

    *)
        echo "test-diff: unknown test mode $TEST_MODE" >"$TEST_DIAGNOSTICS"
        exit 1
        ;;
esac