#!/bin/sh
#
# Benchmarks a Zeek binary against a data file. Depending on the selected
# mode the data is replayed over a network interface with tcpreplay, read
# directly from disk, or profiled with perf to produce flamegraphs.
#
# The variables below hold the command-line configuration; they are filled
# in by the option parsing further down.

# Path to the Zeek binary to benchmark (-b/--build). Required.
ZEEK_BUILD=""

# Path to the data file to replay or read (-d/--data-file). Required.
DATA_FILE=""

# Benchmark mode (-m/--mode): one of 'intf', 'file' or 'flamegraph'.
MODE="intf"

# Network interface used for the replay (-i/--interface).
INTERFACE=""

# Optional Zeek random seed file (-s/--seed).
SEED_FILE=""

# Path where flamegraph is installed
FLAMEGRAPH_PATH=""

# File prefix for the generated flamegraph SVG files (-o/--output).
FLAMEGRAPH_PREFIX="benchmark"

# Print the help text to stdout and terminate the script with exit status 1.
# Takes no arguments. $0 is expanded inside the heredoc so the usage line
# shows the name the script was invoked with.
usage() {
    cat <<EOF
Usage: $0 -b [zeek binary path] -d [data file path]

    Options:
    -b, --build PATH         The path to a Zeek binary to benchmark
    -d, --data-file PATH     The path to a data file to read from for replay
    -m, --mode MODE          This can be one of three possible values:
                             intf, file, or flamegraph. This controls what
                             mode is used for the benchmark run, and defaults
                             to intf if not passed. The modes are described
                             below.
    -i, --interface INTF     The network interface to use for capturing data.
                             This interface should be completely idle, since
                             tcpreplay will be using it to replay the data.
                             This argument is ignored if the mode is 'file'.
    -f, --flamegraph PATH    The path to the directory where Flamegraph is
                             installed. This argument is required if the mode
                             is 'flamegraph', but is ignored otherwise.
    -o, --output FILE        The file prefix to use as output for Flamegraph.
                             This defaults to 'benchmark'. This argument is ignored
                             if the mode is not 'flamegraph'.
    -s, --seed FILE          (optional) A path to a Zeek random seed file.
                             This is used to control the generation of connection
                             IDs and other data so it is consistent between
                             benchmarking runs.

    By default or when 'intf' is passed for the mode argument, the output will
    include CPU, memory, etc statistics from Zeek processing all of the data
    in the data file as if it was reading it live from the network. This mode
    requires an interface to be passed using the -i argument.

    When 'file' is passed for the mode (-m) argument, the output will include
    the runtime and maximum memory usage of Zeek when reading the data file
    directly from disk.

    When 'flamegraph' is passed for the mode (-m) argument, this script will
    output two flamegraphs for the process runtime in svg format. The first
    flamegraph is a standard graph showing the time spent in functions,
    stacked in the normal manner. The second graph is 'stack-reversed'.

    Symbols in Flamegraph outputs may not correctly stack unless the various
    libraries linked into Zeek are built with frame pointers. This includes
    glibc, libpcap, and openssl. Rebuilding those libraries with the
    -fno-omit-frame-pointer compiler flag may provide more accurate output.
    You can set libraries that get preloaded by setting the PRELOAD_LIBS
    variable in the script.

    This script assumes that it is being run on a system with a large number
    of CPU cores. If being used on a smaller system, modify this script and
    set the ZEEK_CPU and TCPREPLAY_CPU variables to smaller values.

EOF

    exit 1
}

# Abort with an error plus the usage text when option $1 has no value.
#   $1 - the option being parsed
#   $2 - number of arguments still unparsed, including the option itself
require_value() {
    if [ "$2" -lt 2 ]; then
        echo "Error: option '$1' requires an argument"
        echo
        usage
        exit 1 # only reached if usage ever returns
    fi
}

# Parse the command-line options into the global configuration variables
# (DATA_FILE, ZEEK_BUILD, MODE, INTERFACE, FLAMEGRAPH_PATH,
# FLAMEGRAPH_PREFIX, SEED_FILE). Every option consumes exactly one value.
# Unknown arguments and -h/--help print the usage text and exit; without a
# default arm an unrecognised argument would never be shifted away and the
# loop would spin forever.
parse_args() {
    # POSIX test instead of the bash-only (( )) so this works under /bin/sh.
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -d | --data-file)
                require_value "$1" "$#"
                DATA_FILE=$2
                ;;
            -b | --build)
                require_value "$1" "$#"
                ZEEK_BUILD=$2
                ;;
            -m | --mode)
                require_value "$1" "$#"
                MODE=$2
                ;;
            -i | --interface)
                require_value "$1" "$#"
                INTERFACE=$2
                ;;
            -f | --flamegraph)
                require_value "$1" "$#"
                FLAMEGRAPH_PATH=$2
                ;;
            -o | --output)
                require_value "$1" "$#"
                FLAMEGRAPH_PREFIX=$2
                ;;
            -s | --seed)
                require_value "$1" "$#"
                SEED_FILE=$2
                ;;
            -h | --help)
                usage
                exit 1 # only reached if usage ever returns
                ;;
            *)
                echo "Error: unknown argument '$1'"
                echo
                usage
                exit 1 # only reached if usage ever returns
                ;;
        esac
        shift 2
    done
}

parse_args "$@"

# Check the parsed configuration (MODE, ZEEK_BUILD, DATA_FILE, INTERFACE,
# FLAMEGRAPH_PATH). On the first problem found, print an error followed by
# the usage text; usage terminates the script.
validate_args() {
    case "${MODE}" in
        intf | file | flamegraph) ;;
        *)
            echo "Error: -m argument should be one of 'intf', 'file', or 'flamegraph'"
            echo
            usage
            ;;
    esac

    if [ -z "${ZEEK_BUILD}" ]; then
        echo "Error: -b argument is required and should point at a Zeek binary"
        echo
        usage
    fi

    if [ -z "${DATA_FILE}" ]; then
        echo "Error: -d argument is required and should point at a pcap file to replay"
        echo
        usage
    fi

    # Every mode except 'file' replays the data over a network interface.
    # Two separate tests joined with && replace the obsolescent '-a'.
    if [ "${MODE}" != "file" ] && [ -z "${INTERFACE}" ]; then
        echo "Error: -i argument is required for the ${MODE} mode and should point to an idle network interface"
        echo
        usage
    fi

    # The flamegraph mode runs scripts from this directory, so it must be
    # given; the help text already documents it as required for that mode.
    if [ "${MODE}" = "flamegraph" ] && [ -z "${FLAMEGRAPH_PATH}" ]; then
        echo "Error: -f argument is required for the flamegraph mode and should point to the Flamegraph directory"
        echo
        usage
    fi
}

validate_args

# Various run-time options. Each keeps its built-in default unless a
# non-empty value with the same name is already present in the environment.
#   ZEEK_CPU      - CPU that Zeek is pinned to with taskset
#   TCPREPLAY_CPU - CPU that tcpreplay is pinned to with taskset
#   PRELOAD_LIBS  - value passed as LD_PRELOAD in flamegraph mode
ZEEK_CPU=${ZEEK_CPU:-10}
TCPREPLAY_CPU=${TCPREPLAY_CPU:-11}
PRELOAD_LIBS=${PRELOAD_LIBS:-}

# Build ZEEK_ARGS, the extra arguments handed to the Zeek binary, from MODE,
# INTERFACE and SEED_FILE.
build_zeek_args() {
    ZEEK_ARGS=""

    # Every mode except 'file' captures from the interface via af_packet.
    if [ "${MODE}" != "file" ]; then
        ZEEK_ARGS="-i af_packet::${INTERFACE}"
    fi

    # Append the seed file option; the separating space is only added when
    # ZEEK_ARGS already holds something.
    if [ -n "${SEED_FILE}" ]; then
        ZEEK_ARGS="${ZEEK_ARGS:+${ZEEK_ARGS} }-G ${SEED_FILE}"
    fi
}

build_zeek_args

# Print the PID of the first process whose parent PID is $1. Prints nothing
# when no such process shows up in the 'ps -ef' listing.
child_pid_of() {
    ps -ef | awk -v parent="$1" '$3 == parent { print $2; exit }'
}

# 'intf' mode: start Zeek on the interface, replay DATA_FILE onto it with
# tcpreplay, then report Zeek's peak memory and average CPU usage together
# with the 'perf stat' counters.
run_intf_mode() {
    TIME_FILE=$(mktemp) || exit 1
    # Remove the temp file on early exits as well.
    trap 'rm -f -- "${TIME_FILE}"' EXIT

    echo "####### Testing reading data file from a network interface #######"
    echo "Running '${ZEEK_BUILD} ${ZEEK_ARGS}' against ${DATA_FILE}"

    # Start zeek, find its PID, then wait 10s in total to let it reach a
    # steady state. time(1) writes its report to TIME_FILE. ZEEK_ARGS is
    # left unquoted on purpose so it splits into separate arguments.
    # shellcheck disable=SC2086
    taskset --cpu-list "${ZEEK_CPU}" time -f "%M" -o "${TIME_FILE}" "${ZEEK_BUILD}" ${ZEEK_ARGS} &
    TIME_PID=$!

    sleep 5
    ZEEK_PID=$(child_pid_of "${TIME_PID}")
    if [ -z "${ZEEK_PID}" ]; then
        # Without a PID the renice/perf/kill calls below would be run with
        # no target, so stop here instead.
        echo "Error: could not find a running Zeek process; it may have failed to start" >&2
        exit 1
    fi
    renice -20 -p "${ZEEK_PID}" >/dev/null
    sleep 5
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start perf stat on the zeek process
    perf stat -p "${ZEEK_PID}" &
    PERF_PID=$!

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list "${TCPREPLAY_CPU}" tcpreplay -i "${INTERFACE}" -q "${DATA_FILE}"

    # Capture the average CPU usage of the process
    CPU_USAGE=$(ps -p "${ZEEK_PID}" -o %cpu=)

    # Kill everything: SIGINT to zeek, then wait for the two background jobs.
    echo
    kill -2 "${ZEEK_PID}"
    wait "${TIME_PID}"
    wait "${PERF_PID}"

    # GNU time reports %M (maximum resident set size) in kilobytes.
    echo "Maximum memory usage (max_rss): $(head -n 1 "${TIME_FILE}") KB"
    echo "Average CPU usage: ${CPU_USAGE}%"

    rm -f -- "${TIME_FILE}"
}

# 'file' mode: have Zeek read DATA_FILE directly from disk and report the
# elapsed time and peak memory recorded by time(1).
run_file_mode() {
    TIME_FILE=$(mktemp) || exit 1
    # Remove the temp file on early exits as well.
    trap 'rm -f -- "${TIME_FILE}"' EXIT

    echo "####### Testing reading the file directly from disk #######"

    # Zeek runs in the foreground here, so there is no background PID to
    # look up and renice afterwards. The raised priority is requested at
    # launch with nice(1) instead.
    # shellcheck disable=SC2086
    taskset --cpu-list "${ZEEK_CPU}" nice -n -20 time -f "%e %M" -o "${TIME_FILE}" "${ZEEK_BUILD}" ${ZEEK_ARGS} -r "${DATA_FILE}"

    # %e is elapsed seconds; %M is the maximum resident set size, which GNU
    # time reports in kilobytes.
    awk '{print "Time spent: " $1 " seconds\nMax memory usage: " $2 " KB"}' "${TIME_FILE}"

    rm -f -- "${TIME_FILE}"
}

# 'flamegraph' mode: record Zeek with 'perf record' while DATA_FILE is
# replayed onto the interface, then write a normal and a reversed flamegraph
# as SVG files named after FLAMEGRAPH_PREFIX.
run_flamegraph_mode() {
    echo "####### Generating flamegraph data #######"

    PERF_RECORD_FILE=$(mktemp) || exit 1
    PERF_COLLAPSED_FILE=$(mktemp) || exit 1
    # Remove the temp files on early exits as well.
    trap 'rm -f -- "${PERF_RECORD_FILE}" "${PERF_COLLAPSED_FILE}"' EXIT

    # Start zeek under perf record, then sleep for a few seconds to let it actually start up. For runs with
    # shorter amounts of data or with slower traffic, you can add '-c 499' here to get finer-grained results.
    # With big data sets, it just results in the graph getting blown out by waits in the IO loop.
    # shellcheck disable=SC2086
    LD_PRELOAD="${PRELOAD_LIBS}" perf record -g -o "${PERF_RECORD_FILE}" -- "${ZEEK_BUILD}" ${ZEEK_ARGS} &
    PERF_PID=$!

    sleep 5

    ZEEK_PID=$(child_pid_of "${PERF_PID}")
    if [ -z "${ZEEK_PID}" ]; then
        # Without a PID the kill below would be run with no target.
        echo "Error: could not find a running Zeek process; it may have failed to start" >&2
        exit 1
    fi
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list "${TCPREPLAY_CPU}" tcpreplay -i "${INTERFACE}" -q "${DATA_FILE}"

    # Kill everything: SIGINT to zeek, then wait for perf to finish.
    echo
    kill -2 "${ZEEK_PID}"
    wait "${PERF_PID}"

    echo
    echo "####### Collapsing perf stack data #######"
    perf script -i "${PERF_RECORD_FILE}" | "${FLAMEGRAPH_PATH}/stackcollapse-perf.pl" >"${PERF_COLLAPSED_FILE}"
    echo "####### Building normal flamegraph, writing to ${FLAMEGRAPH_PREFIX}.svg #######"
    "${FLAMEGRAPH_PATH}/flamegraph.pl" <"${PERF_COLLAPSED_FILE}" >"${FLAMEGRAPH_PREFIX}.svg"
    echo "####### Building reverse flamegraph, writing to ${FLAMEGRAPH_PREFIX}-reversed.svg #######"
    "${FLAMEGRAPH_PATH}/flamegraph.pl" --reverse <"${PERF_COLLAPSED_FILE}" >"${FLAMEGRAPH_PREFIX}-reversed.svg"

    rm -f -- "${PERF_RECORD_FILE}" "${PERF_COLLAPSED_FILE}"
}

# Run the benchmark for the selected mode.
case "${MODE}" in
    intf) run_intf_mode ;;
    file) run_file_mode ;;
    flamegraph) run_flamegraph_mode ;;
esac