scripts/csmith.sh

#!/usr/bin/env bash
#
# This script runs CSmith to generate N examples, and for each of them:
# 1. Compiles the code with the system compiler.
# 2. Runs the compiled code to obtain the checksum that the CSmith test
# computes.
# 3. Takes that checksum to generate an assertion.
# 4. Runs CBMC (for up to 90 seconds and with a loop unwinding of 257) to verify
# the generated assertion. If CBMC were to come up with a counterexample, then
# CBMC's symbolic execution would differ from the concrete execution of what
# ought to be a fully deterministic test. Such a failing test would therefore
# indicate a bug in CBMC. For such a failing case, the source code of the test
# is emitted.
# 5. If CBMC's execution completed successfully or timed out, the test is next
# compiled using goto-cc and dumped back to C code using goto-instrument.
# 6. The dumped C code is compiled using GCC and executed again, computing a
# fresh checksum.
# 7. This checksum is compared to the previous one to ensure that the program is
# equivalent to the original test generated by CSmith.
#
# The number "N" of such tests run can be specified as a command-line
# argument to this script. If no command-line arguments are provided, only a
# single test is run.
# 
# As a second command-line argument, a random seed can be provided. This is
# primarily useful to reproduce earlier failures. When a random seed is
# provided, N is forced to 1.
#
# The script assumes that csmith (including its development headers) is
# installed (or otherwise found on the PATH). cbmc, goto-cc, and goto-instrument
# must also be found via the system PATH.

set -e

workdir=$(mktemp -d csmith.XXX)
print_dir_and_seed() {
  echo "Failed test obtained with random seed $seed"
  echo "CSmith working directory: $workdir"
}

# Run a single (previously generated) test.
csmith_test() {
  trap print_dir_and_seed ERR

  local f=$1
  local r=$(cbmc --version)
  local bn=`basename $f`
  bn=`basename $bn .c`

  echo "Trying $f with build $r"

  # Compile and run (for up to 30 seconds) the CSmith-generated test.
  gcc -I/usr/include/csmith -w $f -o $bn
  check="`timeout 30 ./$bn | sed 's/checksum = /0x/'`"
  if [ -z "$check" ] ; then
    echo "TIMEOUT while executing CSmith test"
    return 0
  fi

  # Prepare the test CBMC by injecting the checksum as an assertion.
  gcc -E -I/usr/include/csmith -D__FRAMAC \
    -D'Frama_C_dump_assert_each()'="assert($check==(crc))" $f -o $bn.i
  # We don't model argv here, so make sure we don't end up with a spurious
  # null-pointer use.
  sed -i 's/strcmp(argv\[1\], "1")/0/' $bn.i
  # Run the test using CBMC for up to 90 seconds, unwinding loops up to 257
  # times.
  ec=0
  /usr/bin/time -v timeout -k15 90 cbmc \
    --unwind 257 --no-unwinding-assertions --object-bits 13 \
    $bn.i || ec=$?
  if [ $ec -eq 124 ] || [ $ec -eq 137 ] ; then
    echo "TIMEOUT in CBMC"
  elif [ $ec -eq 6 ] ; then
    echo "Limitation in CBMC"
  elif [ $ec -ne 0 ] ; then
    cat $f
    return 1
  fi

  # Compile the test using goto-cc and dump it back to C code using
  # goto-instrument.
  goto-cc -I /usr/include/csmith $f -o $bn
  goto-instrument --dump-c $bn ${bn}_dumped.c
  # Re-compile the dumped test and execute again to confirm the checksum hasn't
  # changed.
  gcc ${bn}_dumped.c -D__CPROVER_bool=_Bool -w -o $bn
  check_d="`timeout 30 ./$bn | sed 's/checksum = /0x/'`"
  if [ -z "$check_d" ] ; then
    echo "TIMEOUT while executing dumped CSmith test"
    return 0
  fi
  if [ "$check" != "$check_d" ] ; then
    echo "Checksum mismatch: GCC: $check dump-c: $check_d"
    cat $f
    return 1
  fi

  echo "OK: $f"
  echo
}

# Generate and run a number of tests.
csmith_random_test() {
  trap print_dir_and_seed ERR

  local fixed_seed=$2

  for i in `seq 1 $1`; do
    seed=`date +%s`
    if [ "x$fixed_seed" != x ] ; then
      seed=$fixed_seed
    fi
    echo "Random seed being used: $seed"
    csmith --seed $seed > t_$i.c
    csmith_test t_$i.c
  done
}

# Determine the number of tests to run from the command line; the default is to
# run a single test.
N=$1
if [ -z $N ] ; then
  N=1
fi

# A random seed may be provided on the command line. Really only makes sense
# when N is set to 1
SEED=$2
if [ "x$SEED" != "x" ] && [ $N != 1 ] ; then
  echo "Only running a single test with fixed random seed"
  N=1
fi

basedir=$PWD
cd $workdir
trap print_dir_and_seed ERR

csmith_random_test $N $SEED

cd $basedir
rm -rf $workdir