What happens when you run a Hegel test

In the previous chapter we wrote a property-based test for an LRU cache. In this chapter we’ll go through it in more detail and look at what happens when we run it.

In order to do this, let’s run it against a real, buggy implementation of MyLRUCache. We’ll start with one that simply never evicts:

/// A deliberately broken "LRU cache": every entry is kept in a plain map and
/// nothing is ever evicted, so the configured capacity is recorded but never
/// enforced.
pub struct MyLRUCache<K, V> {
    capacity: usize,
    entries: HashMap<K, V>,
}

impl<K, V> MyLRUCache<K, V>
where
    K: Hash + Eq,
{
    /// Creates an empty cache that remembers `capacity` as its nominal limit.
    pub fn new(capacity: usize) -> Self {
        let entries = HashMap::new();
        Self { capacity, entries }
    }

    /// Returns the number of entries currently stored.
    pub fn size(&self) -> usize {
        self.entries.len()
    }

    /// Returns the capacity this cache was constructed with.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Looks up `key`, returning a reference to its value if one is stored.
    pub fn get(&self, key: &K) -> Option<&V> {
        self.entries.get(key)
    }

    /// Stores `value` under `key`, replacing any value already stored there.
    pub fn put(&mut self, key: K, value: V) {
        // BUG: a real LRU cache would evict the least-recently-used entry once
        // it reached `self.capacity`. This one never does.
        let _replaced = self.entries.insert(key, value);
    }
}

// MyLRUCache is a deliberately broken "LRU cache": it keeps every entry in a
// map and never evicts, so the configured capacity is stored but not enforced.
type MyLRUCache[K comparable, V any] struct {
	capacity int
	entries  map[K]V
}

// NewMyLRUCache returns an empty cache that records capacity as its nominal
// limit.
func NewMyLRUCache[K comparable, V any](capacity int) *MyLRUCache[K, V] {
	cache := new(MyLRUCache[K, V])
	cache.capacity = capacity
	cache.entries = make(map[K]V)
	return cache
}

// Capacity reports the capacity the cache was constructed with.
func (lru *MyLRUCache[K, V]) Capacity() int {
	return lru.capacity
}

// Size reports how many entries are currently stored.
func (lru *MyLRUCache[K, V]) Size() int {
	return len(lru.entries)
}

// Get returns the value stored under key and whether the key was present.
func (lru *MyLRUCache[K, V]) Get(key K) (value V, ok bool) {
	value, ok = lru.entries[key]
	return
}

// Put stores value under key, replacing any value already stored there.
func (lru *MyLRUCache[K, V]) Put(key K, value V) {
	// BUG: a real LRU cache would evict the least-recently-used entry once it
	// reached lru.capacity. This one never does.
	lru.entries[key] = value
}

// MyLRUCache pretends to be an LRU cache but never evicts anything: it stores
// every entry in a map, so its size grows without bound.
//
// K must be usable as a std::unordered_map key; V only needs to be
// copy-constructible and copy-assignable.
template <typename K, typename V>
class MyLRUCache {
public:
	// Creates an empty cache that records `capacity` as its nominal limit.
	explicit MyLRUCache(std::size_t capacity) : capacity_(capacity) {}

	// Stores `value` under `key`, replacing any value already stored there.
	void put(const K& key, const V& value) {
		// BUG: a real LRU cache would evict the least-recently-used entry once
		// it reached capacity_. This one never does.
		//
		// insert_or_assign is used instead of operator[] so that V does not
		// have to be default-constructible.
		entries_.insert_or_assign(key, value);
	}

	// Number of entries currently stored.
	std::size_t size() const { return entries_.size(); }

	// The capacity this cache was constructed with.
	std::size_t capacity() const { return capacity_; }

private:
	std::size_t capacity_;
	std::unordered_map<K, V> entries_;
};

// MyLRUCache is a deliberately broken "LRU cache": it keeps every entry in a
// Map and never evicts, so the configured capacity is stored but not enforced.
export class MyLRUCache<K, V> {
	private capacity: number;
	private entries: Map<K, V> = new Map<K, V>();

	// Creates an empty cache that records `capacity` as its nominal limit.
	constructor(capacity: number) {
		this.capacity = capacity;
	}

	// The capacity this cache was constructed with.
	getCapacity(): number {
		return this.capacity;
	}

	// Number of entries currently stored.
	size(): number {
		return this.entries.size;
	}

	// Stores `value` under `key`, replacing any value already stored there.
	put(key: K, value: V): void {
		// BUG: a real LRU cache would evict the least-recently-used entry once
		// it reached `this.capacity`. This one never does.
		this.entries.set(key, value);
	}
}

And here is the property-based test from the previous chapter, which we’ll run against it:

/// Property: after inserting a drawn list of entries, the cache never holds
/// more entries than its capacity.
///
/// The capacity is a drawn `usize` with minimum value 0, and the entries are a
/// drawn vector of (text key, `i64` value) pairs inserted in order.
#[hegel::test]
fn test_respects_lru_capacity(tc: TestCase) {
    let capacity = tc.draw(gs::integers::<usize>().min_value(0));
    let mut cache = MyLRUCache::<String, i64>::new(capacity);

    let entries = tc.draw(gs::vecs(gs::tuples!(gs::text(), gs::integers::<i64>())));
    for (key, value) in entries {
        cache.put(key, value);
    }

    assert!(cache.size() <= capacity);
}

// TestRespectsLRUCapacity checks the property that, after putting a drawn list
// of keys into the cache, its size does not exceed its capacity. The capacity
// is drawn with hegel.Integers(0, math.MaxInt); each key's value is drawn
// separately inside the loop.
func TestRespectsLRUCapacity(t *testing.T) {
	hegel.Test(t, func(ht *hegel.T) {
		capacity := hegel.Draw(ht, hegel.Integers(0, math.MaxInt))
		cache := NewMyLRUCache[string, int](capacity)

		keys := hegel.Draw(ht, hegel.Lists(hegel.Text()))
		for _, key := range keys {
			value := hegel.Draw(ht, hegel.Integers(math.MinInt, math.MaxInt))
			cache.Put(key, value)
		}

		if cache.Size() > capacity {
			ht.Fatalf("cache size exceeds capacity")
		}
	})
}

// Property: after putting a drawn vector of (text key, int value) entries into
// the cache, its size does not exceed its capacity. The capacity is a drawn
// size_t with min_value 0.
TEST(MyLRUCache, RespectsCapacity) {
	hegel::test([](hegel::TestCase& tc) {
		auto capacity = tc.draw(gs::integers<size_t>({.min_value = 0}));
		MyLRUCache<std::string, int> cache(capacity);

		auto entries = tc.draw(
		  gs::vectors(gs::tuples(gs::text(), gs::integers<int>()))
		);
		for (const auto& [key, value] : entries) {
			cache.put(key, value);
		}

		// Hegel detects failures via thrown exceptions, so the property throws
		// rather than using a (non-throwing) gtest assertion.
		if (cache.size() > capacity) {
			throw std::runtime_error("cache size exceeds capacity");
		}
	});
}

// Property: after putting a drawn array of [text key, integer value] entries
// into the cache, its size is at most its capacity. The capacity is a drawn
// integer with minValue 0.
test(
	"MyLRUCache respects capacity",
	hegel.test((tc) => {
		const capacity = tc.draw(gs.integers({ minValue: 0 }));
		const cache = new MyLRUCache<string, number>(capacity);

		const entries = tc.draw(gs.arrays(gs.tuples(gs.text(), gs.integers())));
		for (const [key, value] of entries) {
			cache.put(key, value);
		}

		expect(cache.size()).toBeLessThanOrEqual(capacity);
	}),
);

Because the cache never evicts, its size grows without bound, so the property is false: as soon as we insert more distinct keys than the capacity, the cache is too big. As a result, the test fails:

running 1 test
test test_respects_lru_capacity ... FAILED

failures:

---- test_respects_lru_capacity stdout ----
let capacity = 0;
let entries = [("", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru.rs:16:5:
assertion failed: cache.size() <= capacity

failures:
    test_respects_lru_capacity

test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out

--- FAIL: TestRespectsLRUCapacity
    lru_test.go:13: capacity := hegel.Draw(ht, hegel.Integers(0, math.MaxInt)) = 0
    lru_test.go:16: keys := hegel.Draw(ht, hegel.Lists(hegel.Text())) = []string{""}
    lru_test.go:18: value := hegel.Draw(ht, hegel.Integers(math.MinInt, math.MaxInt)) = 0
    lru_test.go:23: cache size exceeds capacity
    lru_test.go:12: property test failed: cache size exceeds capacity
FAIL
FAIL	lru-example

[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from MyLRUCache
[ RUN      ] MyLRUCache.RespectsCapacity
Generated: 0
Generated: [["",0]]
unknown file: Failure
C++ exception with description "
Hegel test failed: cache size exceeds capacity" thrown in the test body.

[  FAILED  ] MyLRUCache.RespectsCapacity
[----------] 1 test from MyLRUCache

[----------] Global test environment tear-down
[==========] 1 test from 1 test suite ran.
[  PASSED  ] 0 tests.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] MyLRUCache.RespectsCapacity

 1 FAILED TEST

var draw_1 = 0;
var draw_2 = [ [ '', 0 ] ];

expected 1 to be less than or equal to 0

The reported failure is very straightforward: we set the capacity to zero, then insert a single key (an empty string, with inserted value 0), and then check that the cache has at most zero elements in it. It does not, so the test fails.

We might reasonably think that this is a bug with the capacity zero case, and maybe we’re not interested in capacity zero caches, so we could modify the test as follows:

/// Property: after inserting a drawn list of entries, the cache never holds
/// more entries than its capacity.
///
/// The capacity is a drawn `usize` with minimum value 1, so capacity-zero
/// caches are excluded; the entries are a drawn vector of (text key, `i64`
/// value) pairs inserted in order.
#[hegel::test]
fn test_respects_lru_capacity(tc: TestCase) {
    let capacity = tc.draw(gs::integers::<usize>().min_value(1));
    let mut cache = MyLRUCache::<String, i64>::new(capacity);

    let entries = tc.draw(gs::vecs(gs::tuples!(gs::text(), gs::integers::<i64>())));
    for (key, value) in entries {
        cache.put(key, value);
    }

    assert!(cache.size() <= capacity);
}

// TestRespectsLRUCapacity checks the property that, after putting a drawn list
// of keys into the cache, its size does not exceed its capacity. The capacity
// is drawn with hegel.Integers(1, math.MaxInt), so capacity-zero caches are
// excluded; each key's value is drawn separately inside the loop.
func TestRespectsLRUCapacity(t *testing.T) {
	hegel.Test(t, func(ht *hegel.T) {
		capacity := hegel.Draw(ht, hegel.Integers(1, math.MaxInt))
		cache := NewMyLRUCache[string, int](capacity)

		keys := hegel.Draw(ht, hegel.Lists(hegel.Text()))
		for _, key := range keys {
			value := hegel.Draw(ht, hegel.Integers(math.MinInt, math.MaxInt))
			cache.Put(key, value)
		}

		if cache.Size() > capacity {
			ht.Fatalf("cache size exceeds capacity")
		}
	})
}

// Property: after putting a drawn vector of (text key, int value) entries into
// the cache, its size does not exceed its capacity. The capacity is a drawn
// size_t with min_value 1, so capacity-zero caches are excluded.
TEST(MyLRUCache, RespectsCapacity) {
	hegel::test([](hegel::TestCase& tc) {
		auto capacity = tc.draw(gs::integers<size_t>({.min_value = 1}));
		MyLRUCache<std::string, int> cache(capacity);

		auto entries = tc.draw(
		  gs::vectors(gs::tuples(gs::text(), gs::integers<int>()))
		);
		for (const auto& [key, value] : entries) {
			cache.put(key, value);
		}

		// Hegel detects failures via thrown exceptions, so the property throws
		// rather than using a (non-throwing) gtest assertion.
		if (cache.size() > capacity) {
			throw std::runtime_error("cache size exceeds capacity");
		}
	});
}

// Property: after putting a drawn array of [text key, integer value] entries
// into the cache, its size is at most its capacity. The capacity is a drawn
// integer with minValue 1, so capacity-zero caches are excluded.
test(
	"MyLRUCache respects capacity",
	hegel.test((tc) => {
		const capacity = tc.draw(gs.integers({ minValue: 1 }));
		const cache = new MyLRUCache<string, number>(capacity);

		const entries = tc.draw(gs.arrays(gs.tuples(gs.text(), gs.integers())));
		for (const [key, value] of entries) {
			cache.put(key, value);
		}

		expect(cache.size()).toBeLessThanOrEqual(capacity);
	}),
);

All we’ve changed is that the capacity is now at least one. This doesn’t help, though: the cache still never evicts, so the property is still false, and Hegel just finds the next-smallest failure instead:

running 1 test
test test_respects_lru_capacity ... FAILED

failures:

---- test_respects_lru_capacity stdout ----
let capacity = 1;
let entries = [("", 0), ("0", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_nonzero.rs:16:5:
assertion failed: cache.size() <= capacity

failures:
    test_respects_lru_capacity

test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out

--- FAIL: TestRespectsLRUCapacity
    lru_nonzero_test.go:15: capacity := hegel.Draw(ht, hegel.Integers(1, math.MaxInt)) = 1
    lru_nonzero_test.go:18: keys := hegel.Draw(ht, hegel.Lists(hegel.Text())) = []string{"", "0"}
    lru_nonzero_test.go:20: value := hegel.Draw(ht, hegel.Integers(math.MinInt, math.MaxInt)) = 0
    lru_nonzero_test.go:20: value := hegel.Draw(ht, hegel.Integers(math.MinInt, math.MaxInt)) = 0
    lru_nonzero_test.go:25: cache size exceeds capacity
    lru_nonzero_test.go:14: property test failed: cache size exceeds capacity
FAIL
FAIL	lru-example/nonzero

[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from MyLRUCache
[ RUN      ] MyLRUCache.RespectsCapacity
Generated: 1
Generated: [["",0],["0",0]]
unknown file: Failure
C++ exception with description "
Hegel test failed: cache size exceeds capacity" thrown in the test body.

[  FAILED  ] MyLRUCache.RespectsCapacity
[----------] 1 test from MyLRUCache

[----------] Global test environment tear-down
[==========] 1 test from 1 test suite ran.
[  PASSED  ] 0 tests.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] MyLRUCache.RespectsCapacity

 1 FAILED TEST

var draw_1 = 1;
var draw_2 = [ [ '', 0 ], [ '0', 0 ] ];

expected 2 to be less than or equal to 1

This time the capacity is one, and we insert two distinct keys — the empty string and the string "0" — giving a cache of size two, which is larger than one. The bug was never really about capacity zero at all.

There are a few things worth observing about this test:

It does, correctly, fail, in a way that demonstrates the problem. Given that the thing it is testing is completely broken, this is a pretty low bar to clear, but it’s worth noting explicitly.¹
When it fails it prints the failing test case. This is a big difference between property-based tests and typical example-based tests: Because the test involves generated data, you need to be able to show the actual concrete values that were chosen.
The printed test case is quite simple. In this case it’s the simplest it could possibly be (according to some specific notion of “simplest”), but in general it is only guaranteed to be simplified, not necessarily the simplest possible.

These come from the basic lifecycle of a property-based test:

We run the test function multiple times (in Hegel, 100 times by default), with different generated values.
If any of them fail, we pick a failing test case and shrink it — running the test function many more times, with simpler variations of our current simplest failing test case.
Finally, we print it.

Shrinking in action

In order to see shrinking at work, we can use the verbosity setting to print every test case tried as we run:²

/// Property: after inserting a drawn list of entries, the cache never holds
/// more entries than its capacity.
///
/// The capacity is a drawn `usize` with minimum value 1 and the entries are a
/// drawn vector of (text key, `i64` value) pairs. The test attribute sets
/// `verbosity = Verbosity::Verbose`.
#[hegel::test(verbosity = Verbosity::Verbose)]
fn test_respects_lru_capacity(tc: TestCase) {
    let capacity = tc.draw(gs::integers::<usize>().min_value(1));
    let mut cache = MyLRUCache::<String, i64>::new(capacity);

    let entries = tc.draw(gs::vecs(gs::tuples!(gs::text(), gs::integers::<i64>())));
    for (key, value) in entries {
        cache.put(key, value);
    }

    assert!(cache.size() <= capacity);
}

TODO: the Go library (v0.5.3) does not yet expose a verbosity setting — its
`go test` integration always runs at "normal" verbosity.

TODO: the C++ library (v0.3.9) prints only the final counterexample, not the
intermediate test cases, so it cannot yet show the full verbose lifecycle.

TODO: the TypeScript library prints only the final counterexample, not the
intermediate test cases, so it cannot yet show the full verbose lifecycle.

This prints all intermediate test cases rather than just the final failing one. Here’s an excerpt of the most interesting bits:

running 1 test
Running test case
let capacity = 1;
let entries = [];
Running test case
let capacity = 3971;
let entries = [];
Running test case
let capacity = 46634;
let entries = [("úY\u{8b413}", -9223372036854739458)];
Running test case
let capacity = 142;
let entries = [("\u{94}ÈóA", -9223372036854751606), ("#¼", -9223372036854775631)];

    [ ... dozens more cases, each one passing ... ]

Running test case
let capacity = 1;
let entries = [("\u{98}§\u{96}", -9223372036854726090), ("3\u{1f}\u{90}\u{3cbe9}\nË^W", -6634910996680964021), ("\u{dd1c7}\u{16}\u{b}\u{8b}", -2421911336807628733), ("ù\u{52662}7", 3450639943582724138), ("\u{1b}Pဋ\u{13}S\u{95}5ä", -9223372033209782582), ("𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", -5779172081946973126), ("\u{c0db0}\u{84}o\u{1d}\u{88}", -9223372036854733404), ("N> ü", -9223372036854712834), ("\u{8f}Á𘏐\u{92da9}", -9223372036854775600), ("\u{86251}\u{92}_´𧇎𑚨\u{8cc31}", -9223372036854757126), ("ä", -9223372036854745451), ("", -9223372036854738920), ("ýV\u{9a}", -3145832809244121282)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity

    [ ... the same failure, simplified over and over ... ]

Running test case
let capacity = 1;
let entries = [("\u{98}§\u{96}", -9223372036854726090), ("3\u{1f}\u{90}\u{3cbe9}\nË^W", -6634910996680964021)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
Running test case
let capacity = 1;
let entries = [("\u{98}§\u{96}", -9223372036854726090), ("", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
Running test case
let capacity = 1;
let entries = [("\u{98}§\u{96}", -9223372036854726090)];
Running test case
let capacity = 1;
let entries = [("", 0)];

    [ ... some simplifications no longer fail, so they are discarded ... ]

Running test case
let capacity = 1;
let entries = [("", 0), ("000", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
Running test case
let capacity = 1;
let entries = [("", 0), ("00", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
Running test case
let capacity = 1;
let entries = [("", 0), ("0", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
Running test case
let capacity = 1;
let entries = [("0", 0)];
let capacity = 1;
let entries = [("", 0), ("0", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
test test_respects_lru_capacity ... FAILED

failures:
    test_respects_lru_capacity

test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out

TODO

TODO

TODO

You can see both the generation and the shrinking at play here. Initially, Hegel tries a variety of different test cases, with various different capacities and entries, until it finds one that fails (e.g. capacity = 1, with more than one entry). Then it switches to a shrinking mode, where it tries deleting those entries, simplifying the keys and values within them, and so on. Once it can no longer shrink any further, it replays the final shrunk example one last time and lets the shrunk failure that we saw propagate to the test runner.

Replaying a saved failure

We’ll now see one other piece of the property-based testing lifecycle: Replay. Once Hegel has found this failure, subsequent runs will start from there (until the bug is fixed). So if we run the verbose test a second time without changing anything, we don’t see the long search and shrink from before:

running 1 test
Running test case
let capacity = 1;
let entries = [("", 0), ("0", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity
let capacity = 1;
let entries = [("", 0), ("0", 0)];
thread 'test_respects_lru_capacity' panicked at tests/lru_verbose.rs:18:5:
assertion failed: cache.size() <= capacity

test test_respects_lru_capacity ... FAILED

failures:

failures:
    test_respects_lru_capacity

test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out

TODO

TODO

TODO

As well as significantly speeding up the test, this feature is an important part of making Hegel part of your development loop. Although a Hegel test may sometimes pass erroneously (because it failed to find the bug in its 100 test case budget), once it has found a bug, you may reliably use it as part of your development process because the test will keep failing until the bug is fixed.

¹ It’s also not completely true! If you run these tests yourself, they will only fail most of the time. More on that in a second. ↩
² Or we can in languages that support this, which turns out to be only our Rust implementation right now. Sorry, we’re on it. ↩

Keyboard shortcuts

Property-based testing with Hegel

What happens when you run a Hegel test

Shrinking in action

Replaying a saved failure