zeek/auxil/paraglob/tools/benchmark.cpp
Patrick Kelley 8fd444092b initial
2025-05-07 15:35:15 -04:00

165 lines
4.2 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "benchmark.h"
#include <random>
#include <memory>
std::random_device dev;
std::mt19937 rng(dev());
std::uniform_int_distribution<std::mt19937::result_type> dist(0,RAND_MAX);
int rand_int()
{
return dist(rng);
}
static const char* benchmark_pattern_words[] = {
"aaaaaa", "bb", "cccccccccccccccc", "ddddd", "eeeeeeeee", "fffffffffffff", "gggg"
};
const char* random_pattern_word(){
int idx = rand_int() % (sizeof(benchmark_pattern_words) / sizeof(const char*));
return benchmark_pattern_words[idx];
}
const char* random_word()
{
static char buffer[1024];
int j;
int rounds = (rand_int() % 25) + 5;
for ( j = 0; j < rounds; j++ ) {
buffer[j] = (char)((rand_int() % 26) + 'a');
}
buffer[rounds] = '\0';
return buffer;
}
double benchmark(char* a, char* b, char* c, bool silent) {
long num_patterns = atol(a);
long num_queries = atol(b);
long match_prob = atol(c);
return benchmark_n(num_patterns, num_queries, match_prob, silent);
}
double benchmark_n(long num_patterns, long num_queries, long match_prob, bool silent) {
if (!silent) {
std::cout << "creating workload:\n";
std::cout << "\t# patterns: " << num_patterns << "\n";
std::cout << "\t# queries: " << num_queries << "\n";
std::cout << "\t% matches: " << match_prob << "\n";
}
// Create the patterns.
std::unique_ptr<std::string[]> patterns(new std::string[num_patterns]);
char buffer[1024];
int i, j;
for ( i = 0; i < num_patterns; i++ ) {
buffer[0] = '\0';
int rounds = (rand_int() % 10) + 2;
for ( j = 0; j < rounds; j++ ) {
if ( j != 0 )
strcat(buffer, "*");
if ( (rand_int() % 10) == 0 ) {
strcat(buffer, random_pattern_word());
}
else {
strcat(buffer, random_word());
}
}
std::string s(buffer);
patterns[i] = s;
}
// Create the queries.
std::unique_ptr<std::string[]> queries(new std::string[num_queries]);
for ( i = 0; i < num_queries; i++ ) {
buffer[0] = '\0';
if ( (rand_int() % 100) <= match_prob ) {
// Create a likely match candidate.
int rounds = (rand_int() % 5) + 1;
for ( j = 0; j < rounds; j++ ) {
strcat(buffer, random_pattern_word());
}
}
else {
// Create a mismatch.
int rounds = (rand_int() % 50) + 5;
for ( j = 0; j < rounds; j++ ) {
buffer[j] = (char)((rand_int() % 26) + 'a');
}
buffer[rounds] = '\0';
}
queries[i] = std::string(strdup(buffer));
}
if (!silent) {
std::cout << "creating paraglob \n";
}
auto build_start = std::chrono::high_resolution_clock::now();
paraglob::Paraglob myGlob;
for ( i = 0; i < num_patterns; ++i ) {
const auto& p = patterns[i];
myGlob.add(p);
}
myGlob.compile();
auto build_finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> build_time = build_finish - build_start;
auto start = std::chrono::high_resolution_clock::now();
if (!silent) {
std::cout << "making queries \n";
}
for ( i = 0; i < num_queries; ++i ) {
const auto& q = queries[i];
myGlob.get(q);
}
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
if (!silent) {
std::cout << "Build time: " << build_time.count() << "s\n";
std::cout << "Search time: " << elapsed.count() << " s\n";
std::cout << "Queries/second: " << num_queries/elapsed.count() << "\n";
}
return elapsed.count() + build_time.count();
}
void makeGraphData() {
/*
prints data to the console for generation of 3d plot
of paraglob performance.
x axis is number of patterns
y axis is number of queries
z axis is the time taken to build and run the queries
*/
for(long patterns = 500; patterns <= 10000; patterns += 500) {
std::cout << "{ ";
for(long queries = 1000; queries <= 20000; queries += 1000) {
std::cout << benchmark_n(patterns, queries, 10, true);
if (queries != 20000) {
std::cout << ", ";
}
}
std::cout << "},\n";
}
}