// benches/fixtures.rs

//! Shared bench fixtures.
//!
//! Benchmarks generate deterministic input rather than reading from disk so
//! they run reproducibly on any machine without relying on repo state.

use std::time::Duration;

/// Default number of synthetic lines for a fixture.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// benches pass it as `n` to the generator functions — confirm against callers.
pub const DEFAULT_LINES: usize = 10_000;

/// Build a deterministic synthetic access log of `n` lines.
///
/// Line `i` is derived purely from its index, so the same `n` always yields
/// the same output:
///
/// - the seconds field of the timestamp is `i % 60`,
/// - `host` cycles through `api`, `web`, `worker`, `batch`, `edge`,
/// - `status` follows a fixed pattern that repeats every 100 lines:
///   80 × `200`, 10 × `301`, 5 × `404`, 4 × `500`, 1 × `503`.
///
/// Consequently 10% of rows match `status >= 400` and 5% match
/// `status >= 500` (exactly so when `n` is a multiple of 100, roughly
/// otherwise). Note that the `301` rows are *not* part of the `>= 400` set.
///
/// Returns an empty vector when `n` is 0.
pub fn access_log(n: usize) -> Vec<String> {
    const HOSTS: [&str; 5] = ["api", "web", "worker", "batch", "edge"];

    (0..n)
        .map(|i| {
            let host = HOSTS[i % HOSTS.len()];
            // Status mix is keyed on the position within each block of 100
            // lines; changing these ranges shifts every benchmark baseline.
            let status = match i % 100 {
                0..=79 => 200,
                80..=89 => 301,
                90..=94 => 404,
                95..=98 => 500,
                _ => 503,
            };
            format!(
                "2025-04-18T10:00:{:02}Z host={} status={} msg=ok",
                i % 60,
                host,
                status,
            )
        })
        .collect()
}

/// Build `n` deterministic JSON-style log lines.
///
/// Every line is identical except for the `seq` field, which equals the
/// line's zero-based index; `ts`, `host`, and `status` are fixed.
/// Returns an empty vector when `n` is 0.
pub fn structured_lines(n: usize) -> Vec<String> {
    let mut lines = Vec::with_capacity(n);
    for seq in 0..n {
        let line = format!(
            r#"{{"ts":"2025-04-18T10:00:00Z","seq":{},"host":"web","status":200}}"#,
            seq
        );
        lines.push(line);
    }
    lines
}

/// Shared warm-up duration for benchmarks: 500 milliseconds.
///
/// NOTE(review): presumably handed to the benchmark harness's warm-up
/// setting — confirm against callers.
pub fn warmup() -> Duration {
    const WARMUP_MILLIS: u64 = 500;
    Duration::from_millis(WARMUP_MILLIS)
}

/// Shared sample count for benchmarks: 50.
///
/// NOTE(review): presumably handed to the benchmark harness's sample-size
/// setting — confirm against callers.
pub fn sample_size() -> usize {
    const SAMPLES: usize = 50;
    SAMPLES
}