//! Shared bench fixtures.
//!
//! Benchmarks generate deterministic input rather than reading from disk so
//! they run reproducibly on any machine without relying on repo state.
use std::time::Duration;
/// Default number of lines for generated fixtures (10,000).
///
/// NOTE(review): nothing in the visible part of this file reads this
/// constant; presumably benches pass it as `n` to the generator functions
/// — confirm against callers.
pub const DEFAULT_LINES: usize = 10_000;
/// Build a deterministic synthetic access log of `n` lines.
///
/// Line `i` (zero-based) has the form
/// `2025-04-18T10:00:SSZ host=H status=C msg=ok`, where:
///
/// - `SS` is `i % 60`, zero-padded to two digits;
/// - `H` cycles through `api`, `web`, `worker`, `batch`, `edge`;
/// - `C` is selected by `i % 100`: 80% `200`, 10% `301`, 5% `404`,
///   4% `500` and 1% `503`.
///
/// Over every full block of 100 lines, exactly 10% of rows match
/// `status >= 400` and 5% match `status >= 500`. The mix is meant to
/// approximate production traffic.
///
/// Because 100 is a multiple of the host count, host and status are
/// correlated: for example `503` only ever appears with `host=edge`.
///
/// Returns an empty vector when `n` is 0.
pub fn access_log(n: usize) -> Vec<String> {
    const HOSTS: [&str; 5] = ["api", "web", "worker", "batch", "edge"];

    (0..n)
        .map(|i| {
            let host = HOSTS[i % HOSTS.len()];
            // Status buckets over a repeating window of 100 lines.
            let status = match i % 100 {
                0..=79 => 200,
                80..=89 => 301,
                90..=94 => 404,
                95..=98 => 500,
                _ => 503,
            };
            format!(
                "2025-04-18T10:00:{:02}Z host={} status={} msg=ok",
                i % 60,
                host,
                status,
            )
        })
        .collect()
}
/// Build `n` deterministic single-line JSON records.
///
/// Every record carries the same `ts`, `host` and `status` values; only
/// `seq` varies, counting up from 0 in output order.
pub fn structured_lines(n: usize) -> Vec<String> {
    let mut records = Vec::with_capacity(n);
    for seq in 0..n {
        let record = format!(
            r#"{{"ts":"2025-04-18T10:00:00Z","seq":{},"host":"web","status":200}}"#,
            seq
        );
        records.push(record);
    }
    records
}
/// Warm-up duration shared by the benches: 500 milliseconds.
pub fn warmup() -> Duration {
    const WARMUP_MS: u64 = 500;
    Duration::from_millis(WARMUP_MS)
}
/// Sample count shared by the benches: 50.
pub fn sample_size() -> usize {
    const SAMPLES: usize = 50;
    SAMPLES
}