//! Compile user-supplied regex rules and classify lines.
//!
//! A [`FilterSet`] combines three things:
//!
//! 1. Zero or more *include* patterns. If any are set, a line must match at
//! least one of them to pass.
//! 2. Zero or more *exclude* patterns. Any match causes the line to be dropped.
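//! 3. An optional *extract* pattern with named capture groups. When present,
//! matching lines produce a [`MatchOutcome::Fields`] with ordered fields;
//! lines that pass the filters but do not match it are still emitted as
//! [`MatchOutcome::Plain`].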
use std::collections::BTreeMap;
use anyhow::{anyhow, Result};
use regex::{Regex, RegexSet};
/// Result of applying a [`FilterSet`] to a single line.
///
/// Outcomes compare by value (`PartialEq`/`Eq`), so callers and tests can use
/// `==` and `assert_eq!` directly instead of destructuring every result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MatchOutcome {
    /// Line passed without structured extraction.
    Plain,
    /// Line matched the extract pattern; named captures are preserved in order.
    ///
    /// Each entry is `(capture name, captured text)`. A named group that did
    /// not participate in the match is reported with an empty string.
    Fields(Vec<(String, String)>),
}
/// Compiled include, exclude, and extract rules for classifying lines.
///
/// Built with [`FilterSet::compile`] and applied to individual lines with
/// [`FilterSet::apply`].
#[derive(Debug)]
pub struct FilterSet {
/// Compiled include patterns; `None` when no include patterns were supplied.
includes: Option<RegexSet>,
/// Compiled exclude patterns; `None` when no exclude patterns were supplied.
excludes: Option<RegexSet>,
/// Compiled extract pattern, if one was supplied.
extract: Option<Regex>,
/// Names of the extract pattern's named capture groups, in the order the
/// regex reports them. Empty when there is no extract pattern.
field_order: Vec<String>,
}
impl FilterSet {
/// Compile a set of include, exclude, and extract patterns.
///
/// Each string is compiled independently. The returned [`FilterSet`] can
/// be applied to lines with [`FilterSet::apply`].
pub fn compile(
includes: &[String],
excludes: &[String],
extract: Option<&str>,
) -> Result<Self> {
let includes = if includes.is_empty() {
None
} else {
Some(RegexSet::new(includes).map_err(|e| anyhow!("invalid --match pattern: {e}"))?)
};
let excludes = if excludes.is_empty() {
None
} else {
Some(RegexSet::new(excludes).map_err(|e| anyhow!("invalid --exclude pattern: {e}"))?)
};
let (extract, field_order) = match extract {
Some(p) => {
let re = Regex::new(p).map_err(|e| anyhow!("invalid --extract pattern: {e}"))?;
let order: Vec<String> = re
.capture_names()
.flatten()
.map(|s| s.to_string())
.collect();
if order.is_empty() {
return Err(anyhow!(
"--extract pattern must contain at least one named capture group"
));
}
(Some(re), order)
}
None => (None, Vec::new()),
};
Ok(Self { includes, excludes, extract, field_order })
}
/// Apply the rules to a line. Returns `None` if the line is filtered out.
pub fn apply(&self, line: &str) -> Option<MatchOutcome> {
if let Some(excl) = &self.excludes {
if excl.is_match(line) {
return None;
}
}
if let Some(incl) = &self.includes {
if !incl.is_match(line) {
return None;
}
}
if let Some(re) = &self.extract {
match re.captures(line) {
Some(caps) => {
let mut fields = Vec::with_capacity(self.field_order.len());
for name in &self.field_order {
let val = caps.name(name).map(|m| m.as_str()).unwrap_or("").to_string();
fields.push((name.clone(), val));
}
Some(MatchOutcome::Fields(fields))
}
None => {
// Still emit the line in plain form; the renderer handles
// the mixed stream shape.
Some(MatchOutcome::Plain)
}
}
} else {
Some(MatchOutcome::Plain)
}
}
/// Field names in the order they appear in the extract regex. Used by the
/// renderer to align tabular output.
pub fn field_order(&self) -> &[String] {
&self.field_order
}
/// Returns a rough cost estimate for the filter, for test snapshots.
#[doc(hidden)]
pub fn cost_estimate(&self) -> BTreeMap<&'static str, usize> {
let mut m = BTreeMap::new();
m.insert("includes", self.includes.as_ref().map(|r| r.len()).unwrap_or(0));
m.insert("excludes", self.excludes.as_ref().map(|r| r.len()).unwrap_or(0));
m.insert("fields", self.field_order.len());
m
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A line must match an include pattern and no exclude pattern to pass.
    #[test]
    fn include_then_exclude() {
        let includes = vec![String::from("ERROR")];
        let excludes = vec![String::from("healthz")];
        let filter = FilterSet::compile(&includes, &excludes, None).unwrap();

        let kept = filter.apply("ERROR: disk full");
        assert!(matches!(kept, Some(MatchOutcome::Plain)));

        let not_included = filter.apply("INFO: fine");
        assert!(not_included.is_none());

        let excluded = filter.apply("ERROR: healthz failed");
        assert!(excluded.is_none());
    }

    /// Named captures come back as `(name, value)` pairs in pattern order.
    #[test]
    fn extract_named_captures_in_order() {
        let pattern = r"rid=(?P<rid>\w+) ms=(?P<ms>\d+)";
        let filter = FilterSet::compile(&[], &[], Some(pattern)).unwrap();

        let expected: Vec<(String, String)> = vec![
            ("rid".into(), "abc123".into()),
            ("ms".into(), "47".into()),
        ];

        let outcome = filter.apply("t=5 rid=abc123 ms=47 path=/x").unwrap();
        match outcome {
            MatchOutcome::Fields(fields) => assert_eq!(fields, expected),
            other => panic!("unexpected: {other:?}"),
        }
    }

    /// An extract pattern without any named group is rejected at compile time.
    #[test]
    fn extract_must_have_named_groups() {
        let result = FilterSet::compile(&[], &[], Some(r"\d+"));
        let message = result.unwrap_err().to_string();
        assert!(message.contains("named capture"));
    }
}