// src/filter/dsl/compile.rs

//! Compile an AST into a flat, cache-friendly form.
//!
//! The compiled form is an arena of [`Node`] values connected by indices.
//! Regexes are pre-built exactly once per filter. This avoids the overhead of
//! walking the borrowed AST on every input line, which matters at high tailing
//! rates (see `benches/filter_bench.rs`).

use regex::Regex;

use super::ast::{Cmp, Expr, Value};
use super::error::{DslError, DslResult};

/// One entry of the compiled filter arena.
///
/// Nodes refer to their operands by `usize` index into the arena's node
/// vector rather than by pointer. `compile_into` pushes operands before the
/// node that uses them, so an operand index is always smaller than the index
/// of the node that holds it.
#[derive(Debug, Clone)]
pub enum Node {
    /// Compiled from `Expr::And`; `left` and `right` are the arena indices of
    /// the two operand nodes.
    And { left: usize, right: usize },
    /// Compiled from `Expr::Or`; `left` and `right` are the arena indices of
    /// the two operand nodes.
    Or { left: usize, right: usize },
    /// Compiled from `Expr::Not`; `inner` is the arena index of the operand.
    Not { inner: usize },
    /// Comparison of `field` against a string value with a non-match
    /// operator (`Cmp::Match` is never stored here by `compile_compare`).
    CompareStr { field: String, op: Cmp, value: String },
    /// Comparison of `field` against an integer value with a non-match
    /// operator (`Cmp::Match` is never stored here by `compile_compare`).
    CompareInt { field: String, op: Cmp, value: i64 },
    /// Regex match against `field`; the regex is built once at compile time
    /// from either a regex literal or a string value.
    Match { field: String, regex: Regex },
}

/// A filter expression compiled into a flat arena of [`Node`] values.
///
/// Built by [`compile`]. Operands are pushed before the node that refers to
/// them, so for a successfully compiled filter the root of the expression is
/// the last element of `nodes`.
#[derive(Debug, Clone)]
pub struct CompiledFilter {
    /// Arena storage; nodes reference each other by index into this vector.
    pub(crate) nodes: Vec<Node>,
}

impl CompiledFilter {
    /// Returns the number of nodes stored in the arena.
    pub fn len(&self) -> usize {
        let Self { nodes } = self;
        nodes.len()
    }

    /// Returns `true` when the arena contains no nodes.
    pub fn is_empty(&self) -> bool {
        let Self { nodes } = self;
        nodes.is_empty()
    }
}

/// Compiles `expr` into a [`CompiledFilter`].
///
/// A fresh arena is filled by `compile_into`. Every node is pushed after its
/// operands, so the root of the expression is the last node in the arena and
/// the root index returned by `compile_into` does not need to be kept.
///
/// # Errors
///
/// Propagates the first error produced while compiling any sub-expression
/// (an invalid regex pattern or an operator/value type mismatch).
pub fn compile(expr: &Expr) -> DslResult<CompiledFilter> {
    let mut arena = Vec::new();
    compile_into(expr, &mut arena).map(|_root| CompiledFilter { nodes: arena })
}

/// Recursively appends the compiled form of `expr` to `nodes` and returns the
/// arena index of the node that represents `expr` itself.
///
/// Operands are compiled (and therefore pushed) before the node that refers
/// to them, left operand first. On error, nodes already pushed for earlier
/// sub-expressions are left in `nodes`; the caller is expected to discard the
/// arena.
fn compile_into(expr: &Expr, nodes: &mut Vec<Node>) -> DslResult<usize> {
    // Build the node for this expression first; struct fields are evaluated
    // in the order written, so `left` is always compiled before `right`.
    let compiled = match expr {
        Expr::And(lhs, rhs) => Node::And {
            left: compile_into(lhs, nodes)?,
            right: compile_into(rhs, nodes)?,
        },
        Expr::Or(lhs, rhs) => Node::Or {
            left: compile_into(lhs, nodes)?,
            right: compile_into(rhs, nodes)?,
        },
        Expr::Not(operand) => Node::Not {
            inner: compile_into(operand, nodes)?,
        },
        Expr::Compare { field, op, value } => {
            // The AST is only borrowed, so the leaf takes owned copies.
            compile_compare(field.clone(), *op, value.clone())?
        }
    };

    // The new node lands at the current end of the arena.
    let index = nodes.len();
    nodes.push(compiled);
    Ok(index)
}

/// Builds the leaf [`Node`] for a single `field <op> value` comparison.
///
/// * `~` with a regex or string value compiles the pattern into
///   [`Node::Match`].
/// * Any other operator with an integer value yields [`Node::CompareInt`],
///   and with a string value yields [`Node::CompareStr`].
///
/// # Errors
///
/// * `DslError::InvalidRegex` when the pattern given to `~` does not compile.
/// * `DslError::TypeMismatch` when `~` is applied to an integer, or when a
///   non-match operator is applied to a regex value.
fn compile_compare(field: String, op: Cmp, value: Value) -> DslResult<Node> {
    // The match operator is handled on its own path: it is the only operator
    // that accepts a regex and the only one that rejects integers.
    if matches!(op, Cmp::Match) {
        let pattern = match value {
            Value::Regex(text) | Value::Str(text) => text,
            Value::Int(_) => {
                return Err(DslError::TypeMismatch {
                    op: "~",
                    kind: "integer",
                })
            }
        };
        return match Regex::new(&pattern) {
            Ok(regex) => Ok(Node::Match { field, regex }),
            Err(err) => Err(DslError::InvalidRegex {
                pattern,
                reason: err.to_string(),
            }),
        };
    }

    match value {
        Value::Int(number) => Ok(Node::CompareInt {
            field,
            op,
            value: number,
        }),
        Value::Str(text) => Ok(Node::CompareStr {
            field,
            op,
            value: text,
        }),
        Value::Regex(_) => {
            let symbol = match op {
                Cmp::Eq => "=",
                Cmp::Ne => "!=",
                Cmp::Lt => "<",
                Cmp::Le => "<=",
                Cmp::Gt => ">",
                Cmp::Ge => ">=",
                // Not reachable on this path (handled above); listed so the
                // match stays exhaustive without a wildcard arm.
                Cmp::Match => "~",
            };
            Err(DslError::TypeMismatch {
                op: symbol,
                kind: "regex",
            })
        }
    }
}